Code example #1
def get_metadata(fname):
    """Parse metadata from the file."""
    res = None
    for section in CONFIG.sections():
        try:
            parser = Parser(CONFIG.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(CONFIG.items(section)))

        for key in ["watcher", "pattern", "timeliness", "regions"]:
            res.pop(key, None)

        res = trigger.fix_start_end_time(res)

        if ("sensor" in res) and ("," in res["sensor"]):
            res["sensor"] = res["sensor"].split(",")

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)

    return res
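
All of the snippets on this page revolve around trollsift.Parser. As a minimal sketch of the validate/parse calls used in get_metadata above — the pattern and filename below are invented for illustration and do not come from any of the quoted projects:

from trollsift import Parser

# Hypothetical pattern and filename, for illustration only.
parser = Parser("{platform_name}_{start_time:%Y%m%d_%H%M%S}_{channel}.nc")
fname = "NOAA-20_20240101_123000_M05.nc"

if parser.validate(fname):           # True when the filename matches the pattern
    metadata = parser.parse(fname)   # e.g. {'platform_name': 'NOAA-20', ...}
    print(metadata["start_time"])    # datetime.datetime(2024, 1, 1, 12, 30)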
Code example #2
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("segment_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(
                seconds=config.getint(section, "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()

        self.time_name = config.get(section, 'time_name')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False
Code example #3
 def create_filename(self, fname_pattern, dir_pattern, params=None):
     '''Parse filename for saving.
     '''
     fname = os.path.join(dir_pattern, fname_pattern)
     par = Parser(fname)
     fname = par.compose(params)
     return fname
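
A hypothetical call to create_filename above, assuming obj stands for an instance of the (not shown) class that defines it; the patterns and parameters are illustrative only, not taken from the original project's configuration:

import datetime as dt

# 'obj' is a hypothetical instance of the class defining create_filename.
params = {"platform_name": "Metop-B",
          "start_time": dt.datetime(2024, 1, 1, 12, 30)}
out = obj.create_filename("{platform_name}_{start_time:%Y%m%d%H%M}.png",
                          "/data/output/{platform_name}",
                          params=params)
# out == "/data/output/Metop-B/Metop-B_202401011230.png"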
Code example #4
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()

        try:
            nameservers = config.get(section, 'nameserver')
            nameservers = nameservers.split()
        except (NoOptionError, ValueError):
            nameservers = []

        try:
            addresses = config.get(section, 'addresses')
            addresses = addresses.split()
        except (NoOptionError, ValueError):
            addresses = None

        try:
            publish_port = config.get(section, 'publish_port')
        except NoOptionError:
            publish_port = 0

        try:
            services = config.get(section, 'services').split()
        except (NoOptionError, ValueError):
            services = ""

        self._listener = ListenerContainer(topics=topics, addresses=addresses,
                                           services=services)
        self._publisher = publisher.NoisyPublisher("segment_gatherer",
                                                   port=publish_port,
                                                   nameservers=nameservers)
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)
        try:
            self._time_tolerance = config.getint(section, "time_tolerance")
        except NoOptionError:
            self._time_tolerance = 30
        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()

        self.time_name = config.get(section, 'time_name')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')
Code example #5
 def create_filename(self, fname_pattern, dir_pattern, params=None):
     '''Parse filename for saving.
     '''
     fname = os.path.join(dir_pattern, fname_pattern)
     par = Parser(fname)
     fname = par.compose(params)
     return fname
Code example #6
    def __init__(self,
                 topic,
                 instrument,
                 config_item,
                 posttroll_port=0,
                 filepattern=None,
                 aliases=None,
                 tbus_orbit=False,
                 history=0,
                 granule_length=0,
                 custom_vars=None,
                 nameservers=[],
                 watchManager=None):
        super(EventHandler, self).__init__()

        self._pub = NoisyPublisher("trollstalker_" + config_item,
                                   posttroll_port,
                                   topic,
                                   nameservers=nameservers)
        self.pub = self._pub.start()
        self.topic = topic
        self.info = OrderedDict()
        if filepattern is None:
            filepattern = '{filename}'
        self.file_parser = Parser(filepattern)
        self.instrument = instrument
        self.aliases = aliases
        self.custom_vars = custom_vars
        self.tbus_orbit = tbus_orbit
        self.granule_length = granule_length
        self._deque = deque([], history)
        self._watchManager = watchManager
        self._watched_dirs = dict()
Code example #7
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        services = ""
        if config.has_option(section, 'services'):
            services = config.get(section, 'services').split()
        self._listener = ListenerContainer(topics=topics, services=services)
        self._publisher = publisher.NoisyPublisher("geo_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(
                seconds=config.getint(section, "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=20)

        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

        self.logger = logging.getLogger("geo_gatherer")
        self._loop = False
Code example #8
    def fires_filtering(self, msg, af_shapeff):
        """Read Active Fire data and perform spatial filtering removing false detections.

        Do the national filtering first, and then filter out potential false
        detections by the special mask for that.

        """
        logger.debug(
            "Read VIIRS AF detections and perform quality control and spatial filtering"
        )

        fmda = af_shapeff.metadata
        # metadata contains time and everything but it is not being transferred to the dataframe.attrs

        pout = Parser(self.outfile_pattern_national)
        out_filepath = os.path.join(self.output_dir, pout.compose(fmda))
        logger.debug("Output file path = %s", out_filepath)

        # National filtering:
        af_shapeff.fires_filtering(self.shp_boarders)
        # Metadata should be transferred here!
        afdata_ff = af_shapeff.get_af_data()

        if len(afdata_ff) > 0:
            af_shapeff.fires_filtering(self.shp_filtermask,
                                       start_geometries_index=0,
                                       inside=False)
            afdata_ff = af_shapeff.get_af_data()

        filepath = store_geojson(out_filepath,
                                 afdata_ff,
                                 platform_name=af_shapeff.platform_name)
        out_messages = self.get_output_messages(filepath, msg, len(afdata_ff))

        return out_messages, afdata_ff
Code example #9
 def _read_cf_from_string_export(cls, blob):
     """Read blob as a string created by `to_cf`."""
     pattern = "{central:f} {unit:s} ({min:f}-{max:f} {unit2:s})"
     from trollsift import Parser
     parser = Parser(pattern)
     res_dict = parser.parse(blob)
     res_dict.pop('unit2')
     obj = cls(**res_dict)
     return obj
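
A hedged round-trip sketch for the pattern above; the numbers are invented, and to_cf on the original class is assumed to produce a string in exactly this format, so compose() and parse() act as inverses:

from trollsift import Parser

pattern = "{central:f} {unit:s} ({min:f}-{max:f} {unit2:s})"
parser = Parser(pattern)

# Invented values; compose() renders them into the pattern ...
blob = parser.compose({"central": 0.6545, "unit": "um",
                       "min": 0.5693, "max": 0.7123, "unit2": "um"})
# ... and parse() recovers the numeric fields as floats, which is what
# _read_cf_from_string_export relies on to rebuild the object.
print(parser.parse(blob))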
Code example #10
def setup(decoder):
    """Setup the granule triggerer.
    """

    granule_triggers = []

    for section in CONFIG.sections():
        regions = [
            get_area_def(region)
            for region in CONFIG.get(section, "regions").split()
        ]

        timeliness = timedelta(minutes=CONFIG.getint(section, "timeliness"))
        try:
            duration = timedelta(seconds=CONFIG.getfloat(section, "duration"))
        except NoOptionError:
            duration = None
        collectors = [
            region_collector.RegionCollector(region, timeliness, duration)
            for region in regions
        ]

        try:
            observer_class = CONFIG.get(section, "watcher")
            pattern = CONFIG.get(section, "pattern")
            parser = Parser(pattern)
            glob = parser.globify()
        except NoOptionError:
            observer_class = None

        try:
            publish_topic = CONFIG.get(section, "publish_topic")
        except NoOptionError:
            publish_topic = None

        if observer_class in ["PollingObserver", "Observer"]:
            LOGGER.debug("Using %s for %s", observer_class, section)
            granule_trigger = \
                trigger.WatchDogTrigger(collectors,
                                        terminator,
                                        decoder,
                                        [glob],
                                        observer_class,
                                        publish_topic=publish_topic)

        else:
            LOGGER.debug("Using posttroll for %s", section)
            granule_trigger = trigger.PostTrollTrigger(
                collectors,
                terminator,
                CONFIG.get(section, 'service').split(','),
                CONFIG.get(section, 'topics').split(','),
                publish_topic=publish_topic)
        granule_triggers.append(granule_trigger)

    return granule_triggers
Code example #11
    def _resolve(self, params, ref_params):
        resolved_params = dict()
        for k, v in params.items():
            # take only string parameters
            if isinstance(v, (str, unicode)):
                par = Parser(v)
                resolved_params[k] = par.compose(ref_params)
            elif isinstance(v, dict):
                resolved_params[k] = self._resolve(v, ref_params)
            else:
                resolved_params[k] = v

        return resolved_params
Code example #12
    def _resolve(self, params, ref_params):
        resolved_params = dict()
        for k, v in params.items():
            # take only string parameters
            if isinstance(v, (str, unicode)):
                par = Parser(v)
                resolved_params[k] = par.compose(ref_params)
            elif isinstance(v, dict):
                resolved_params[k] = self._resolve(v, ref_params)
            else:
                resolved_params[k] = v

        return resolved_params
Code example #13
def _match_files_to_pattern(files, path, pattern):
    if pattern is not None:
        parser = Parser(posixpath.join(path, pattern))
        matching_files = []
        for file in files:
            try:
                metadata = parser.parse(file['name'])
                file['metadata'] = metadata
                matching_files.append(file)
            except ValueError:
                pass
        return matching_files
    return files
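
A hypothetical invocation of _match_files_to_pattern above; the file listing and pattern are made up, and only the first entry survives the filter:

# Invented file listing and pattern.
files = [{"name": "sat/NOAA-20_20240101_1230.nc"},
         {"name": "sat/readme.txt"}]
matched = _match_files_to_pattern(
    files, "sat", "{platform_name}_{start_time:%Y%m%d_%H%M}.nc")
# Only the first entry matches; it now carries a 'metadata' dict parsed from
# its name. The second raises ValueError inside the helper and is dropped.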
Code example #14
File: gatherer.py Project: tparker-usgs/trollduction
def setup(decoder):
    """Setup the granule triggerer.
    """

    granule_triggers = []

    for section in CONFIG.sections():
        regions = [get_area_def(region)
                   for region in CONFIG.get(section, "regions").split()]

        timeliness = timedelta(minutes=CONFIG.getint(section, "timeliness"))
        try:
            duration = timedelta(seconds=CONFIG.getfloat(section, "duration"))
        except NoOptionError:
            duration = None
        collectors = [region_collector.RegionCollector(region, timeliness, duration)
                      for region in regions]

        try:
            observer_class = CONFIG.get(section, "watcher")
            pattern = CONFIG.get(section, "pattern")
            parser = Parser(pattern)
            glob = parser.globify()
        except NoOptionError:
            observer_class = None

        try:
            publish_topic = CONFIG.get(section, "publish_topic")
        except NoOptionError:
            publish_topic = None

        if observer_class in ["PollingObserver", "Observer"]:
            LOGGER.debug("Using %s for %s", observer_class, section)
            granule_trigger = \
                trigger.WatchDogTrigger(collectors,
                                        terminator,
                                        decoder,
                                        [glob],
                                        observer_class,
                                        publish_topic=publish_topic)

        else:
            LOGGER.debug("Using posttroll for %s", section)
            granule_trigger = trigger.PostTrollTrigger(
                collectors, terminator,
                CONFIG.get(section, 'service').split(','),
                CONFIG.get(section, 'topics').split(','),
                publish_topic=publish_topic)
        granule_triggers.append(granule_trigger)

    return granule_triggers
Code example #15
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("segment_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()

        self.time_name = config.get(section, 'time_name')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False
Code example #16
    def __init__(self, config):
        self.config = config.copy()
        if isinstance(config["filepattern"], (str, bytes)):
            self.config["filepattern"] = [self.config["filepattern"]]

        self.parsers = [
            Parser(filepattern) for filepattern in self.config["filepattern"]
        ]

        self.aliases = parse_aliases(config)

        self.topic = self.config["topic"]
        self.tbus_orbit = self.config.get("tbus_orbit", False)
        logger.debug("Looking for: %s",
                     str([parser.globify() for parser in self.parsers]))
        AbstractWatchDogProcessor.__init__(
            self, [parser.globify() for parser in self.parsers],
            config.get("watcher", "Observer"))

        self._pub = NoisyPublisher("trollstalker",
                                   int(self.config["posttroll_port"]),
                                   self.config["topic"])
        self.pub = None

        obsolete_keys = [
            "topic", "filepattern", "tbus_orbit", "posttroll_port", "watch",
            "config_item", "configuration_file"
        ]

        for key in list(self.config.keys()):
            if key.startswith("alias_") or key in obsolete_keys:
                del self.config[key]
Code example #17
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("geo_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=20)

        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

        self.logger = logging.getLogger("geo_gatherer")
        self._loop = False
Code example #18
    def _get_watchdog_trigger(self, collectors):
        observer_class = self._config_items["watcher"]
        if observer_class not in ["PollingObserver", "Observer"]:
            raise ValueError

        pattern = self._config_items["pattern"]
        parser = Parser(pattern)
        glob = parser.globify()
        publish_topic = self._get_publish_topic()

        logger.debug("Using %s for %s", observer_class, self.section)
        return WatchDogTrigger(collectors,
                               self._config_items, [glob],
                               observer_class,
                               self.publisher,
                               publish_topic=publish_topic)
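
A sketch of the pattern-to-glob step used above; the pattern string is an invented example, not a real configuration value:

from trollsift import Parser

# Invented pattern; not taken from a real gatherer configuration.
parser = Parser("/data/in/{platform_name}_{start_time:%Y%m%d_%H%M%S}.h5")
glob = parser.globify()
# globify() replaces every template field with shell wildcards, so the result
# can be handed to the watchdog observer as a glob pattern.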
Code example #19
    def regional_fires_filtering_and_publishing(self, msg, regional_fmask,
                                                afsff_obj):
        """From the regional-fires-filter-mask and the fire detection data send regional messages."""
        logger.debug(
            "Perform regional masking on VIIRS AF detections and publish accordingly."
        )

        afdata = afsff_obj.get_af_data()
        fmda = afsff_obj.metadata

        fmda['platform'] = afsff_obj.platform_name

        pout = Parser(self.outfile_pattern_regional)

        output_messages = []
        regions_with_detections = 0
        for region_name in regional_fmask:
            if not regional_fmask[region_name]['some_inside_test_area']:
                continue

            regions_with_detections = regions_with_detections + 1
            fmda['region_name'] = regional_fmask[region_name]['attributes'][
                'Kod_omr']

            out_filepath = os.path.join(self.output_dir, pout.compose(fmda))
            logger.debug("Output file path = %s", out_filepath)
            data_in_region = afdata[regional_fmask[region_name]['mask']]
            filepath = store_geojson(out_filepath,
                                     data_in_region,
                                     platform_name=fmda['platform'])
            if not filepath:
                logger.warning(
                    "Something went wrong storing regional " +
                    "data to Geojson - area: {name}".format(name=str(region_name)))
                continue

            outmsg = self._generate_output_message(filepath, msg,
                                                   regional_fmask[region_name])
            output_messages.append(outmsg)
            logger.info("Geojson file created! Number of fires in region = %d",
                        len(data_in_region))

        logger.debug(
            "Regional masking done. Number of regions with fire " +
            "detections on this granule: %s", str(regions_with_detections))
        return output_messages
Code example #20
    def _set_parsers(self):
        """Set parsers"""
        files = self._config["files"]

        for fle in files:
            pattern = fle["pattern"]
            parser = Parser(pattern)
            self._parsers.append(parser)
Code example #21
File: gatherer.py Project: tparker-usgs/trollduction
def get_metadata(fname):
    """Parse metadata from the file.
    """

    res = None
    for section in CONFIG.sections():
        try:
            parser = Parser(CONFIG.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(CONFIG.items(section)))

        for key in ["watcher", "pattern", "timeliness", "regions"]:
            res.pop(key, None)

        if "duration" in res and "end_time" not in res:
            res["end_time"] = (res["start_time"] +
                               timedelta(seconds=int(res["duration"])))
        if "start_date" in res:
            res["start_time"] = datetime.combine(res["start_date"].date(),
                                                 res["start_time"].time())
            if "end_date" not in res:
                res["end_date"] = res["start_date"]
            del res["start_date"]
        if "end_date" in res:
            res["end_time"] = datetime.combine(res["end_date"].date(),
                                               res["end_time"].time())
            del res["end_date"]

        while res["start_time"] > res["end_time"]:
            res["end_time"] += timedelta(days=1)

        if "duration" in res:
            del res["duration"]

        if ("sensor" in res) and ("," in res["sensor"]):
            res["sensor"] = res["sensor"].split(",")

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)

    return res
Code example #22
def get_metadata(fname):
    """Parse metadata from the file.
    """

    res = None
    for section in CONFIG.sections():
        try:
            parser = Parser(CONFIG.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(CONFIG.items(section)))

        for key in ["watcher", "pattern", "timeliness", "regions"]:
            res.pop(key, None)

        if "duration" in res and "end_time" not in res:
            res["end_time"] = (res["start_time"] +
                               timedelta(seconds=int(res["duration"])))
        if "start_date" in res:
            res["start_time"] = datetime.combine(res["start_date"].date(),
                                                 res["start_time"].time())
            if "end_date" not in res:
                res["end_date"] = res["start_date"]
            del res["start_date"]
        if "end_date" in res:
            res["end_time"] = datetime.combine(res["end_date"].date(),
                                               res["end_time"].time())
            del res["end_date"]

        while res["start_time"] > res["end_time"]:
            res["end_time"] += timedelta(days=1)

        if "duration" in res:
            del res["duration"]

        if ("sensor" in res) and ("," in res["sensor"]):
            res["sensor"] = res["sensor"].split(",")

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)

    return res
Code example #23
    def __init__(self, config):
        """Initialize the segment gatherer."""
        self._config = config
        self._subject = None
        self._patterns = config['patterns']

        self._time_tolerance = config.get("time_tolerance", 30)
        self._timeliness = dt.timedelta(seconds=config.get("timeliness", 1200))

        self._num_files_premature_publish = \
            config.get("num_files_premature_publish", -1)

        self.slots = OrderedDict()

        self._parsers = {
            key: Parser(self._patterns[key]['pattern'])
            for key in self._patterns
        }

        self.time_name = config.get('time_name', 'start_time')
        # Floor the scene start time to the given full minutes
        self._group_by_minutes = config.get('group_by_minutes', None)

        # This gets the 'keep_parsed_keys' setting that is valid for all patterns
        self._keep_parsed_keys = config.get('keep_parsed_keys', [])

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False
        self._providing_server = config.get('providing_server')

        # Convert check time into int minutes variables
        for key in self._patterns:
            if "start_time_pattern" in self._patterns[key]:
                time_conf = self._patterns[key]["start_time_pattern"]
                start_time_str = time_conf.get("start_time", "00:00")
                end_time_str = time_conf.get("end_time", "23:59")
                delta_time_str = time_conf.get("delta_time", "00:01")

                start_h, start_m = start_time_str.split(':')
                end_h, end_m = end_time_str.split(':')
                delta_h, delta_m = delta_time_str.split(':')
                interval = {}
                interval["start"] = (60 * int(start_h)) + int(start_m)
                interval["end"] = (60 * int(end_h)) + int(end_m)
                interval["delta"] = (60 * int(delta_h)) + int(delta_m)

                # Start-End time across midnight
                interval["midnight"] = False
                if interval["start"] > interval["end"]:
                    interval["end"] += 24 * 60
                    interval["midnight"] = True
                self._patterns[key]["_start_time_pattern"] = interval
                self.logger.info(
                    "Start Time pattern '%s' " +
                    "filter start:%s end:%s delta:%s", key, start_time_str,
                    end_time_str, delta_time_str)
Code example #24
    def __init__(self,
                 topic,
                 instrument,
                 posttroll_port=0,
                 filepattern=None,
                 aliases=None,
                 tbus_orbit=False):
        super(EventHandler, self).__init__()

        self._pub = NoisyPublisher("trollstalker", posttroll_port, topic)
        self.pub = self._pub.start()
        self.topic = topic
        self.info = {}
        if filepattern is None:
            filepattern = '{filename}'
        self.file_parser = Parser(filepattern)
        self.instrument = instrument
        self.aliases = aliases
        self.tbus_orbit = tbus_orbit
Code example #25
File: _base.py Project: pytroll/pytroll-collectors
    def _get_metadata(self, fname):
        """Parse metadata from the file."""
        parser = Parser(self._config_items["pattern"])

        res = parser.parse(fname)
        res.update(dict(self._config_items))

        for key in ["watcher", "pattern", "timeliness", "regions"]:
            res.pop(key, None)

        res = fix_start_end_time(res)

        if ("sensor" in res) and ("," in res["sensor"]):
            res["sensor"] = res["sensor"].split(",")

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)

        return res
Code example #26
def get_metadata_from_filename(infile_pattern, filepath):
    """From the filename and its pattern get basic metadata of the satellite observations."""
    p__ = Parser(infile_pattern)
    fname = os.path.basename(filepath)
    try:
        res = p__.parse(fname)
    except ValueError:
        # Do something!
        return None

    # Fix the end time:
    endtime = datetime(res['start_time'].year, res['start_time'].month,
                       res['start_time'].day, res['end_hour'].hour,
                       res['end_hour'].minute, res['end_hour'].second)
    if endtime < res['start_time']:
        endtime = endtime + timedelta(days=1)

    res['end_time'] = endtime

    return res
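
A hypothetical call to get_metadata_from_filename above, illustrating the end-time roll-over; the pattern and file path are invented and only meant to show the fix-up of 'end_hour' against 'start_time':

# Invented pattern and path, for illustration only.
pattern = "viirs_af_{platform}_{start_time:%Y%m%d%H%M%S}_{end_hour:%H%M%S}.nc"
meta = get_metadata_from_filename(
    pattern, "/data/viirs_af_noaa20_20240101235500_000200.nc")
# The parsed end hour (00:02:00) is earlier than the start time (23:55:00),
# so meta['end_time'] is rolled over to the next day.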
Code example #27
def get_metadata(fname):
    res = None
    for section in config.sections():
        if section == "default":
            continue
        try:
            parser = Parser(config.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(config.items(section)))

        for key in ["watcher", "pattern", "timeliness"]:
            res.pop(key, None)

        if "duration" in res and "end_time" not in res:
            res["end_time"] = (res["start_time"] +
                               timedelta(seconds=int(res["duration"])))
        if "start_date" in res:
            res["start_time"] = datetime.combine(res["start_date"].date(),
                                                 res["start_time"].time())
            if "end_date" not in res:
                res["end_date"] = res["start_date"]
            del res["start_date"]
        if "end_date" in res:
            res["end_time"] = datetime.combine(res["end_date"].date(),
                                               res["end_time"].time())
            del res["end_date"]

        while res["start_time"] > res["end_time"]:
            res["end_time"] += timedelta(days=1)

        if "duration" in res:
            del res["duration"]

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)
    return res
Code example #28
File: gatherer.py Project: ch-k/trollduction
def get_metadata(fname):
    res = None
    for section in config.sections():
        if section == "default":
            continue
        try:
            parser = Parser(config.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(config.items(section)))

        for key in ["watcher", "pattern", "timeliness"]:
            res.pop(key, None)

        if "duration" in res and "end_time" not in res:
            res["end_time"] = (res["start_time"] +
                               timedelta(seconds=int(res["duration"])))
        if "start_date" in res:
            res["start_time"] = datetime.combine(res["start_date"].date(),
                                                 res["start_time"].time())
            if "end_date" not in res:
                res["end_date"] = res["start_date"]
            del res["start_date"]
        if "end_date" in res:
            res["end_time"] = datetime.combine(res["end_date"].date(),
                                               res["end_time"].time())
            del res["end_date"]

        while res["start_time"] > res["end_time"]:
            res["end_time"] += timedelta(days=1)

        if "duration" in res:
            del res["duration"]

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)
    return res
Code example #29
    def _compose_filenames(self, time_slot):
        """Compose filename set()s"""

        # Get copy of metadata
        meta = self.slots[time_slot]['metadata'].copy()

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            ignored_keys = self._config["config"]["variable_tags"].split(',')
            meta = _copy_without_ignore_items(meta, ignored_keys=ignored_keys)
        except KeyError:
            pass

        critical_files, wanted_files, all_files = [], [], []
        files = self._config["files"]

        for fle in files:
            pattern = fle["pattern"]
            parser = Parser(pattern)
            for seg in fle["segments"]:
                chans = seg.get('channel_name', [''])
                critical_segments = seg.get('critical_segments', [''])
                wanted_segments = seg.get('wanted_segments', [''])
                all_segments = seg.get('all_segments', [''])
                for chan in chans:
                    meta['channel_name'] = chan
                    for seg2 in critical_segments:
                        meta['segment'] = seg2
                        critical_files.append(parser.globify(meta))
                    for seg2 in wanted_segments:
                        meta['segment'] = seg2
                        wanted_files.append(parser.globify(meta))
                    for seg2 in all_segments:
                        meta['segment'] = seg2
                        all_files.append(parser.globify(meta))

        self.slots[time_slot]['critical_files'] = set(critical_files)
        self.slots[time_slot]['wanted_files'] = set(wanted_files)
        self.slots[time_slot]['all_files'] = set(all_files)
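
A hedged sketch of the globify(meta) usage in _compose_filenames; the pattern and metadata below are invented, not taken from a real segment-gatherer configuration:

import datetime as dt
from trollsift import Parser

# Invented pattern and metadata.
parser = Parser("H-000-{channel_name}-{segment}-{start_time:%Y%m%d%H%M}")
meta = {"start_time": dt.datetime(2024, 1, 1, 12, 0),
        "channel_name": "VIS006", "segment": "000001"}
print(parser.globify(meta))
# Keys present in meta are composed literally; any key left out of meta would
# be replaced with a wildcard, which is how the critical/wanted/all file sets
# are built above.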
Code example #30
File: nc_pps_l2.py Project: junjie2008v/mpop
def extract_filenames_in_time_window(file_list, starttime, endtime):
    """Extract the filenames with time inside the time interval specified.
    NB! Only tested for EARS-NWC granules. This does not support assembling
    several locally received full swaths"""

    # New EARS-NWC filenames:
    # Ex.:
    # W_XX-EUMETSAT-Darmstadt,SING+LEV+SAT,NOAA19+CT_C_EUMS_20150819124700_\
    #  33643.nc.bz2
    pnew = Parser(EARS_PPS_FILE_MASK)

    # Old EARS-NWC filenames:
    # Ex.:
    # ctth_20130910_205300_metopb.h5.bz2
    pold = Parser("{product:s}_{starttime:%Y%m%d_%H%M}00_{platform_name:s}.h5"
                  "{compression:s}")

    plocal = Parser(LOCAL_PPS_FILE_MASK)

    valid_filenames = []
    valid_times = []
    LOG.debug("Time window: (%s, %s)", str(starttime), str(endtime))
    for fname in file_list:
        try:
            data = pnew.parse(os.path.basename(fname))
        except ValueError:
            try:
                data = pold.parse(os.path.basename(fname))
            except ValueError:
                data = plocal.parse(os.path.basename(fname))

        if (data['starttime'] >= starttime and data['starttime'] < endtime):
            valid_filenames.append(fname)
            valid_times.append(data['starttime'])
            LOG.debug("Start time %s inside window", str(data['starttime']))
        else:
            pass

    # Can we rely on the files being sorted according to time?
    # Sort the filenames according to time:
    vtimes = np.array(valid_times)
    idx = np.argsort(vtimes)
    vfiles = np.array(valid_filenames)
    return np.take(vfiles, idx).tolist()
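
An illustration of the old-style pattern above, parsing the example filename given in the comment (ctth_20130910_205300_metopb.h5.bz2); the expected field values follow the names in the pattern:

from trollsift import Parser

pold = Parser("{product:s}_{starttime:%Y%m%d_%H%M}00_{platform_name:s}.h5"
              "{compression:s}")
info = pold.parse("ctth_20130910_205300_metopb.h5.bz2")
# info['product'] == 'ctth', info['platform_name'] == 'metopb',
# info['starttime'] is a datetime for 2013-09-10 20:53,
# info['compression'] == '.bz2'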
Code example #31
File: trollstalker.py Project: ch-k/trollduction
    def __init__(self, topic, instrument, posttroll_port=0, filepattern=None,
                 aliases=None, tbus_orbit=False):
        super(EventHandler, self).__init__()

        self._pub = NoisyPublisher("trollstalker", posttroll_port, topic)
        self.pub = self._pub.start()
        self.topic = topic
        self.info = {}
        if filepattern is None:
            filepattern = '{filename}'
        self.file_parser = Parser(filepattern)
        self.instrument = instrument
        self.aliases = aliases
        self.tbus_orbit = tbus_orbit
Code example #32
    def __init__(self, config):
        self._config = config
        self._subject = None
        self._patterns = config['patterns']

        self._time_tolerance = config.get("time_tolerance", 30)
        self._timeliness = dt.timedelta(seconds=config.get("timeliness", 1200))

        self._num_files_premature_publish = \
            config.get("num_files_premature_publish", -1)

        self.slots = OrderedDict()

        self._parsers = {
            key: Parser(self._patterns[key]['pattern'])
            for key in self._patterns
        }

        self.time_name = config.get('time_name', 'start_time')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False

        # Convert check time into int minutes variables
        for key in self._patterns:
            if "hour_pattern" in self._patterns[key]:
                checkTime = self._patterns[key]["hour_pattern"]
                # Ex. checkTime =  "06:00 18:00 00:30"
                startTime, endTime, deltaTime = checkTime.split(' ')
                if startTime != '' and endTime != '' and deltaTime != '':
                    startH, startM = startTime.split(':')
                    endH, endM = endTime.split(':')
                    deltaH, deltaM = deltaTime.split(':')
                    self._patterns[key]["_hour_pattern"] = {}
                    self._patterns[key]["_hour_pattern"]["start"] = (
                        60 * int(startH)) + int(startM)
                    self._patterns[key]["_hour_pattern"]["end"] = (
                        60 * int(endH)) + int(endM)
                    self._patterns[key]["_hour_pattern"]["delta"] = (
                        60 * int(deltaH)) + int(deltaM)

                    # Start-End time across midnight
                    self._patterns[key]["_hour_pattern"]["midnight"] = 0
                    if self._patterns[key]["_hour_pattern"][
                            "start"] > self._patterns[key]["_hour_pattern"][
                                "end"]:
                        self._patterns[key]["_hour_pattern"]["end"] += 24 * 60
                        self._patterns[key]["_hour_pattern"]["midnight"] = 1
                    self.logger.info("Hour pattern '%s' filter: %s", key,
                                     checkTime)
Code example #33
    def __init__(self, config):
        self._config = config
        self._subject = None
        self._patterns = config['patterns']

        self._time_tolerance = config.get("time_tolerance", 30)
        self._timeliness = dt.timedelta(seconds=config.get("timeliness", 1200))

        self._num_files_premature_publish = \
            config.get("num_files_premature_publish", -1)

        self.slots = OrderedDict()

        self._parsers = {key: Parser(self._patterns[key]['pattern']) for
                         key in self._patterns}

        self.time_name = config.get('time_name', 'start_time')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False
Code example #34
File: nc_pps_l2.py Project: tparker-usgs/mpop
def extract_filenames_in_time_window(file_list, starttime, endtime):
    """Extract the filenames with time inside the time interval specified.
    NB! Only tested for EARS-NWC granules. This does not support assembling
    several locally received full swaths"""

    # New EARS-NWC filenames:
    # Ex.:
    # W_XX-EUMETSAT-Darmstadt,SING+LEV+SAT,NOAA19+CT_C_EUMS_20150819124700_\
    #  33643.nc.bz2
    pnew = Parser(EARS_PPS_FILE_MASK)

    # Old EARS-NWC filenames:
    # Ex.:
    # ctth_20130910_205300_metopb.h5.bz2
    pold = Parser("{product:s}_{starttime:%Y%m%d_%H%M}00_{platform_name:s}.h5"
                  "{compression:s}")

    plocal = Parser(LOCAL_PPS_FILE_MASK)

    valid_filenames = []
    valid_times = []
    LOG.debug("Time window: (%s, %s)", str(starttime), str(endtime))
    for fname in file_list:
        try:
            data = pnew.parse(os.path.basename(fname))
        except ValueError:
            try:
                data = pold.parse(os.path.basename(fname))
            except ValueError:
                data = plocal.parse(os.path.basename(fname))

        if (data['starttime'] >= starttime and
                data['starttime'] < endtime):
            valid_filenames.append(fname)
            valid_times.append(data['starttime'])
            LOG.debug("Start time %s inside window", str(data['starttime']))
        else:
            pass

    # Can we rely on the files being sorted according to time?
    # Sort the filenames according to time:
    vtimes = np.array(valid_times)
    idx = np.argsort(vtimes)
    vfiles = np.array(valid_filenames)
    return np.take(vfiles, idx).tolist()
Code example #35
            duration = timedelta(seconds=config.getfloat(section, "duration"))
        except NoOptionError:
            duration = None
        collectors = [
            region_collector.RegionCollector(region, timeliness, duration)
            for region in regions
        ]

        try:
            pattern = config.get(section, "pattern")
            try:
                observer_class = config.get(section, "watcher")
            except NoOptionError:
                observer_class = None
            logger.debug("Using watchdog for %s", section)
            parser = Parser(pattern)

            granule_trigger = trigger.WatchDogTrigger(collectors, terminator,
                                                      decoder,
                                                      [parser.globify()],
                                                      observer_class)

        except NoOptionError:
            logger.debug("Using posttroll for %s", section)
            granule_trigger = trigger.PostTrollTrigger(
                collectors, terminator,
                config.get(section, 'service').split(','),
                config.get(section, 'topics').split(','))
        granule_triggers.append(granule_trigger)

    pub.start()
Code example #36
File: nc_pps_l2.py Project: tparker-usgs/mpop
    def _determine_prod_and_geo_files(self, prodfilenames):
        """From the list of product files and the products to load determine the
        product files and the geolocation files that will be considered when
        reading the data

        """

        # geofiles4product is a dict listing all geo-locations files applicable
        # for each product.
        # prodfiles4product is a dict listing all product files for a given
        # product name

        prodfiles4product = {}
        geofiles4product = {}
        if prodfilenames:
            if not isinstance(prodfilenames, (list, set, tuple)):
                prodfilenames = [prodfilenames]
            for fname in prodfilenames:
                # Only standard NWCSAF/PPS and EARS-NWC naming accepted!
                # No support for old file names (< PPSv2014)
                if (os.path.basename(fname).startswith("S_NWC") or
                        os.path.basename(fname).startswith("W_XX-EUMETSAT")):
                    if not self._parser:
                        if os.path.basename(fname).startswith("S_NWC"):
                            self._source = 'local'
                            self._parser = Parser(LOCAL_PPS_FILE_MASK)
                        else:
                            self._source = 'ears'
                            self._parser = Parser(EARS_PPS_FILE_MASK)
                else:
                    LOG.info("Unrecognized NWCSAF/PPS file: %s", fname)
                    continue

                parse_info = self._parser.parse(os.path.basename(fname))
                prodname = parse_info['product']

                if prodname not in prodfiles4product:
                    prodfiles4product[prodname] = []

                prodfiles4product[prodname].append(fname)

            # Assemble geolocation information
            if self._source == 'ears':
                # For EARS data, the files have geolocation in themselves
                for prodname, fnames in prodfiles4product.iteritems():
                    geofiles4product[prodname] = fnames
            else:
                # For locally processed data, use the geolocation from
                # the product defined in config

                if self._geolocation_product_name in prodfiles4product:
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = \
                            prodfiles4product[self._geolocation_product_name]
                else:
                    # If the product files with geolocation are not used,
                    # assume that they are still available on the disk.

                    if self._cloud_product_geodir is None:
                        LOG.warning("Config option 'cloud_product_geodir' is not "
                                    "available! Assuming same directory as "
                                    "products.")
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = []
                        for fname in prodfiles4product[prodname]:
                            directory = self._cloud_product_geodir or \
                                os.path.abspath(fname)
                            parse_info = \
                                self._parser.parse(os.path.basename(fname))
                            fname = fname.replace(parse_info['product'],
                                                  self._geolocation_product_name)
                            fname = os.path.join(directory, fname)
                            geofiles4product[prodname].append(fname)

            # Check that each product file has a corresponding geolocation
            # file:
            '''
            if self._geolocation_product_name:
                for prod in products:
                    if prod not in geofiles4product:
                        LOG.error("No product name %s in dict "
                                  "geofiles4product!",
                                  prod)
                        continue
                    if prod not in prodfiles4product:
                        LOG.error("No product name %s in dict "
                                  "prodfiles4product!",
                                  prod)
                        continue
                    if len(geofiles4product[prod]) != \
                       len(prodfiles4product[prod]):
                        LOG.error("Mismatch in number of product files and "
                                  "matching geolocation files!")
            '''

        return prodfiles4product, geofiles4product
Code example #37
File: nc_pps_l2.py Project: tparker-usgs/mpop
class PPSReader(Reader):

    """Reader class for PPS files"""
    pformat = "nc_pps_l2"

    def __init__(self, *args, **kwargs):
        Reader.__init__(self, *args, **kwargs)
        # Source of the data, 'local' or 'ears'
        self._source = None
        # Parser for getting info from the file names
        self._parser = None
        # Satellite config
        self._config = None
        # Location of geolocation files, required for 'local' products
        self._cloud_product_geodir = None
        # Name of the product having geolocation for 'local' products
        self._geolocation_product_name = None

    def _read_config(self, sat_name, instrument_name):
        '''Read config for the satellite'''

        if self._config:
            return

        self._config = ConfigParser()
        configfile = os.path.join(CONFIG_PATH, sat_name + ".cfg")
        LOG.debug("Read configfile %s", configfile)
        self._config.read(configfile)

        try:
            self._cloud_product_geodir = \
                self._config.get(instrument_name + "-level3",
                                 "cloud_product_geodir",
                                 raw=True,
                                 vars=os.environ)
        except NoOptionError:
            pass

        LOG.debug("cloud_product_geodir = %s", self._cloud_product_geodir)

        try:
            self._geolocation_product_name = \
                self._config.get(instrument_name + "-level3",
                                 "geolocation_product_name",
                                 raw=True,
                                 vars=os.environ)
        except NoOptionError:
            if self._source != 'ears':
                LOG.warning("No geolocation product name given in config, "
                            "using default: %s", GEO_PRODUCT_NAME_DEFAULT)
                self._geolocation_product_name = GEO_PRODUCT_NAME_DEFAULT

    def _determine_prod_and_geo_files(self, prodfilenames):
        """From the list of product files and the products to load determine the
        product files and the geolocation files that will be considered when
        reading the data

        """

        # geofiles4product is a dict listing all geo-locations files applicable
        # for each product.
        # prodfiles4product is a dict listing all product files for a given
        # product name

        prodfiles4product = {}
        geofiles4product = {}
        if prodfilenames:
            if not isinstance(prodfilenames, (list, set, tuple)):
                prodfilenames = [prodfilenames]
            for fname in prodfilenames:
                # Only standard NWCSAF/PPS and EARS-NWC naming accepted!
                # No support for old file names (< PPSv2014)
                if (os.path.basename(fname).startswith("S_NWC") or
                        os.path.basename(fname).startswith("W_XX-EUMETSAT")):
                    if not self._parser:
                        if os.path.basename(fname).startswith("S_NWC"):
                            self._source = 'local'
                            self._parser = Parser(LOCAL_PPS_FILE_MASK)
                        else:
                            self._source = 'ears'
                            self._parser = Parser(EARS_PPS_FILE_MASK)
                else:
                    LOG.info("Unrecognized NWCSAF/PPS file: %s", fname)
                    continue

                parse_info = self._parser.parse(os.path.basename(fname))
                prodname = parse_info['product']

                if prodname not in prodfiles4product:
                    prodfiles4product[prodname] = []

                prodfiles4product[prodname].append(fname)

            # Assemble geolocation information
            if self._source == 'ears':
                # For EARS data, the files have geolocation in themselves
                for prodname, fnames in prodfiles4product.iteritems():
                    geofiles4product[prodname] = fnames
            else:
                # For locally processed data, use the geolocation from
                # the product defined in config

                if self._geolocation_product_name in prodfiles4product:
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = \
                            prodfiles4product[self._geolocation_product_name]
                else:
                    # If the product files with geolocation are not used,
                    # assume that they are still available on the disk.

                    if self._cloud_product_geodir is None:
                        LOG.warning("Config option 'cloud_product_geodir' is not "
                                    "available! Assuming same directory as "
                                    "products.")
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = []
                        for fname in prodfiles4product[prodname]:
                            directory = self._cloud_product_geodir or \
                                os.path.abspath(fname)
                            parse_info = \
                                self._parser.parse(os.path.basename(fname))
                            fname = fname.replace(parse_info['product'],
                                                  self._geolocation_product_name)
                            fname = os.path.join(directory, fname)
                            geofiles4product[prodname].append(fname)

            # Check that each product file has a corresponding geolocation
            # file:
            '''
            if self._geolocation_product_name:
                for prod in products:
                    if prod not in geofiles4product:
                        LOG.error("No product name %s in dict "
                                  "geofiles4product!",
                                  prod)
                        continue
                    if prod not in prodfiles4product:
                        LOG.error("No product name %s in dict "
                                  "prodfiles4product!",
                                  prod)
                        continue
                    if len(geofiles4product[prod]) != \
                       len(prodfiles4product[prod]):
                        LOG.error("Mismatch in number of product files and "
                                  "matching geolocation files!")
            '''

        return prodfiles4product, geofiles4product

    def load(self, satscene, **kwargs):
        """Read data from file and load it into *satscene*.
        """

        prodfilenames = kwargs.get('filename')
        time_interval = kwargs.get('time_interval')
        if prodfilenames and time_interval:
            LOG.warning("You have specified both a list of files " +
                        "and a time interval")
            LOG.warning("Specifying a time interval will only take effect " +
                        "if no files are specified")
            time_interval = None

        products = satscene.channels_to_load & set(PPS_PRODUCTS)
        if len(products) == 0:
            LOG.debug("No PPS cloud products to load, abort")
            return

        self._read_config(satscene.fullname, satscene.instrument_name)

        LOG.info("Products to load: %s", str(products))

        # If a list of files is provided to the load call, we disregard the
        # directory and filename specifications/definitions in the config file.

        if not prodfilenames:
            try:
                area_name = satscene.area_id or satscene.area.area_id
            except AttributeError:
                area_name = "satproj_?????_?????"

            # Make the list of files for the requested products:
            if isinstance(time_interval, (tuple, set, list)) and \
               len(time_interval) == 2:
                time_start, time_end = time_interval
            else:
                time_start, time_end = satscene.time_slot, None

            LOG.debug(
                "Start and end times: %s %s", str(time_start), str(time_end))
            prodfilenames = get_filenames(satscene, products, self._config,
                                          (time_start, time_end), area_name)

        LOG.debug("Product files: %s", str(prodfilenames))

        retv = self._determine_prod_and_geo_files(prodfilenames)
        prodfiles4product, geofiles4product = retv

        # Reading the products
        classes = {"CTTH": CloudTopTemperatureHeight,
                   "CT": CloudType,
                   "CMA": CloudMask,
                   "PC": PrecipitationClouds,
                   "CPP": CloudPhysicalProperties
                   }
        nodata_mask = False

        read_external_geo = {}
        for product in products:
            LOG.debug("Loading %s", product)

            if product not in prodfiles4product:
                LOG.warning("No files found for product: %s", product)
                continue

            pps_band = PPSProductData(prodfiles4product[product]).read()
            chn = classes[product]()
            chn.read(pps_band)

            if chn.name not in satscene:
                LOG.info("Adding new channel %s", chn.name)
                satscene.channels.append(chn)

            # Check if geolocation is loaded:
            if not chn.area:
                read_external_geo[product] = satscene.channels[-1].name

        # Check if some 'channel'/product needs geolocation. If some
        # product does not have geolocation, get it from the
        # geofilename:
        from pyresample import geometry

        # Load geolocation
        for chn_name in read_external_geo.values():
            LOG.debug("ch_name = %s", str(chn_name))
            chn = satscene[chn_name]
            geofilenames = geofiles4product[chn_name]
            LOG.debug("Geo-files = %s", str(geofilenames))
            geoloc = PpsGeolocationData(chn.shape,
                                        chn.granule_lengths,
                                        geofilenames).read()

            try:
                satscene[chn.name].area = geometry.SwathDefinition(
                    lons=geoloc.longitudes, lats=geoloc.latitudes)

                area_name = ("swath_" + satscene.fullname + "_" +
                             str(satscene.time_slot) + "_"
                             + str(chn.shape) + "_" +
                             chn.name)
                satscene[chn.name].area.area_id = area_name
                satscene[chn.name].area_id = area_name
            except ValueError:
                LOG.exception('Failed making a SwathDefinition: ' +
                              'min,max lons,lats = (%f,%f) (%f,%f)',
                              geoloc.longitudes.data.min(),
                              geoloc.longitudes.data.max(),
                              geoloc.latitudes.data.min(),
                              geoloc.latitudes.data.max())
                LOG.warning("No geolocation loaded for %s", str(chn_name))

        # PpsGeolocationData.clear_cache()

        return
Code example #38
class EventHandler(ProcessEvent):

    """
    Event handler class for inotify.
     *topic* - topic of the published messages
     *posttroll_port* - port number to publish the messages on
     *filepattern* - filepattern for finding information from the filename
    """

    def __init__(self, topic, instrument, posttroll_port=0, filepattern=None,
                 aliases=None, tbus_orbit=False, history=0, granule_length=0):
        super(EventHandler, self).__init__()

        self._pub = NoisyPublisher("trollstalker", posttroll_port, topic)
        self.pub = self._pub.start()
        self.topic = topic
        self.info = {}
        if filepattern is None:
            filepattern = '{filename}'
        self.file_parser = Parser(filepattern)
        self.instrument = instrument
        self.aliases = aliases
        self.tbus_orbit = tbus_orbit
        self.granule_length = granule_length
        self._deque = deque([], history)

    def stop(self):
        '''Stop publisher.
        '''
        self._pub.stop()

    def __clean__(self):
        '''Clean instance attributes.
        '''
        self.info = {}

    def process_IN_CLOSE_WRITE(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CLOSE_WRITE")
        self.process(event)

    def process_IN_CLOSE_NOWRITE(self, event):
        """When a nonwritable file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_MOVED_TO(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MOVED_TO")
        self.process(event)

    def process_IN_CREATE(self, event):
        """When a file is created, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_CLOSE_MODIFY(self, event):
        """When a file is modified and closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MODIFY")
        self.process(event)

    def process(self, event):
        '''Process the event'''
        # New file created and closed
        if not event.dir:
            LOGGER.debug("processing %s", event.pathname)
            # parse information and create self.info dict{}
            self.parse_file_info(event)
            if len(self.info) > 0:
                # Check if this file has been recently dealt with
                if event.pathname not in self._deque:
                    self._deque.append(event.pathname)
                    message = self.create_message()
                    LOGGER.info("Publishing message %s", str(message))
                    self.pub.send(str(message))
                else:
                    LOGGER.info("Data has been published recently, skipping.")
            self.__clean__()

    def create_message(self):
        """Create broadcasted message
        """
        return Message(self.topic, 'file', self.info)

    def parse_file_info(self, event):
        '''Parse satellite and orbit information from the filename.
        A message is sent if a matching filepattern is found.
        '''
        try:
            LOGGER.debug("filter: %s\t event: %s",
                         self.file_parser.fmt, event.pathname)
            self.info = self.file_parser.parse(
                os.path.basename(event.pathname))
            LOGGER.debug("Extracted: %s", str(self.info))
        except ValueError:
            # Filename didn't match pattern, so empty the info dict
            LOGGER.info("Couldn't extract any usefull information")
            self.info = {}
        else:
            self.info['uri'] = event.pathname
            self.info['uid'] = os.path.basename(event.pathname)
            self.info['sensor'] = self.instrument.split(',')
            LOGGER.debug("self.info['sensor']: " + str(self.info['sensor']))

            if self.tbus_orbit and "orbit_number" in self.info:
                LOGGER.info("Changing orbit number by -1!")
                self.info["orbit_number"] -= 1

            # replace values with corresponding aliases, if any are given
            if self.aliases:
                info = self.info.copy()
                for key in info:
                    if key in self.aliases:
                        self.info['orig_'+key] = self.info[key]
                        self.info[key] = self.aliases[key][str(self.info[key])]

            # add start_time and end_time if not present
            try:
                base_time = self.info["time"]
            except KeyError:
                try:
                    base_time = self.info["nominal_time"]
                except KeyError:
                    base_time = self.info["start_time"]
            if "start_time" not in self.info:
                self.info["start_time"] = base_time
            if "start_date" in self.info:
                self.info["start_time"] = \
                    dt.datetime.combine(self.info["start_date"].date(),
                                        self.info["start_time"].time())
                if "end_date" not in self.info:
                    self.info["end_date"] = self.info["start_date"]
                del self.info["start_date"]
            if "end_date" in self.info:
                self.info["end_time"] = \
                    dt.datetime.combine(self.info["end_date"].date(),
                                        self.info["end_time"].time())
                del self.info["end_date"]
            if "end_time" not in self.info and self.granule_length > 0:
                self.info["end_time"] = base_time + dt.timedelta(seconds=self.granule_length)

            if "end_time" in self.info:
                while self.info["start_time"] > self.info["end_time"]:
                    self.info["end_time"] += dt.timedelta(days=1)
Code example #39
File: trollstalker.py  Project: ch-k/trollduction
class EventHandler(ProcessEvent):

    """
    Event handler class for inotify.
     *topic* - topic of the published messages
     *posttroll_port* - port number to publish the messages on
     *filepattern* - filepattern for finding information from the filename
    """

    def __init__(self, topic, instrument, posttroll_port=0, filepattern=None,
                 aliases=None, tbus_orbit=False):
        super(EventHandler, self).__init__()

        self._pub = NoisyPublisher("trollstalker", posttroll_port, topic)
        self.pub = self._pub.start()
        self.topic = topic
        self.info = {}
        if filepattern is None:
            filepattern = '{filename}'
        self.file_parser = Parser(filepattern)
        self.instrument = instrument
        self.aliases = aliases
        self.tbus_orbit = tbus_orbit

    def stop(self):
        '''Stop publisher.
        '''
        self._pub.stop()

    def __clean__(self):
        '''Clean instance attributes.
        '''
        self.info = {}

    def process_IN_CLOSE_WRITE(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CLOSE_WRITE")
        self.process(event)

    def process_IN_CLOSE_NOWRITE(self, event):
        """When a nonwritable file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_MOVED_TO(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MOVED_TO")
        self.process(event)

    def process_IN_CREATE(self, event):
        """When a file is created, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_CLOSE_MODIFY(self, event):
        """When a file is modified and closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MODIFY")
        self.process(event)

    def process(self, event):
        '''Process the event'''
        # New file created and closed
        if not event.dir:
            LOGGER.debug("processing %s", event.pathname)
            # parse information and create self.info dict{}
            self.parse_file_info(event)
            if len(self.info) > 0:
                message = self.create_message()
                LOGGER.info("Publishing message %s" % str(message))
                self.pub.send(str(message))
            self.__clean__()

    def create_message(self):
        """Create broadcasted message
        """
        return Message(self.topic, 'file', self.info)

    def parse_file_info(self, event):
        '''Parse satellite and orbit information from the filename.
        A message is sent if a matching filepattern is found.
        '''
        try:
            LOGGER.debug("filter: %s\t event: %s",
                         self.file_parser.fmt, event.pathname)
            self.info = self.file_parser.parse(
                os.path.basename(event.pathname))
            LOGGER.debug("Extracted: %s", str(self.info))
        except ValueError:
            # Filename didn't match pattern, so empty the info dict
            LOGGER.info("Couldn't extract any usefull information")
            self.info = {}
        else:
            self.info['uri'] = event.pathname
            self.info['uid'] = os.path.basename(event.pathname)
            self.info['sensor'] = self.instrument.split(',')
            LOGGER.debug("self.info['sensor']: " + str(self.info['sensor']))

            if self.tbus_orbit and "orbit_number" in self.info:
                LOGGER.info("Changing orbit number by -1!")
                self.info["orbit_number"] -= 1

            # replace values with corresponding aliases, if any are given
            if self.aliases:
                for key in self.info:
                    if key in self.aliases:
                        self.info[key] = self.aliases[key][str(self.info[key])]
Code example #40
File: nc_pps_l2.py  Project: junjie2008v/mpop
class PPSReader(Reader):
    """Reader class for PPS files"""
    pformat = "nc_pps_l2"

    def __init__(self, *args, **kwargs):
        Reader.__init__(self, *args, **kwargs)
        # Source of the data, 'local' or 'ears'
        self._source = None
        # Parser for getting info from the file names
        self._parser = None
        # Satellite config
        self._config = None
        # Location of geolocation files, required for 'local' products
        self._cloud_product_geodir = None
        # Name of the product having geolocation for 'local' products
        self._geolocation_product_name = None

    def _read_config(self, sat_name, instrument_name):
        '''Read config for the satellite'''

        if self._config:
            return

        self._config = ConfigParser()
        configfile = os.path.join(CONFIG_PATH, sat_name + ".cfg")
        LOG.debug("Read configfile %s", configfile)
        self._config.read(configfile)

        try:
            self._cloud_product_geodir = \
                self._config.get(instrument_name + "-level3",
                                 "cloud_product_geodir",
                                 raw=True,
                                 vars=os.environ)
        except NoOptionError:
            pass

        LOG.debug("cloud_product_geodir = %s", self._cloud_product_geodir)

        try:
            self._geolocation_product_name = \
                self._config.get(instrument_name + "-level3",
                                 "geolocation_product_name",
                                 raw=True,
                                 vars=os.environ)
        except NoOptionError:
            if self._source != 'ears':
                LOG.warning(
                    "No geolocation product name given in config, "
                    "using default: %s", GEO_PRODUCT_NAME_DEFAULT)
                self._geolocation_product_name = GEO_PRODUCT_NAME_DEFAULT

    def _determine_prod_and_geo_files(self, prodfilenames):
        """From the list of product files and the products to load determine the
        product files and the geolocation files that will be considered when
        reading the data

        """

        # geofiles4product is a dict listing all geo-locations files applicable
        # for each product.
        # prodfiles4product is a dict listing all product files for a given
        # product name

        prodfiles4product = {}
        geofiles4product = {}
        if prodfilenames:
            if not isinstance(prodfilenames, (list, set, tuple)):
                prodfilenames = [prodfilenames]
            for fname in prodfilenames:
                # Only standard NWCSAF/PPS and EARS-NWC naming accepted!
                # No support for old file names (< PPSv2014)
                if (os.path.basename(fname).startswith("S_NWC") or
                        os.path.basename(fname).startswith("W_XX-EUMETSAT")):
                    if not self._parser:
                        if os.path.basename(fname).startswith("S_NWC"):
                            self._source = 'local'
                            self._parser = Parser(LOCAL_PPS_FILE_MASK)
                        else:
                            self._source = 'ears'
                            self._parser = Parser(EARS_PPS_FILE_MASK)
                else:
                    LOG.info("Unrecognized NWCSAF/PPS file: %s", fname)
                    continue

                parse_info = self._parser.parse(os.path.basename(fname))
                prodname = parse_info['product']

                if prodname not in prodfiles4product:
                    prodfiles4product[prodname] = []

                prodfiles4product[prodname].append(fname)

            # Assemble geolocation information
            if self._source == 'ears':
                # For EARS data, the files have geolocation in themselves
                for prodname, fnames in prodfiles4product.iteritems():
                    geofiles4product[prodname] = fnames
            else:
                # For locally processed data, use the geolocation from
                # the product defined in config

                if self._geolocation_product_name in prodfiles4product:
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = \
                            prodfiles4product[self._geolocation_product_name]
                else:
                    # If the product files with geolocation are not used,
                    # assume that they are still available on the disk.

                    if self._cloud_product_geodir is None:
                        LOG.warning(
                            "Config option 'cloud_product_geodir' is not "
                            "available! Assuming same directory as "
                            "products.")
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = []
                        for fname in prodfiles4product[prodname]:
                            directory = self._cloud_product_geodir or \
                                os.path.abspath(fname)
                            parse_info = \
                                self._parser.parse(os.path.basename(fname))
                            fname = fname.replace(
                                parse_info['product'],
                                self._geolocation_product_name)
                            fname = os.path.join(directory, fname)
                            geofiles4product[prodname].append(fname)

            # Check that each product file has a corresponding geolocation
            # file:
            '''
            if self._geolocation_product_name:
                for prod in products:
                    if prod not in geofiles4product:
                        LOG.error("No product name %s in dict "
                                  "geofiles4product!",
                                  prod)
                        continue
                    if prod not in prodfiles4product:
                        LOG.error("No product name %s in dict "
                                  "prodfiles4product!",
                                  prod)
                        continue
                    if len(geofiles4product[prod]) != \
                       len(prodfiles4product[prod]):
                        LOG.error("Mismatch in number of product files and "
                                  "matching geolocation files!")
            '''

        return prodfiles4product, geofiles4product

    def load(self, satscene, **kwargs):
        """Read data from file and load it into *satscene*.
        """

        prodfilenames = kwargs.get('filename')
        time_interval = kwargs.get('time_interval')
        if prodfilenames and time_interval:
            LOG.warning("You have specified both a list of files " +
                        "and a time interval")
            LOG.warning("Specifying a time interval will only take effect " +
                        "if no files are specified")
            time_interval = None

        products = satscene.channels_to_load & set(PPS_PRODUCTS)
        if len(products) == 0:
            LOG.debug("No PPS cloud products to load, abort")
            return

        self._read_config(satscene.fullname, satscene.instrument_name)

        LOG.info("Products to load: %s", str(products))

        # If a list of files is provided to the load call, we disregard the
        # directory and filename specifications/definitions in the config file.

        if not prodfilenames:
            try:
                area_name = satscene.area_id or satscene.area.area_id
            except AttributeError:
                area_name = "satproj_?????_?????"

            # Make the list of files for the requested products:
            if isinstance(time_interval, (tuple, set, list)) and \
               len(time_interval) == 2:
                time_start, time_end = time_interval
            else:
                time_start, time_end = satscene.time_slot, None

            LOG.debug("Start and end times: %s %s", str(time_start),
                      str(time_end))
            prodfilenames = get_filenames(satscene, products, self._config,
                                          (time_start, time_end), area_name)

        LOG.debug("Product files: %s", str(prodfilenames))

        retv = self._determine_prod_and_geo_files(prodfilenames)
        prodfiles4product, geofiles4product = retv

        # Reading the products
        classes = {
            "CTTH": CloudTopTemperatureHeight,
            "CT": CloudType,
            "CMA": CloudMask,
            "PC": PrecipitationClouds,
            "CPP": CloudPhysicalProperties
        }
        nodata_mask = False

        read_external_geo = {}
        for product in products:
            LOG.debug("Loading %s", product)

            if product not in prodfiles4product:
                LOG.warning("No files found for product: %s", product)
                continue

            pps_band = PPSProductData(prodfiles4product[product]).read()
            chn = classes[product]()
            chn.read(pps_band)

            if chn.name not in satscene:
                LOG.info("Adding new channel %s", chn.name)
                satscene.channels.append(chn)

            # Check if geolocation is loaded:
            if not chn.area:
                read_external_geo[product] = satscene.channels[-1].name

        # Check if some 'channel'/product needs geolocation. If some
        # product does not have geolocation, get it from the
        # geofilename:
        from pyresample import geometry

        # Load geolocation
        for chn_name in read_external_geo.values():
            LOG.debug("ch_name = %s", str(chn_name))
            chn = satscene[chn_name]
            geofilenames = geofiles4product[chn_name]
            LOG.debug("Geo-files = %s", str(geofilenames))
            geoloc = PpsGeolocationData(chn.shape, chn.granule_lengths,
                                        geofilenames).read()

            try:
                satscene[chn.name].area = geometry.SwathDefinition(
                    lons=geoloc.longitudes, lats=geoloc.latitudes)

                area_name = ("swath_" + satscene.fullname + "_" +
                             str(satscene.time_slot) + "_" + str(chn.shape) +
                             "_" + chn.name)
                satscene[chn.name].area.area_id = area_name
                satscene[chn.name].area_id = area_name
            except ValueError:
                LOG.exception(
                    'Failed making a SwathDefinition: ' +
                    'min,max lons,lats = (%f,%f) (%f,%f)',
                    geoloc.longitudes.data.min(), geoloc.longitudes.data.max(),
                    geoloc.latitudes.data.min(), geoloc.latitudes.data.max())
                LOG.warning("No geolocation loaded for %s", str(chn_name))

        # PpsGeolocationData.clear_cache()

        return
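
The geolocation attached in load() above is a plain pyresample SwathDefinition. A self-contained sketch of that step, using small synthetic longitude/latitude arrays in place of the PPS geolocation data, might look like this.

# Synthetic stand-in for the SwathDefinition construction done in load() above.
import numpy as np
from pyresample import geometry

lons = np.array([[10.0, 10.1, 10.2],
                 [10.0, 10.1, 10.2]])
lats = np.array([[60.0, 60.0, 60.0],
                 [60.1, 60.1, 60.1]])

area = geometry.SwathDefinition(lons=lons, lats=lats)
area.area_id = "swath_example_2x3"    # load() builds a similar ad-hoc area_id
print(area.shape)                     # (2, 3)
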
Code example #41
class EventHandler(ProcessEvent):
    """
    Event handler class for inotify.
     *topic* - topic of the published messages
     *posttroll_port* - port number to publish the messages on
     *filepattern* - filepattern for finding information from the filename
    """
    def __init__(self,
                 topic,
                 instrument,
                 config_item,
                 posttroll_port=0,
                 filepattern=None,
                 aliases=None,
                 tbus_orbit=False,
                 history=0,
                 granule_length=0,
                 custom_vars=None,
                 nameservers=[],
                 watchManager=None):
        super(EventHandler, self).__init__()

        self._pub = NoisyPublisher("trollstalker_" + config_item,
                                   posttroll_port,
                                   topic,
                                   nameservers=nameservers)
        self.pub = self._pub.start()
        self.topic = topic
        self.info = OrderedDict()
        if filepattern is None:
            filepattern = '{filename}'
        self.file_parser = Parser(filepattern)
        self.instrument = instrument
        self.aliases = aliases
        self.custom_vars = custom_vars
        self.tbus_orbit = tbus_orbit
        self.granule_length = granule_length
        self._deque = deque([], history)
        self._watchManager = watchManager
        self._watched_dirs = dict()

    def stop(self):
        '''Stop publisher.
        '''
        self._pub.stop()

    def __clean__(self):
        '''Clean instance attributes.
        '''
        self.info = OrderedDict()

    def process_IN_CLOSE_WRITE(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CLOSE_WRITE")
        self.process(event)

    def process_IN_CLOSE_NOWRITE(self, event):
        """When a nonwritable file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_MOVED_TO(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MOVED_TO")
        self.process(event)

    def process_IN_CREATE(self, event):
        """When a file is created, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_CLOSE_MODIFY(self, event):
        """When a file is modified and closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MODIFY")
        self.process(event)

    def process_IN_DELETE(self, event):
        """On delete."""
        if (event.mask & pyinotify.IN_ISDIR):
            try:
                try:
                    self._watchManager.rm_watch(
                        self._watched_dirs[event.pathname], quiet=False)
                except pyinotify.WatchManagerError:
                    # As the directory is already deleted, removing the watch raises
                    # an error in pyinotify. This is OK, so just pass the exception.
                    LOGGER.debug("Removed watch: {}".format(event.pathname))
                    pass
                finally:
                    del self._watched_dirs[event.pathname]
            except KeyError:
                LOGGER.warning(
                    "Dir {} not watched by inotify. Can not delete watch.".
                    format(event.pathname))
        return

    def process(self, event):
        '''Process the event'''
        # New file created and closed
        if not event.dir:
            LOGGER.debug("processing %s", event.pathname)
            # parse information and create self.info OrderedDict{}
            self.parse_file_info(event)
            if len(self.info) > 0:
                # Check if this file has been recently dealt with
                if event.pathname not in self._deque:
                    self._deque.append(event.pathname)
                    message = self.create_message()
                    LOGGER.info("Publishing message %s", str(message))
                    self.pub.send(str(message))
                else:
                    LOGGER.info("Data has been published recently, skipping.")
            self.__clean__()
        elif (event.mask & pyinotify.IN_ISDIR):
            tmask = (pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO
                     | pyinotify.IN_CREATE | pyinotify.IN_DELETE)
            try:
                self._watched_dirs.update(
                    self._watchManager.add_watch(event.pathname, tmask))
                LOGGER.debug("Added watch on dir: {}".format(event.pathname))
            except AttributeError:
                LOGGER.error(
                    "No watchmanager given. Can not add watch on {}".format(
                        event.pathname))
                pass

    def create_message(self):
        """Create broadcasted message
        """
        return Message(self.topic, 'file', dict(self.info))

    def parse_file_info(self, event):
        '''Parse satellite and orbit information from the filename.
        A message is sent if a matching filepattern is found.
        '''
        try:
            LOGGER.debug("filter: %s\t event: %s", self.file_parser.fmt,
                         event.pathname)
            pathname_join = os.path.basename(event.pathname)
            if self.custom_vars and 'origin_inotify_base_dir_skip_levels' in self.custom_vars:
                pathname_list = event.pathname.split('/')
                pathname_join = "/".join(pathname_list[int(
                    self.custom_vars['origin_inotify_base_dir_skip_levels']):])
            else:
                LOGGER.debug(
                    "No origin_inotify_base_dir_skip_levels in self.custom_vars"
                )

            self.info = OrderedDict()
            self.info.update(self.file_parser.parse(pathname_join))
            LOGGER.debug("Extracted: %s", str(self.info))
        except ValueError:
            # Filename didn't match pattern, so empty the info dict
            LOGGER.info("Couldn't extract any usefull information")
            self.info = OrderedDict()
        else:
            self.info['uri'] = event.pathname
            self.info['uid'] = os.path.basename(event.pathname)
            self.info['sensor'] = self.instrument.split(',')
            LOGGER.debug("self.info['sensor']: " + str(self.info['sensor']))

            if self.tbus_orbit and "orbit_number" in self.info:
                LOGGER.info("Changing orbit number by -1!")
                self.info["orbit_number"] -= 1

            # replace values with corresponding aliases, if any are given
            if self.aliases:
                info = self.info.copy()
                for key in info:
                    if key in self.aliases:
                        self.info['orig_' + key] = self.info[key]
                        self.info[key] = self.aliases[key][str(self.info[key])]

            # add start_time and end_time if not present
            try:
                base_time = self.info["time"]
            except KeyError:
                try:
                    base_time = self.info["nominal_time"]
                except KeyError:
                    base_time = self.info["start_time"]
            if "start_time" not in self.info:
                self.info["start_time"] = base_time
            if "start_date" in self.info:
                self.info["start_time"] = \
                    dt.datetime.combine(self.info["start_date"].date(),
                                        self.info["start_time"].time())
                if "end_date" not in self.info:
                    self.info["end_date"] = self.info["start_date"]
                del self.info["start_date"]
            if "end_date" in self.info:
                self.info["end_time"] = \
                    dt.datetime.combine(self.info["end_date"].date(),
                                        self.info["end_time"].time())
                del self.info["end_date"]
            if "end_time" not in self.info and self.granule_length > 0:
                self.info["end_time"] = base_time + \
                    dt.timedelta(seconds=self.granule_length)

            if "end_time" in self.info:
                while self.info["start_time"] > self.info["end_time"]:
                    self.info["end_time"] += dt.timedelta(days=1)

            if self.custom_vars is not None:
                for var_name in self.custom_vars:
                    var_pattern = self.custom_vars[var_name]
                    var_val = None
                    if '%' in var_pattern:
                        var_val = helper_functions.create_aligned_datetime_var(
                            var_pattern, self.info)
                    if var_val is None:
                        var_val = compose(var_pattern, self.info)
                    self.info[var_name] = var_val
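
The aliases and custom_vars arguments consumed above are plain dictionaries; a hedged sketch of their expected shape, with invented keys and values, follows.

# Invented aliases/custom_vars for the EventHandler above.
# aliases maps a parsed key to a {str(original_value): replacement} dict;
# the original value is preserved under 'orig_<key>'.
aliases = {
    "platform": {"NOAA 19": "NOAA-19", "M02": "Metop-A"},
}

# custom_vars maps an extra message key to a compose pattern built from the
# already parsed info dict; values containing '%' are first handed to
# helper_functions.create_aligned_datetime_var(), as in parse_file_info() above.
custom_vars = {
    "target_server_dir": "/archive/{platform}",
}
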
Code example #42
class GeoGatherer(object):

    """Gatherer for geostationary satellite segments"""

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("geo_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=20)

        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

        self.logger = logging.getLogger("geo_gatherer")
        self._loop = False

    def _clear_data(self):
        """Clear data."""
        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

    def _init_data(self, msg):
        """Init wanted, all and critical files"""
        # Init metadata struct
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                self.metadata[key] = msg.data[key]
        self.metadata['dataset'] = []

        # Critical files that are required, otherwise production will fail
        self.critical_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "critical_files"))
        # These files are wanted, but not critical for production
        self.wanted_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "wanted_files"))
        self.all_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "all_files"))

    def _compose_filenames(self, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.metadata.copy()
        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = ['%06d' % i for i in range(int(segments[0]),
                                                          int(segments[-1])+1)]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.compose(meta)
                result.add(fname)

        return result

    def _publish(self):
        """Publish file dataset and reinitialize gatherer."""

        # Diagnostic logging about delayed ...
        if len(self.delayed_files) > 0:
            file_str = ''
            for key in self.delayed_files:
                file_str += "%s %f seconds, " % (key, self.delayed_files[key])
            self.logger.warning("Files received late: %s", file_str.strip(', '))
        # and missing files
        missing_files = self.all_files.difference(self.received_files)
        if len(missing_files) > 0:
            self.logger.warning("Missing files: %s", ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", self.metadata)
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        self._clear_data()

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def collection_ready(self):
        """Determine if collection is ready to be published."""
        # If no files have been collected, return False
        if len(self.received_files) == 0:
            return False
        # If all wanted files have been received, return True
        if self.wanted_files.union(self.critical_files).issubset(
                self.received_files):
            return True
        # If all critical files have been received ...
        if self.critical_files.issubset(self.received_files):
            # and timeout is reached, return True
            if self._timeout is not None and \
               self._timeout <= dt.datetime.utcnow():
                return True
            # else, set timeout if not already running
            else:
                if self._timeout is None:
                    self._timeout = dt.datetime.utcnow() + self._timeliness
                    self.logger.info("Setting timeout to %s",
                                     str(self._timeout))
                return False

        # In other cases continue gathering
        return False

    def run(self):
        """Run GeoGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if collection is ready for publication
            if self.collection_ready():
                self._publish()

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        mda = self._parser.parse(msg.data["uid"])
        if msg.data['uid'] in self.received_files:
            return
        # Init metadata etc if this is the first file
        if len(self.metadata) == 0:
            self._init_data(msg)
        # If the nominal time of the new segment is later than the
        # current metadata has, ...
        elif mda["nominal_time"] > self.metadata["nominal_time"]:
            # timeout ...
            self._timeout = dt.datetime.utcnow()
            # and check if the collection is ready and publish
            if self.collection_ready():
                self._publish()
                self._clear_data()
                self._init_data(msg)
            # or discard data and start new collection
            else:
                self.logger.warning("Collection not finished before new "
                                    "started")
                missing_files = self.all_files.difference(self.received_files)
                self.logger.warning("Missing files: %s", missing_files)
                self._clear_data()
                self._init_data(msg)

        # Add uid and uri
        self.metadata['dataset'].append({'uri': msg.data['uri'],
                                         'uid': msg.data['uid']})

        # If critical files have been received but the collection is
        # not complete, add the file to list of delayed files
        if self.critical_files.issubset(self.received_files):
            delay = dt.datetime.utcnow() - (self._timeout - self._timeliness)
            self.delayed_files[msg.data['uid']] = delay.total_seconds()

        # Add to received files
        self.received_files.add(msg.data['uid'])
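
A hedged sketch of a configuration section the GeoGatherer above could consume. Only options its code actually reads (topics, publish_topic, pattern, timeliness, critical_files, wanted_files, all_files) are listed; the section name, pattern tags and values are invented examples.

# Invented config for the GeoGatherer above; critical/wanted/all_files use the
# 'channel_name:segments' syntax documented in _compose_filenames().
from configparser import RawConfigParser   # the ConfigParser module on Python 2

EXAMPLE_CFG = """
[msg_hrit]
topics = /new/file/hrit
publish_topic = /gathered/hrit
pattern = {platform}-{channel_name}-{segment}-{nominal_time:%Y%m%d%H%M}
timeliness = 20
critical_files = :PRO,:EPI
wanted_files = VIS006:1-8,IR_108:1-8
all_files = VIS006:1-8,IR_108:1-8,:PRO,:EPI
"""

config = RawConfigParser()
config.read_string(EXAMPLE_CFG)               # readfp(StringIO(...)) on Python 2
# gatherer = GeoGatherer(config, "msg_hrit")  # would start listener/publisher
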
Code example #43
File: gatherer.py  Project: ch-k/trollduction
        timeliness = timedelta(minutes=config.getint(section, "timeliness"))
        try:
            duration = timedelta(seconds=config.getfloat(section, "duration"))
        except NoOptionError:
            duration = None
        collectors = [region_collector.RegionCollector(
            region, timeliness, duration) for region in regions]

        try:
            pattern = config.get(section, "pattern")
            try:
                observer_class = config.get(section, "watcher")
            except NoOptionError:
                observer_class = None
            logger.debug("Using watchdog for %s", section)
            parser = Parser(pattern)

            granule_trigger = trigger.WatchDogTrigger(collectors, terminator,
                                                      decoder,
                                                      [parser.globify()],
                                                      observer_class)

        except NoOptionError:
            logger.debug("Using posttroll for %s", section)
            granule_trigger = trigger.PostTrollTrigger(
                collectors, terminator,
                config.get(section, 'service').split(','),
                config.get(section, 'topics').split(','))
        granule_triggers.append(granule_trigger)

    pub.start()
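
The branch above picks a file-system trigger when the section has a 'pattern' option and falls back to posttroll messaging otherwise. A hedged sketch of the two config variants follows; the option names are the ones read in this excerpt (plus 'regions', read just above it), while section names and values are invented.

# Invented config fragments for the two trigger paths above.
# A 'pattern' option leads to a WatchDogTrigger built from parser.globify();
# without it, a PostTrollTrigger subscribes to the listed topics.
WATCHDOG_SECTION = """
[local_viirs]
regions = euro4 arctic
timeliness = 10
duration = 85.4
pattern = /data/viirs/SVM02_{platform}_{start_time:%Y%m%d_%H%M%S}.h5
"""

POSTTROLL_SECTION = """
[ears_avhrr]
regions = euro4
timeliness = 15
service =
topics = /EARS/AVHRR
"""
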
Code example #44
class SegmentGatherer(object):

    """Gatherer for geostationary satellite segments and multifile polar
    satellite granules."""

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("segment_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()

        self.time_name = config.get(section, 'time_name')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False

    def _clear_data(self, time_slot):
        """Clear data."""
        if time_slot in self.slots:
            del self.slots[time_slot]

    def _init_data(self, msg, mda):
        """Init wanted, all and critical files"""
        # Init metadata struct
        metadata = {}
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                metadata[key] = msg.data[key]
        metadata['dataset'] = []

        # Use also metadata parsed from the filenames
        metadata.update(mda)

        time_slot = str(metadata[self.time_name])
        self.slots[time_slot] = {}
        self.slots[time_slot]['metadata'] = metadata.copy()

        # Critical files that are required, otherwise production will fail.
        # If there are no critical files, empty set([]) is used.
        try:
            critical_segments = self._config.get(self._section,
                                                 "critical_files")
            self.slots[time_slot]['critical_files'] = \
                    self._compose_filenames(time_slot, critical_segments)
        except (NoOptionError, ValueError):
            self.slots[time_slot]['critical_files'] = set([])

        # These files are wanted, but not critical to production
        self.slots[time_slot]['wanted_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "wanted_files"))
        # Name of all the files
        self.slots[time_slot]['all_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "all_files"))

        self.slots[time_slot]['received_files'] = set([])
        self.slots[time_slot]['delayed_files'] = dict()
        self.slots[time_slot]['missing_files'] = set([])
        self.slots[time_slot]['timeout'] = None
        self.slots[time_slot]['files_till_premature_publish'] = \
            self._num_files_premature_publish

    def _compose_filenames(self, time_slot, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.slots[time_slot]['metadata'].copy()

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            meta = _copy_without_ignore_items(
                meta, ignored_keys=self._config.get(self._section,
                                                    'variable_tags').split(','))
        except NoOptionError:
            pass

        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = ['%d' % i for i in range(int(segments[0]),
                                                    int(segments[-1]) + 1)]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.globify(meta)
                result.add(fname)

        return result

    def _publish(self, time_slot, missing_files_check=True):
        """Publish file dataset and reinitialize gatherer."""

        data = self.slots[time_slot]

        # Diagnostic logging about delayed ...
        delayed_files = data['delayed_files']
        if len(delayed_files) > 0:
            file_str = ''
            for key in delayed_files:
                file_str += "%s %f seconds, " % (key, delayed_files[key])
            self.logger.warning("Files received late: %s", file_str.strip(', '))

        if missing_files_check:
            # and missing files
            missing_files = data['all_files'].difference(data['received_files'])
            if len(missing_files) > 0:
                self.logger.warning("Missing files: %s", ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", data['metadata'])
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        # self._clear_data(time_slot)

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def slot_ready(self, slot):
        """Determine if slot is ready to be published."""
        # If no files have been collected, return False
        if len(slot['received_files']) == 0:
            return SLOT_NOT_READY

        time_slot = str(slot['metadata'][self.time_name])

        wanted_and_critical_files = \
            slot['wanted_files'].union(slot['critical_files'])
        num_wanted_and_critical_files_received = \
            len(wanted_and_critical_files & slot['received_files'])

        self.logger.debug("Got %s wanted or critical files in slot %s.",
                          num_wanted_and_critical_files_received,
                          time_slot)

        if num_wanted_and_critical_files_received \
                == slot['files_till_premature_publish']:
            slot['files_till_premature_publish'] = -1
            return SLOT_READY_BUT_WAIT_FOR_MORE

        # If all wanted files have been received, return True
        if wanted_and_critical_files.issubset(
                slot['received_files']):
            self.logger.info("All files received for slot %s.",
                             time_slot)
            return SLOT_READY

        if slot['critical_files'].issubset(slot['received_files']):
            # All critical files have been received
            if slot['timeout'] is None:
                # Set timeout
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s.",
                                 str(slot['timeout']),
                                 time_slot)
                return SLOT_NOT_READY
            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection ready
                self.logger.info("Timeout occured, required files received "
                                 "for slot %s.", time_slot)
                return SLOT_READY
            else:
                pass
        else:
            if slot['timeout'] is None:
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s",
                                 str(slot['timeout']),
                                 time_slot)
                return SLOT_NOT_READY

            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection is obsolete
                self.logger.warning("Timeout occured and required files "
                                    "were not present, data discarded for "
                                    "slot %s.",
                                    time_slot)
                return SLOT_OBSOLETE_TIMEOUT
            else:
                pass

        # Timeout not reached, wait for more files
        return SLOT_NOT_READY

    def run(self):
        """Run SegmentGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if there are slots ready for publication
            slots = self.slots.copy()
            for slot in slots:
                slot = str(slot)
                status = self.slot_ready(slots[slot])
                if status == SLOT_READY:
                    # Collection ready, publish and remove
                    self._publish(slot)
                    self._clear_data(slot)
                if status == SLOT_READY_BUT_WAIT_FOR_MORE:
                    # Collection ready, publish but wait for more
                    self._publish(slot, missing_files_check=False)
                elif status == SLOT_OBSOLETE_TIMEOUT:
                    # Collection unfinished and obsolete, discard
                    self._clear_data(slot)
                else:
                    # Collection unfinished, wait for more data
                    pass

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        try:
            mda = self._parser.parse(msg.data["uid"])
        except ValueError:
            self.logger.debug("Unknown file, skipping.")
            return
        time_slot = str(mda[self.time_name])

        # Init metadata etc if this is the first file
        if time_slot not in self.slots:
            self._init_data(msg, mda)

        slot = self.slots[time_slot]

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            mda = _copy_without_ignore_items(
                mda, ignored_keys=self._config.get(self._section,
                                                   'variable_tags').split(','))
        except NoOptionError:
            pass

        mask = self._parser.globify(mda)

        if mask in slot['received_files']:
            return

        # Add uid and uri
        slot['metadata']['dataset'].append({'uri': msg.data['uri'],
                                            'uid': msg.data['uid']})

        # If critical files have been received but the slot is
        # not complete, add the file to list of delayed files
        if len(slot['critical_files']) > 0 and \
           slot['critical_files'].issubset(slot['received_files']):
            delay = dt.datetime.utcnow() - (slot['timeout'] - self._timeliness)
            slot['delayed_files'][msg.data['uid']] = delay.total_seconds()

        # Add to received files
        slot['received_files'].add(mask)
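
The slot bookkeeping above keys received files by a globified filename. A small sketch of the trollsift calls involved (parse in process(), globify/compose in _compose_filenames()), with an invented pattern and filename, follows.

# Invented pattern and filename illustrating the parse/compose/globify round
# trip that SegmentGatherer relies on.
from trollsift import Parser

parser = Parser("{platform}-{channel_name}-{segment}-{nominal_time:%Y%m%d%H%M}")

mda = parser.parse("MSG4-VIS006-000003-202401011200")
# -> {'platform': 'MSG4', 'channel_name': 'VIS006', 'segment': '000003',
#     'nominal_time': datetime.datetime(2024, 1, 1, 12, 0)}

print(parser.compose(mda))        # -> MSG4-VIS006-000003-202401011200

del mda['segment']                # pretend 'segment' is a variable tag
print(parser.globify(mda))        # e.g. MSG4-VIS006-*-202401011200
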
Code example #45
File: nc_pps_l2.py  Project: junjie2008v/mpop
    def _determine_prod_and_geo_files(self, prodfilenames):
        """From the list of product files and the products to load determine the
        product files and the geolocation files that will be considered when
        reading the data

        """

        # geofiles4product is a dict listing all geo-locations files applicable
        # for each product.
        # prodfiles4product is a dict listing all product files for a given
        # product name

        prodfiles4product = {}
        geofiles4product = {}
        if prodfilenames:
            if not isinstance(prodfilenames, (list, set, tuple)):
                prodfilenames = [prodfilenames]
            for fname in prodfilenames:
                # Only standard NWCSAF/PPS and EARS-NWC naming accepted!
                # No support for old file names (< PPSv2014)
                if (os.path.basename(fname).startswith("S_NWC") or
                        os.path.basename(fname).startswith("W_XX-EUMETSAT")):
                    if not self._parser:
                        if os.path.basename(fname).startswith("S_NWC"):
                            self._source = 'local'
                            self._parser = Parser(LOCAL_PPS_FILE_MASK)
                        else:
                            self._source = 'ears'
                            self._parser = Parser(EARS_PPS_FILE_MASK)
                else:
                    LOG.info("Unrecognized NWCSAF/PPS file: %s", fname)
                    continue

                parse_info = self._parser.parse(os.path.basename(fname))
                prodname = parse_info['product']

                if prodname not in prodfiles4product:
                    prodfiles4product[prodname] = []

                prodfiles4product[prodname].append(fname)

            # Assemble geolocation information
            if self._source == 'ears':
                # For EARS data, the files have geolocation in themselves
                for prodname, fnames in prodfiles4product.iteritems():
                    geofiles4product[prodname] = fnames
            else:
                # For locally processed data, use the geolocation from
                # the product defined in config

                if self._geolocation_product_name in prodfiles4product:
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = \
                            prodfiles4product[self._geolocation_product_name]
                else:
                    # If the product files with geolocation are not used,
                    # assume that they are still available on the disk.

                    if self._cloud_product_geodir is None:
                        LOG.warning(
                            "Config option 'cloud_product_geodir' is not "
                            "available! Assuming same directory as "
                            "products.")
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = []
                        for fname in prodfiles4product[prodname]:
                            directory = self._cloud_product_geodir or \
                                os.path.abspath(fname)
                            parse_info = \
                                self._parser.parse(os.path.basename(fname))
                            fname = fname.replace(
                                parse_info['product'],
                                self._geolocation_product_name)
                            fname = os.path.join(directory, fname)
                            geofiles4product[prodname].append(fname)

            # Check that each product file has a corresponding geolocation
            # file:
            '''
            if self._geolocation_product_name:
                for prod in products:
                    if prod not in geofiles4product:
                        LOG.error("No product name %s in dict "
                                  "geofiles4product!",
                                  prod)
                        continue
                    if prod not in prodfiles4product:
                        LOG.error("No product name %s in dict "
                                  "prodfiles4product!",
                                  prod)
                        continue
                    if len(geofiles4product[prod]) != \
                       len(prodfiles4product[prod]):
                        LOG.error("Mismatch in number of product files and "
                                  "matching geolocation files!")
            '''

        return prodfiles4product, geofiles4product
Code example #46
class GeoGatherer(object):
    """Gatherer for geostationary satellite segments"""
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        services = ""
        if config.has_option(section, 'services'):
            services = config.get(section, 'services').split()
        self._listener = ListenerContainer(topics=topics, services=services)
        self._publisher = publisher.NoisyPublisher("geo_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(
                seconds=config.getint(section, "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=20)

        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

        self.logger = logging.getLogger("geo_gatherer")
        self._loop = False

    def _clear_data(self):
        """Clear data."""
        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

    def _init_data(self, msg):
        """Init wanted, all and critical files"""
        # Init metadata struct
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                self.metadata[key] = msg.data[key]
        self.metadata['dataset'] = []

        # Critical files that are required, otherwise production will fail
        self.critical_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "critical_files"))
        # These files are wanted, but not critical for production
        self.wanted_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "wanted_files"))
        self.all_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "all_files"))

    def _compose_filenames(self, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.metadata.copy()
        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
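                # A range like '1-8' expands to ['000001', ..., '000008']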
                segments = [
                    '%06d' % i for i in range(int(segments[0]),
                                              int(segments[-1]) + 1)
                ]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.compose(meta)
                result.add(fname)

        return result

    def _publish(self):
        """Publish file dataset and reinitialize gatherer."""

        # Diagnostic logging about delayed ...
        if len(self.delayed_files) > 0:
            file_str = ''
            for key in self.delayed_files:
                file_str += "%s %f seconds, " % (key, self.delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))
        # and missing files
        missing_files = self.all_files.difference(self.received_files)
        if len(missing_files) > 0:
            self.logger.warning("Missing files: %s", ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", self.metadata)
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        self._clear_data()

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def collection_ready(self):
        """Determine if collection is ready to be published."""
        # If no files have been collected, return False
        if len(self.received_files) == 0:
            return False
        # If all wanted and critical files have been received, return True
        if self.wanted_files.union(self.critical_files).issubset(
                self.received_files):
            return True
        # If all critical files have been received ...
        if self.critical_files.issubset(self.received_files):
            # and timeout is reached, return True
            if self._timeout is not None and \
               self._timeout <= dt.datetime.utcnow():
                return True
            # else, set timeout if not already running
            else:
                if self._timeout is None:
                    self._timeout = dt.datetime.utcnow() + self._timeliness
                    self.logger.info("Setting timeout to %s",
                                     str(self._timeout))
                return False

        # In other cases continue gathering
        return False

    def run(self):
        """Run GeoGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if collection is ready for publication
            if self.collection_ready():
                self._publish()

            # Check listener for new messages
            msg = None
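            # ListenerContainer may expose either an output_queue or a plain
            # queue attribute; try the former and fall back to the latter.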
            try:
                msg = self._listener.output_queue.get(True, 1)
            except AttributeError:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        if self._providing_server and self._providing_server != msg.host:
            return

        mda = self._parser.parse(msg.data["uid"])
        if msg.data['uid'] in self.received_files:
            return
        # Init metadata etc if this is the first file
        if len(self.metadata) == 0:
            self._init_data(msg)
        # If the nominal time of the new segment is later than the
        # current metadata has, ...
        elif mda["nominal_time"] > self.metadata["nominal_time"]:
            # timeout ...
            self._timeout = dt.datetime.utcnow()
            # and check if the collection is ready and publish
            if self.collection_ready():
                self._publish()
                self._clear_data()
                self._init_data(msg)
            # or discard data and start new collection
            else:
                self.logger.warning("Collection not finished before new "
                                    "started")
                missing_files = self.all_files.difference(self.received_files)
                self.logger.warning("Missing files: %s", missing_files)
                self._clear_data()
                self._init_data(msg)

        # Add uid and uri
        self.metadata['dataset'].append({
            'uri': msg.data['uri'],
            'uid': msg.data['uid']
        })

        # If critical files have been received but the collection is
        # not complete, add the file to list of delayed files
        if self.critical_files.issubset(self.received_files):
            delay = dt.datetime.utcnow() - (self._timeout - self._timeliness)
            self.delayed_files[msg.data['uid']] = delay.total_seconds()

        # Add to received files
        self.received_files.add(msg.data['uid'])
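
The segment expansion performed by _compose_filenames can be exercised on its own: a compact string such as 'VIS006:1-8,:PRO,:EPI' is split into channel/segment items, numeric ranges are zero-padded to six digits, and each combination is composed into a filename with trollsift. The sketch below assumes a SEVIRI-like pattern and nominal time purely for illustration; in GeoGatherer the pattern and metadata come from the configuration and the incoming messages:

import datetime as dt
from trollsift import Parser

# Hypothetical SEVIRI-like pattern; in GeoGatherer it comes from the
# 'pattern' configuration option.
PATTERN = ("H-000-MSG4__-MSG4________-{channel_name:_<9s}-"
           "{segment:_<9s}-{nominal_time:%Y%m%d%H%M}-__")
parser = Parser(PATTERN)


def compose_filenames(itm_str, meta):
    """Expand e.g. 'VIS006:1-8,:PRO,:EPI' into a set of filenames."""
    result = set()
    meta = dict(meta)
    for itm in itm_str.split(','):
        channel_name, segments = itm.split(':')
        segments = segments.split('-')
        if len(segments) > 1:
            segments = ['%06d' % i for i in range(int(segments[0]),
                                                  int(segments[-1]) + 1)]
        meta['channel_name'] = channel_name
        for seg in segments:
            meta['segment'] = seg
            result.add(parser.compose(meta))
    return result


print(sorted(compose_filenames('VIS006:1-8,:PRO,:EPI',
                               {'nominal_time': dt.datetime(2024, 1, 1, 12, 0)})))

Composing the full filename sets up front is what lets collection_ready reduce to plain set operations on the received, wanted and critical filenames.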