Code Example #1
 def create_filename(self, fname_pattern, dir_pattern, params=None):
     '''Compose the output filename for saving by filling the joined
     directory and filename patterns with the given parameters.
     '''
     fname = os.path.join(dir_pattern, fname_pattern)
     par = Parser(fname)
     fname = par.compose(params)
     return fname
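
For context, here is a minimal usage sketch of the method above. It assumes Parser is trollsift's Parser (its compose()/parse() API matches the calls used throughout these examples); the pattern strings and values are illustrative only.

import datetime as dt
import os

from trollsift import Parser  # assumption: the Parser used in these examples

def create_filename(fname_pattern, dir_pattern, params=None):
    """Compose a full path by joining the patterns and filling them in."""
    return Parser(os.path.join(dir_pattern, fname_pattern)).compose(params)

print(create_filename('{platform}_{start_time:%Y%m%d}.nc',
                      '/data/{platform}',
                      {'platform': 'noaa20',
                       'start_time': dt.datetime(2021, 1, 1)}))
# expected output: /data/noaa20/noaa20_20210101.nc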
Code Example #2
    def _resolve(self, params, ref_params):
        resolved_params = dict()
        for k, v in params.items():
            # compose string values against the reference parameters;
            # recurse into dicts, pass everything else through unchanged
            if isinstance(v, (str, unicode)):  # `unicode`: this is Python 2 code
                par = Parser(v)
                resolved_params[k] = par.compose(ref_params)
            elif isinstance(v, dict):
                resolved_params[k] = self._resolve(v, ref_params)
            else:
                resolved_params[k] = v

        return resolved_params
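
A hedged usage sketch of the recursion above, rewritten as a plain Python 3 function (the wrapper class is omitted and all names are illustrative):

from trollsift import Parser  # assumption, as in the previous sketch

def resolve(params, ref_params):
    """Recursively compose every string value in params against ref_params."""
    resolved = {}
    for key, value in params.items():
        if isinstance(value, str):          # Python 3: str covers unicode
            resolved[key] = Parser(value).compose(ref_params)
        elif isinstance(value, dict):
            resolved[key] = resolve(value, ref_params)
        else:
            resolved[key] = value
    return resolved

print(resolve({'output': '{platform}_out.nc',
               'nested': {'log': '{platform}.log'},
               'level': 2},
              {'platform': 'metopb'}))
# expected: {'output': 'metopb_out.nc', 'nested': {'log': 'metopb.log'}, 'level': 2}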
Code Example #3
    def regional_fires_filtering_and_publishing(self, msg, regional_fmask,
                                                afsff_obj):
        """From the regional-fires-filter-mask and the fire detection data send regional messages."""
        logger.debug(
            "Perform regional masking on VIIRS AF detections and publish accordingly."
        )

        afdata = afsff_obj.get_af_data()
        fmda = afsff_obj.metadata

        fmda['platform'] = afsff_obj.platform_name

        pout = Parser(self.outfile_pattern_regional)

        output_messages = []
        regions_with_detections = 0
        for region_name in regional_fmask:
            if not regional_fmask[region_name]['some_inside_test_area']:
                continue

            regions_with_detections += 1
            fmda['region_name'] = regional_fmask[region_name]['attributes'][
                'Kod_omr']

            out_filepath = os.path.join(self.output_dir, pout.compose(fmda))
            logger.debug("Output file path = %s", out_filepath)
            data_in_region = afdata[regional_fmask[region_name]['mask']]
            filepath = store_geojson(out_filepath,
                                     data_in_region,
                                     platform_name=fmda['platform'])
            if not filepath:
                logger.warning(
                    "Something went wrong storing regional "
                    "data to GeoJSON - area: %s", str(region_name))
                continue

            outmsg = self._generate_output_message(filepath, msg,
                                                   regional_fmask[region_name])
            output_messages.append(outmsg)
            logger.info("Geojson file created! Number of fires in region = %d",
                        len(data_in_region))

        logger.debug(
            "Regional masking done. Number of regions with fire "
            "detections on this granule: %s", str(regions_with_detections))
        return output_messages
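
The heart of the loop above is plain boolean-mask indexing over the detection array. A minimal sketch of just that step, with hypothetical stand-ins for regional_fmask and afdata (the real objects come from the fire-filtering pipeline and are not shown in the snippet):

import numpy as np

# hypothetical stand-ins for the real detection data and region masks
afdata = np.array([(58.1, 16.2), (59.4, 17.8), (60.0, 18.1)],
                  dtype=[('lat', 'f8'), ('lon', 'f8')])
regional_fmask = {
    'region_a': {'some_inside_test_area': True,
                 'mask': np.array([True, False, True])},
    'region_b': {'some_inside_test_area': False,
                 'mask': np.array([False, False, False])},
}

for region_name, region in regional_fmask.items():
    if not region['some_inside_test_area']:
        continue  # nothing to publish for this region
    data_in_region = afdata[region['mask']]
    print(region_name, len(data_in_region))
# expected: region_a 2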
Code Example #4
class GeoGatherer(object):
    """Gatherer for geostationary satellite segments"""
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        services = ""
        if config.has_option(section, 'services'):
            services = config.get(section, 'services').split()
        self._listener = ListenerContainer(topics=topics, services=services)
        self._publisher = publisher.NoisyPublisher("geo_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(
                seconds=config.getint(section, "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=20)

        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

        self.logger = logging.getLogger("geo_gatherer")
        self._loop = False

    def _clear_data(self):
        """Clear data."""
        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

    def _init_data(self, msg):
        """Init wanted, all and critical files"""
        # Init metadata struct
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                self.metadata[key] = msg.data[key]
        self.metadata['dataset'] = []

        # Critical files that are required, otherwise production will fail
        self.critical_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "critical_files"))
        # These files are wanted, but not critical for production
        self.wanted_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "wanted_files"))
        self.all_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "all_files"))

    def _compose_filenames(self, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.metadata.copy()
        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = [
                    '%06d' % i for i in range(int(segments[0]),
                                              int(segments[-1]) + 1)
                ]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.compose(meta)
                result.add(fname)

        return result

    def _publish(self):
        """Publish file dataset and reinitialize gatherer."""

        # Diagnostic logging about delayed ...
        if len(self.delayed_files) > 0:
            file_str = ''
            for key in self.delayed_files:
                file_str += "%s %f seconds, " % (key, self.delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))
        # and missing files
        missing_files = self.all_files.difference(self.received_files)
        if len(missing_files) > 0:
            self.logger.warning("Missing files: %s", ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", self.metadata)
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        self._clear_data()

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def collection_ready(self):
        """Determine if collection is ready to be published."""
        # If no files have been collected, return False
        if len(self.received_files) == 0:
            return False
        # If all wanted files have been received, return True
        if self.wanted_files.union(self.critical_files).issubset(
                self.received_files):
            return True
        # If all critical files have been received ...
        if self.critical_files.issubset(self.received_files):
            # and timeout is reached, return True
            if self._timeout is not None and \
               self._timeout <= dt.datetime.utcnow():
                return True
            # else, set timeout if not already running
            else:
                if self._timeout is None:
                    self._timeout = dt.datetime.utcnow() + self._timeliness
                    self.logger.info("Setting timeout to %s",
                                     str(self._timeout))
                return False

        # In other cases continue gathering
        return False

    def run(self):
        """Run GeoGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if collection is ready for publication
            if self.collection_ready():
                self._publish()

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.output_queue.get(True, 1)
            except AttributeError:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        if self._providing_server and self._providing_server != msg.host:
            return

        mda = self._parser.parse(msg.data["uid"])
        if msg.data['uid'] in self.received_files:
            return
        # Init metadata etc if this is the first file
        if len(self.metadata) == 0:
            self._init_data(msg)
        # If the nominal time of the new segment is later than the
        # current metadata has, ...
        elif mda["nominal_time"] > self.metadata["nominal_time"]:
            # timeout ...
            self._timeout = dt.datetime.utcnow()
            # and check if the collection is ready and publish
            if self.collection_ready():
                self._publish()
                self._clear_data()
                self._init_data(msg)
            # or discard data and start new collection
            else:
                self.logger.warning("Collection not finished before new "
                                    "started")
                missing_files = self.all_files.difference(self.received_files)
                self.logger.warning("Missing files: %s", missing_files)
                self._clear_data()
                self._init_data(msg)

        # Add uid and uri
        self.metadata['dataset'].append({
            'uri': msg.data['uri'],
            'uid': msg.data['uid']
        })

        # If critical files have been received but the collection is
        # not complete, add the file to list of delayed files
        if self.critical_files.issubset(self.received_files):
            delay = dt.datetime.utcnow() - (self._timeout - self._timeliness)
            self.delayed_files[msg.data['uid']] = delay.total_seconds()

        # Add to received files
        self.received_files.add(msg.data['uid'])
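
The item-string syntax handled by _compose_filenames (':PRO,:EPI' or 'VIS006:1-8') is easiest to follow in isolation. Here is a hedged, standalone sketch of just that expansion, again assuming trollsift's Parser; the pattern string is illustrative:

from trollsift import Parser  # assumption, as before

def compose_filenames(pattern, itm_str, meta):
    """Expand 'CHANNEL:FIRST-LAST' items into a set of concrete filenames."""
    parser = Parser(pattern)
    result = set()
    for itm in itm_str.split(','):
        channel_name, segments = itm.split(':')
        segments = segments.split('-')
        if len(segments) > 1:
            # a range like '1-3' expands to zero-padded '000001'..'000003'
            segments = ['%06d' % i
                        for i in range(int(segments[0]), int(segments[-1]) + 1)]
        for seg in segments:
            result.add(parser.compose(dict(meta, channel_name=channel_name,
                                           segment=seg)))
    return result

print(sorted(compose_filenames('H-000-{channel_name}-{segment}',
                               'VIS006:1-2,:PRO', {})))
# expected: ['H-000--PRO', 'H-000-VIS006-000001', 'H-000-VIS006-000002']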
Code Example #5
class SegmentGatherer(object):
    """Gatherer for geostationary satellite segments and multifile polar
    satellite granules."""
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("segment_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(
                seconds=config.getint(section, "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()

        self.time_name = config.get(section, 'time_name')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False

    def _clear_data(self, time_slot):
        """Clear data."""
        if time_slot in self.slots:
            del self.slots[time_slot]

    def _init_data(self, msg, mda):
        """Init wanted, all and critical files"""
        # Init metadata struct
        metadata = {}
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                metadata[key] = msg.data[key]
        metadata['dataset'] = []

        # Use also metadata parsed from the filenames
        metadata.update(mda)

        time_slot = str(metadata[self.time_name])
        self.slots[time_slot] = {}
        self.slots[time_slot]['metadata'] = metadata.copy()

        # Critical files that are required, otherwise production will fail.
        # If there are no critical files, empty set([]) is used.
        try:
            critical_segments = self._config.get(self._section,
                                                 "critical_files")
            self.slots[time_slot]['critical_files'] = \
                    self._compose_filenames(time_slot, critical_segments)
        except (NoOptionError, ValueError):
            self.slots[time_slot]['critical_files'] = set([])

        # These files are wanted, but not critical to production
        self.slots[time_slot]['wanted_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "wanted_files"))
        # Name of all the files
        self.slots[time_slot]['all_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "all_files"))

        self.slots[time_slot]['received_files'] = set([])
        self.slots[time_slot]['delayed_files'] = dict()
        self.slots[time_slot]['missing_files'] = set([])
        self.slots[time_slot]['timeout'] = None
        self.slots[time_slot]['files_till_premature_publish'] = \
            self._num_files_premature_publish

    def _compose_filenames(self, time_slot, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.slots[time_slot]['metadata'].copy()

        # Replace variable tags (such as processing time) with a
        # wildcard, as these can't be forecasted.
        try:
            for tag in self._config.get(self._section,
                                        'variable_tags').split(','):
                meta[tag] = '*'
        except NoOptionError:
            pass

        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = [
                    '%d' % i for i in range(int(segments[0]),
                                            int(segments[-1]) + 1)
                ]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.compose(meta)
                result.add(fname)

        return result

    def _publish(self, time_slot, missing_files_check=True):
        """Publish file dataset and reinitialize gatherer."""

        data = self.slots[time_slot]

        # Diagnostic logging about delayed ...
        delayed_files = data['delayed_files']
        if len(delayed_files) > 0:
            file_str = ''
            for key in delayed_files:
                file_str += "%s %f seconds, " % (key, delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))

        if missing_files_check:
            # and missing files
            missing_files = data['all_files'].difference(
                data['received_files'])
            if len(missing_files) > 0:
                self.logger.warning("Missing files: %s",
                                    ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", data['metadata'])
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        # self._clear_data(time_slot)

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def slot_ready(self, slot):
        """Determine if slot is ready to be published."""
        # If no files have been collected, return False
        if len(slot['received_files']) == 0:
            return SLOT_NOT_READY

        time_slot = str(slot['metadata'][self.time_name])

        wanted_and_critical_files = \
            slot['wanted_files'].union(slot['critical_files'])
        num_wanted_and_critical_files_received = \
            len(wanted_and_critical_files & slot['received_files'])

        self.logger.debug("Got %s wanted or critical files in slot %s.",
                          num_wanted_and_critical_files_received, time_slot)

        if num_wanted_and_critical_files_received \
                == slot['files_till_premature_publish']:
            slot['files_till_premature_publish'] = -1
            return SLOT_READY_BUT_WAIT_FOR_MORE

        # If all wanted files have been received, return True
        if wanted_and_critical_files.issubset(slot['received_files']):
            self.logger.info("All files received for slot %s.", time_slot)
            return SLOT_READY

        if slot['critical_files'].issubset(slot['received_files']):
            # All critical files have been received
            if slot['timeout'] is None:
                # Set timeout
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s.",
                                 str(slot['timeout']), time_slot)
                return SLOT_NOT_READY
            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection ready
                self.logger.info(
                    "Timeout occured, required files received "
                    "for slot %s.", time_slot)
                return SLOT_READY
            else:
                pass
        else:
            if slot['timeout'] is None:
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s",
                                 str(slot['timeout']), time_slot)
                return SLOT_NOT_READY

            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection is obsolete
                self.logger.warning(
                    "Timeout occured and required files "
                    "were not present, data discarded for "
                    "slot %s.", time_slot)
                return SLOT_OBSOLETE_TIMEOUT
            else:
                pass

        # Timeout not reached, wait for more files
        return SLOT_NOT_READY

    def run(self):
        """Run SegmentGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if there are slots ready for publication
            slots = self.slots.copy()
            for slot in slots:
                slot = str(slot)
                status = self.slot_ready(slots[slot])
                if status == SLOT_READY:
                    # Collection ready, publish and remove
                    self._publish(slot)
                    self._clear_data(slot)
                elif status == SLOT_READY_BUT_WAIT_FOR_MORE:
                    # Collection ready, publish but wait for more files
                    self._publish(slot, missing_files_check=False)
                elif status == SLOT_OBSOLETE_TIMEOUT:
                    # Collection unfinished and obsolete, discard
                    self._clear_data(slot)
                else:
                    # Collection unfinished, wait for more data
                    pass

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        try:
            mda = self._parser.parse(msg.data["uid"])
        except ValueError:
            self.logger.debug("Unknown file, skipping.")
            return
        time_slot = str(mda[self.time_name])

        # Init metadata etc if this is the first file
        if time_slot not in self.slots:
            self._init_data(msg, mda)

        slot = self.slots[time_slot]

        # Replace variable tags (such as processing time) with a
        # wildcard, as these can't be forecasted.
        try:
            for tag in self._config.get(self._section,
                                        'variable_tags').split(','):
                mda[tag] = '*'
        except NoOptionError:
            pass

        mask = self._parser.compose(mda)

        if mask in slot['received_files']:
            return

        # Add uid and uri
        slot['metadata']['dataset'].append({
            'uri': msg.data['uri'],
            'uid': msg.data['uid']
        })

        # If critical files have been received but the slot is
        # not complete, add the file to list of delayed files
        if len(slot['critical_files']) > 0 and \
           slot['critical_files'].issubset(slot['received_files']):
            delay = dt.datetime.utcnow() - (slot['timeout'] - self._timeliness)
            slot['delayed_files'][msg.data['uid']] = delay.total_seconds()

        # Add to received files
        slot['received_files'].add(mask)
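
The SLOT_* status constants are referenced but not defined in this snippet; in the source module they are plain integer constants. A reduced sketch of the timeout branch of slot_ready(), with assumed values (not taken from the source):

import datetime as dt

# assumed values; the snippet treats these as opaque constants
SLOT_NOT_READY = 0
SLOT_READY = 2
SLOT_READY_BUT_WAIT_FOR_MORE = 3
SLOT_OBSOLETE_TIMEOUT = 4

def check_timeout(slot, timeliness, critical_ok):
    """Arm the timeout on first call; once it expires, the slot is READY
    if all critical files arrived, otherwise it is discarded."""
    now = dt.datetime.utcnow()
    if slot['timeout'] is None:
        slot['timeout'] = now + timeliness       # arm the timer
        return SLOT_NOT_READY
    if slot['timeout'] < now:
        return SLOT_READY if critical_ok else SLOT_OBSOLETE_TIMEOUT
    return SLOT_NOT_READY                        # still waiting

slot = {'timeout': None}
print(check_timeout(slot, dt.timedelta(seconds=0), critical_ok=True))  # 0 (armed)
print(check_timeout(slot, dt.timedelta(seconds=0), critical_ok=True))  # 2 (expired)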
Code Example #6
class GeoGatherer(object):

    """Gatherer for geostationary satellite segments"""

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("geo_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=20)

        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

        self.logger = logging.getLogger("geo_gatherer")
        self._loop = False

    def _clear_data(self):
        """Clear data."""
        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

    def _init_data(self, msg):
        """Init wanted, all and critical files"""
        # Init metadata struct
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                self.metadata[key] = msg.data[key]
        self.metadata['dataset'] = []

        # Critical files that are required, otherwise production will fail
        self.critical_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "critical_files"))
        # These files are wanted, but not critical for production
        self.wanted_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "wanted_files"))
        self.all_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "all_files"))

    def _compose_filenames(self, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.metadata.copy()
        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = ['%06d' % i for i in range(int(segments[0]),
                                                      int(segments[-1]) + 1)]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.compose(meta)
                result.add(fname)

        return result

    def _publish(self):
        """Publish file dataset and reinitialize gatherer."""

        # Diagnostic logging about delayed ...
        if len(self.delayed_files) > 0:
            file_str = ''
            for key in self.delayed_files:
                file_str += "%s %f seconds, " % (key, self.delayed_files[key])
            self.logger.warning("Files received late: %s", file_str.strip(', '))
        # and missing files
        missing_files = self.all_files.difference(self.received_files)
        if len(missing_files) > 0:
            self.logger.warning("Missing files: %s", ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", self.metadata)
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        self._clear_data()

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def collection_ready(self):
        """Determine if collection is ready to be published."""
        # If no files have been collected, return False
        if len(self.received_files) == 0:
            return False
        # If all wanted files have been received, return True
        if self.wanted_files.union(self.critical_files).issubset(
                self.received_files):
            return True
        # If all critical files have been received ...
        if self.critical_files.issubset(self.received_files):
            # and timeout is reached, return True
            if self._timeout is not None and \
               self._timeout <= dt.datetime.utcnow():
                return True
            # else, set timeout if not already running
            else:
                if self._timeout is None:
                    self._timeout = dt.datetime.utcnow() + self._timeliness
                    self.logger.info("Setting timeout to %s",
                                     str(self._timeout))
                return False

        # In other cases continue gathering
        return False

    def run(self):
        """Run GeoGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if collection is ready for publication
            if self.collection_ready():
                self._publish()

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        mda = self._parser.parse(msg.data["uid"])
        if msg.data['uid'] in self.received_files:
            return
        # Init metadata etc if this is the first file
        if len(self.metadata) == 0:
            self._init_data(msg)
        # If the nominal time of the new segment is later than the
        # current metadata has, ...
        elif mda["nominal_time"] > self.metadata["nominal_time"]:
            # timeout ...
            self._timeout = dt.datetime.utcnow()
            # and check if the collection is ready and publish
            if self.collection_ready():
                self._publish()
                self._clear_data()
                self._init_data(msg)
            # or discard data and start new collection
            else:
                self.logger.warning("Collection not finished before new "
                                    "started")
                missing_files = self.all_files.difference(self.received_files)
                self.logger.warning("Missing files: %s", missing_files)
                self._clear_data()
                self._init_data(msg)

        # Add uid and uri
        self.metadata['dataset'].append({'uri': msg.data['uri'],
                                         'uid': msg.data['uid']})

        # If critical files have been received but the collection is
        # not complete, add the file to list of delayed files
        if self.critical_files.issubset(self.received_files):
            delay = dt.datetime.utcnow() - (self._timeout - self._timeliness)
            self.delayed_files[msg.data['uid']] = delay.total_seconds()

        # Add to received files
        self.received_files.add(msg.data['uid'])
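
Finally, a hedged construction sketch for the gatherer classes above. The snippets are Python 2-era code (they use unicode, Queue.Empty and NoOptionError), so ConfigParser is imported accordingly; the section name, topic and pattern below are illustrative, not taken from the source:

from ConfigParser import RawConfigParser  # Python 2, matching the snippets

config = RawConfigParser()
config.add_section('msg_hrit')                       # hypothetical section
config.set('msg_hrit', 'topics', '/foo/hrit')
config.set('msg_hrit', 'publish_topic', '/gathered/msg_hrit')
config.set('msg_hrit', 'pattern',
           'H-000-{platform_shortname}-{channel_name}-{segment}-'
           '{nominal_time:%Y%m%d%H%M}-__')
config.set('msg_hrit', 'timeliness', '20')
# critical_files / wanted_files / all_files are read lazily in
# _init_data(), so they must also be present before the first message.
config.set('msg_hrit', 'critical_files', ':PRO,:EPI')
config.set('msg_hrit', 'wanted_files', 'VIS006:1-8')
config.set('msg_hrit', 'all_files', 'VIS006:1-8,:PRO,:EPI')

gatherer = GeoGatherer(config, 'msg_hrit')
gatherer.run()   # blocks: listens for file messages, gathers, publishes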