Example No. 1
    def update_config(self, new_config):
        """Update configuration and reload listeners."""
        old_config = self.config
        topics = set()
        try:
            for _client, client_config in new_config.items():
                topics |= set(
                    sum([
                        item['topics']
                        for item in client_config['dispatch_configs']
                    ], []))
            if self.topics != topics:
                if self.listener is not None:
                    # FIXME: make sure to get the last messages though
                    self.listener.stop()
                self.config = new_config
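                # Note: client_config below is whatever binding the for-loop
                # above left last, so the final client's subscribe settings
                # are applied to the shared listener.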
                addresses = client_config.get('subscribe_addresses', None)
                nameserver = client_config.get('nameserver', 'localhost')
                services = client_config.get('subscribe_services', '')
                self.listener = ListenerContainer(topics=topics,
                                                  addresses=addresses,
                                                  nameserver=nameserver,
                                                  services=services)
                self.topics = topics

        except KeyError as err:
            logger.warning(
                'Invalid config for %s, keeping the old one running: %s',
                _client, str(err))
            self.config = old_config
Example No. 2

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()

        try:
            nameservers = config.get(section, 'nameserver')
            nameservers = nameservers.split()
        except (NoOptionError, ValueError):
            nameservers = []

        try:
            addresses = config.get(section, 'addresses')
            addresses = addresses.split()
        except (NoOptionError, ValueError):
            addresses = None

        try:
            publish_port = config.get(section, 'publish_port')
        except NoOptionError:
            publish_port = 0

        try:
            services = config.get(section, 'services').split()
        except (NoOptionError, ValueError):
            services = ""

        self._listener = ListenerContainer(topics=topics, addresses=addresses,
                                           services=services)
        self._publisher = publisher.NoisyPublisher("segment_gatherer",
                                                   port=publish_port,
                                                   nameservers=nameservers)
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)
        try:
            self._time_tolerance = config.getint(section, "time_tolerance")
        except NoOptionError:
            self._time_tolerance = 30
        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()

        self.time_name = config.get(section, 'time_name')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')
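
The listener and publisher built above are typically driven by a polling loop; a minimal sketch modeled on the run() methods shown later on this page (Empty is assumed to come from the queue module, and process() stands in for the class's message handler):

    def run(self):
        """Poll the listener queue and handle file messages."""
        self._loop = True
        while self._loop:
            try:
                msg = self._listener.output_queue.get(True, 1)
            except Empty:
                continue
            if msg.type == "file":
                self.process(msg)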
Example No. 3
def run(prod_list, topics=None, test_message=None):
    """Spawn one or multiple subprocesses to run the jobs from the product list."""
    tmessage = get_test_message(test_message)
    if tmessage:
        from threading import Thread as Process
        from posttroll.message import Message
    else:
        from multiprocessing import Process

    with open(prod_list) as fid:
        config = yaml.load(fid.read(), Loader=BaseLoader)
    topics = topics or config['product_list'].pop('subscribe_topics', None)

    if not tmessage:
        listener = ListenerContainer(topics=topics)

    while True:
        try:
            if tmessage:
                msg = Message(rawstr=tmessage)
            else:
                msg = listener.output_queue.get(True, 5)
        except KeyboardInterrupt:
            if not tmessage:
                listener.stop()
            return
        except queue_empty:
            continue

        proc = Process(target=process, args=(msg, prod_list))
        proc.start()
        proc.join()
        if tmessage:
            break
Example No. 4
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        services = ""
        if config.has_option(section, 'services'):
            services = config.get(section, 'services').split()
        self._listener = ListenerContainer(topics=topics, services=services)
        self._publisher = publisher.NoisyPublisher("geo_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(
                seconds=config.getint(section, "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=20)

        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

        self.logger = logging.getLogger("geo_gatherer")
        self._loop = False
Example No. 5
    def _setup_and_start_communication(self):
        """Set up the Posttroll communication and start the publisher."""
        LOG.debug("Input topic: %s", self.input_topic)
        self.listener = ListenerContainer(topics=[self.input_topic])
        self.publisher = NoisyPublisher("end_user_notifier")
        self.publisher.start()
        self.loop = True
        signal.signal(signal.SIGTERM, self.signal_shutdown)
Example No. 6
    def __init__(self, config):
        self.config = config
        topics = config.sections()
        self.listener = ListenerContainer(topics=topics)
        self._loop = True

        if GSHHS_DATA_ROOT:
            self._cw = ContourWriter(GSHHS_DATA_ROOT)
        else:
            self._cw = None
        self._force_gc = False
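
A minimal sketch of how such a config object can be produced with the standard library configparser (the filename is hypothetical); every section name in the INI file doubles as a subscription topic:

from configparser import ConfigParser

config = ConfigParser()
config.read('areas.ini')
topics = config.sections()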
Example No. 7
    def _setup_and_start_communication(self):
        """Set up the Posttroll communication and start the publisher."""
        logger.debug("Starting up... Input topic: %s", self.input_topic)
        now = datetime_from_utc_to_local(datetime.now(), self.timezone)
        logger.debug("Output times for timezone: {zone} Now = {time}".format(
            zone=str(self.timezone), time=now))

        self.listener = ListenerContainer(topics=[self.input_topic])
        self.publisher = NoisyPublisher("active_fires_postprocessing")
        self.publisher.start()
        self.loop = True
        signal.signal(signal.SIGTERM, self.signal_shutdown)
Example No. 8
    def _setup_listener(self):
        self._subject = self._config['posttroll']['publish_topic']
        topics = self._config['posttroll'].get('topics')
        addresses = self._config['posttroll'].get('addresses')
        services = self._config['posttroll'].get('services', "")
        nameserver = check_nameserver_options(
            self._config['posttroll'].get('nameservers'), for_listener=True)

        self._listener = ListenerContainer(topics=topics,
                                           addresses=addresses,
                                           nameserver=nameserver,
                                           services=services)
Example No. 9
    def _setup_messaging(self):
        """Set up messaging."""
        self._subject = self._config['posttroll']['publish_topic']
        topics = self._config['posttroll'].get('topics')
        addresses = self._config['posttroll'].get('addresses')
        publish_port = self._config['posttroll'].get('publish_port', 0)
        nameservers = self._config['posttroll'].get('nameservers', [])
        self._listener = ListenerContainer(topics=topics, addresses=addresses)
        self._publisher = publisher.NoisyPublisher("segment_gatherer",
                                                   port=publish_port,
                                                   nameservers=nameservers)
        self._publisher.start()
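
A matching teardown for this setup, sketched from the stop() methods in the fuller gatherer examples further down this page (the method name _stop_messaging is hypothetical):

    def _stop_messaging(self):
        """Stop the listener and the publisher, if they were created."""
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()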
Example No. 10
    def _create_listener(self, client_config, topics):
        if self.listener is not None:
            # FIXME: make sure to get the last messages though
            self.listener.stop()
        addresses = client_config.get('subscribe_addresses', None)
        nameserver = client_config.get('nameserver', 'localhost')
        services = client_config.get('subscribe_services', '')
        self.listener = ListenerContainer(topics=topics,
                                          addresses=addresses,
                                          nameserver=nameserver,
                                          services=services)
        self.topics = topics
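
For illustration, a client_config dict holding the keys this method reads; the values and the dispatcher instance are hypothetical:

client_config = {
    'subscribe_addresses': ['tcp://localhost:40000'],
    'nameserver': 'localhost',
    'subscribe_services': '',
}
dispatcher._create_listener(client_config, {'/my/topic'})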
Example No. 11
def run(prod_list, topics=None, test_message=None):
    """Spawn one or multiple subprocesses to run the jobs from the product list."""
    tmessage = get_test_message(test_message)
    if tmessage:
        from threading import Thread as Process
        from posttroll.message import Message
    else:
        from multiprocessing import Process

    with open(prod_list) as fid:
        config = yaml.load(fid.read(), Loader=BaseLoader)
    topics = topics or config['product_list'].pop('subscribe_topics', None)

    if not tmessage:
        listener = ListenerContainer(topics=topics)

    while True:
        try:
            if tmessage:
                msg = Message(rawstr=tmessage)
            else:
                msg = listener.output_queue.get(True, 5)
        except KeyboardInterrupt:
            if not tmessage:
                listener.stop()
            return
        except Empty:
            continue
        q = Queue()
        proc = Process(target=process, args=(msg, prod_list, q))
        proc.start()
        proc.join()
        list_of_files = []
        while not q.empty():
            x = q.get()
            if os.path.isfile(x):
                # Record each produced file together with its size.
                list_of_files.extend((x, os.path.getsize(x)))
                print(x)
                LOG.info("Data published")
            else:
                print("Files missing")
                LOG.info("Data missing")

        if tmessage:
            break

    return list_of_files
Example No. 12
def run(prod_list,
        topics=None,
        test_message=None,
        nameserver='localhost',
        addresses=None):
    """Spawn one or multiple subprocesses to run the jobs from the product list."""
    LOG.info("Launching trollflow2")
    tmessage = get_test_message(test_message)
    if tmessage:
        from threading import Thread as Process
        from six.moves.queue import Queue
        from posttroll.message import Message
    else:
        from multiprocessing import Process, Queue

    with open(prod_list) as fid:
        config = yaml.load(fid.read(), Loader=BaseLoader)
    topics = topics or config['product_list'].pop('subscribe_topics', None)

    if not tmessage:
        listener = ListenerContainer(topics=topics,
                                     nameserver=nameserver,
                                     addresses=addresses)

    while True:
        try:
            if tmessage:
                msg = Message(rawstr=tmessage)
            else:
                msg = listener.output_queue.get(True, 5)
        except KeyboardInterrupt:
            if not tmessage:
                listener.stop()
            return
        except Empty:
            continue
        produced_files = Queue()
        proc = Process(target=process, args=(msg, prod_list, produced_files))
        start_time = datetime.now()
        proc.start()
        proc.join()
        try:
            exitcode = proc.exitcode
        except AttributeError:
            exitcode = 0
        check_results(produced_files, start_time, exitcode)
        if tmessage:
            break
Example No. 13
def run(topics, prod_list):

    listener = ListenerContainer(topics=topics)

    while True:
        try:
            msg = listener.output_queue.get(True, 5)
        except KeyboardInterrupt:
            listener.stop()
            return
        except queue_empty:
            continue

        proc = Process(target=process, args=(msg, prod_list))
        proc.start()
        proc.join()
        time.sleep(5)
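
A minimal invocation of this run(), with placeholder arguments; Process and queue_empty are assumed to be imported at module level:

# e.g. from multiprocessing import Process
#      from queue import Empty as queue_empty
run(topics=['/my/topic'], prod_list='/path/to/product_list.yaml')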
Example No. 14
    def _setup_messaging(self):
        """Set up messaging."""
        self._subject = self._config['posttroll']['publish_topic']
        topics = self._config['posttroll'].get('topics')
        addresses = self._config['posttroll'].get('addresses')
        publish_port = self._config['posttroll'].get('publish_port', 0)
        nameservers = self._config['posttroll'].get('nameservers', [])
        services = self._config['posttroll'].get('services', "")
        self._listener = ListenerContainer(topics=topics,
                                           addresses=addresses,
                                           services=services)
        # Name each segment_gatherer with the section/patterns name.
        # This way the user can subscribe to a specific segment_gatherer service instead of all.
        publish_service_name = self._generate_publish_service_name()
        self._publisher = publisher.NoisyPublisher(publish_service_name,
                                                   port=publish_port,
                                                   nameservers=nameservers)
        self._publisher.start()
Example No. 15
    def test_listener_container(self):
        """Test listener container"""
        pub = NoisyPublisher("test")
        pub.start()
        sub = ListenerContainer(topics=["/counter"])
        time.sleep(2)
        for counter in range(5):
            tested = False
            msg_out = Message("/counter", "info", str(counter))
            pub.send(str(msg_out))

            msg_in = sub.output_queue.get(True, 1)
            if msg_in is not None:
                self.assertEqual(str(msg_in), str(msg_out))
                tested = True
            self.assertTrue(tested)
        pub.stop()
        sub.stop()
Example No. 16
def _create_listener_from_connection_parameters(connection_parameters):
    """Create listener from connection parameters."""
    topics = connection_parameters['topic']
    nameserver = connection_parameters.get('nameserver', 'localhost')
    addresses = connection_parameters.get('addresses')
    listener = ListenerContainer(addresses=addresses,
                                 nameserver=nameserver,
                                 topics=topics)
    return listener
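
For illustration, a hedged call with the keys the function reads (all values are placeholders):

connection_parameters = {
    'topic': ['/my/topic'],
    'nameserver': 'localhost',
    'addresses': None,
}
listener = _create_listener_from_connection_parameters(connection_parameters)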
Example No. 17
    def __init__(self, config):
        self.config = config
        self.slots = {}
        # Structure of self.slots is:
        # slots = {datetime(): {composite: {"img": None,
        #                                   "num": 0},
        #                       "timeout": None}}
        self._parse_settings()
        self._listener = ListenerContainer(topics=config["topics"])
        self._set_message_settings()
        self._publisher = \
            NoisyPublisher("WorldCompositePublisher",
                           port=self.port,
                           aliases=self.aliases,
                           broadcast_interval=self.broadcast_interval,
                           nameservers=self.nameservers)
        self._publisher.start()
        self._loop = False
        if isinstance(config["area_def"], str):
            self.adef = get_area_def(config["area_def"])
        else:
            self.adef = config["area_def"]
Example No. 18
    def test_listener_container(self):
        """Test listener container"""
        from posttroll.message import Message
        from posttroll.publisher import NoisyPublisher
        from posttroll.listener import ListenerContainer

        pub = NoisyPublisher("test")
        pub.start()
        sub = ListenerContainer(topics=["/counter"])
        time.sleep(2)
        for counter in range(5):
            tested = False
            msg_out = Message("/counter", "info", str(counter))
            pub.send(str(msg_out))

            msg_in = sub.output_queue.get(True, 1)
            if msg_in is not None:
                self.assertEqual(str(msg_in), str(msg_out))
                tested = True
            self.assertTrue(tested)
        pub.stop()
        sub.stop()
Example No. 19
class Dispatcher(Thread):
    """Class that dispatches files."""

    def __init__(self, config_file, publish_port=None,
                 publish_nameservers=None):
        """Initialize dispatcher class."""
        super().__init__()
        self.config = None
        self.topics = None
        self.listener = None
        self._publish_port = publish_port
        self._publish_nameservers = publish_nameservers
        self.publisher = None
        self.host = socket.gethostname()
        self._create_publisher()
        self.loop = True
        self.config_handler = DispatchConfig(config_file, self.update_config)
        signal.signal(signal.SIGTERM, self.signal_shutdown)

    def _create_publisher(self):
        if self._publish_port is not None:
            self.publisher = NoisyPublisher("dispatcher", port=self._publish_port,
                                            nameservers=self._publish_nameservers)
            self.publisher.start()

    def signal_shutdown(self, *args, **kwargs):
        """Shutdown dispatcher."""
        self.close()

    def update_config(self, new_config):
        """Update configuration and reload listeners."""
        old_config = self.config
        topics = set()
        try:
            for _client, client_config in new_config.items():
                topics |= set(sum([item['topics'] for item in client_config['dispatch_configs']], []))
            if self.topics != topics:
                self.config = new_config
                self._create_listener(client_config, topics)
        except KeyError as err:
            logger.warning('Invalid config for %s, keeping the old one running: %s', _client, str(err))
            self.config = old_config

    def _create_listener(self, client_config, topics):
        if self.listener is not None:
            # FIXME: make sure to get the last messages though
            self.listener.stop()
        addresses = client_config.get('subscribe_addresses', None)
        nameserver = client_config.get('nameserver', 'localhost')
        services = client_config.get('subscribe_services', '')
        self.listener = ListenerContainer(topics=topics,
                                          addresses=addresses,
                                          nameserver=nameserver,
                                          services=services)
        self.topics = topics

    def run(self):
        """Run dispatcher."""
        while self.loop:
            try:
                msg = self.listener.output_queue.get(timeout=1)
            except Empty:
                continue
            if msg.type != 'file':
                continue
            self._dispatch_from_message(msg)

    def _dispatch_from_message(self, msg):
        destinations = self.get_destinations(msg)
        if destinations:
            # Check if the URL is on another host:
            url = urlparse(msg.data['uri'])
            _check_file_locality(url, self.host)
            success = dispatch(url.path, destinations)
            if self.publisher:
                self._publish(msg, destinations, success)

    def _publish(self, msg, destinations, success):
        """Publish a message.

        The URI is replaced with the URI on the target server.

        """
        for url, _, client in destinations:
            if not success[client]:
                continue
            msg = self._get_new_message(msg, url, client)
            if msg is None:
                continue
            logger.debug('Publishing %s', str(msg))
            self.publisher.send(str(msg))

    def _get_new_message(self, msg, url, client):
        info = self._get_message_info(msg, url)
        topic = self._get_topic(client, info)
        if topic is None:
            return None
        return Message(topic, 'file', info)

    def _get_message_info(self, msg, url):
        info = msg.data.copy()
        info["uri"] = urlsplit(url).path
        return info

    def _get_topic(self, client, info):
        topic = self.config[client].get("publish_topic")
        if topic is None:
            logger.error("Publish topic not configured for '%s'", client)
            return None
        return compose(topic, info)

    def get_destinations(self, msg):
        """Get the destinations for this message."""
        destinations = []
        for client, config in self.config.items():
            for dispatch_config in config['dispatch_configs']:
                destination = self._get_destination(dispatch_config, msg, client)
                if destination is None:
                    continue
                destinations.append(destination)
        return destinations

    def _get_destination(self, dispatch_config, msg, client):
        destination = None
        if _has_correct_topic(dispatch_config, msg):
            if check_conditions(msg, dispatch_config):
                destination = self.create_dest_url(msg, client, dispatch_config)
        return destination

    def create_dest_url(self, msg, client, conf):
        """Create the destination URL and the connection parameters."""
        config = self.config[client].copy()
        _verify_filepattern(config, msg)
        config.update(conf)
        connection_parameters = config.get('connection_parameters')

        host = config['host']

        metadata = _get_metadata_with_aliases(msg, config)

        path = compose(
            os.path.join(config['directory'],
                         config['filepattern']),
            metadata)
        parts = urlsplit(host)
        host_path = urlunsplit((parts.scheme, parts.netloc, path, parts.query, parts.fragment))
        return host_path, connection_parameters, client

    def close(self):
        """Shutdown the dispatcher."""
        logger.info('Terminating dispatcher.')
        self.loop = False
        try:
            self.listener.stop()
        except Exception:
            logger.exception("Couldn't stop listener.")
        if self.publisher:
            try:
                self.publisher.stop()
            except Exception:
                logger.exception("Couldn't stop publisher.")
        try:
            self.config_handler.close()
        except Exception:
            logger.exception("Couldn't stop config handler.")
Example No. 20
class SegmentGatherer(object):
    """Gatherer for geostationary satellite segments and multifile polar satellite granules."""

    _listener = None
    _publisher = None

    def __init__(self, config):
        """Initialize the segment gatherer."""
        self._config = config.copy()
        self._pattern_configs = self._config.pop('patterns')
        self._subject = None
        self._timeliness = dt.timedelta(seconds=config.get("timeliness", 1200))

        # This gets the 'keep_parsed_keys' valid for all patterns
        self._keep_parsed_keys = self._config.get('keep_parsed_keys', [])

        self._patterns = self._create_patterns()

        self._elements = list(self._patterns.keys())

        self._time_tolerance = self._config.get("time_tolerance", 30)
        self._bundle_datasets = self._config.get("bundle_datasets", False)

        self._num_files_premature_publish = self._config.get(
            "num_files_premature_publish", -1)

        self.slots = OrderedDict()

        self.time_name = self._config.get('time_name', 'start_time')
        # Floor the scene start time to the given full minutes
        self._group_by_minutes = self._config.get('group_by_minutes', None)

        self._loop = False
        self._providing_server = self._config.get('providing_server')

    def _create_patterns(self):
        return {
            key: Pattern(key, pattern_config, self._config)
            for key, pattern_config in self._pattern_configs.items()
        }

    def _clear_slot(self, time_slot):
        """Clear data."""
        if time_slot in self.slots:
            del self.slots[time_slot]

    def _reinitialize_gatherer(self, time_slot, missing_files_check=True):
        """Publish file dataset and reinitialize gatherer."""
        slot = self.slots[time_slot]

        # Diagnostic logging about delayed ...
        delayed_files = {}
        for key in self._elements:
            delayed_files.update(slot[key]['delayed_files'])
        if len(delayed_files) > 0:
            file_str = ''
            for key, value in delayed_files.items():
                file_str += "%s %f seconds, " % (key, value)
            logger.warning("Files received late: %s", file_str.strip(', '))

        # ... and missing files
        if missing_files_check:
            missing_files = set([])
            for key in self._elements:
                missing_files |= slot[key]['all_files'].difference(
                    slot[key]['received_files'])
            if len(missing_files) > 0:
                logger.warning(
                    "Missing files: %s", ', '.join(
                        (str(missing) for missing in missing_files)))

        # Remove tags that are not necessary for datasets
        for tag in REMOVE_TAGS:
            try:
                del slot.output_metadata[tag]
            except KeyError:
                pass

        output_metadata = slot.output_metadata.copy()

        if self._bundle_datasets and "dataset" not in output_metadata:
            output_metadata["dataset"] = []
            for collection in output_metadata["collection"].values():
                output_metadata["dataset"].extend(collection['dataset'])
            del output_metadata["collection"]

        self._publish(output_metadata)

    def _publish(self, metadata):
        if "dataset" in metadata:
            msg = pmessage.Message(self._subject, "dataset", metadata)
        else:
            msg = pmessage.Message(self._subject, "collection", metadata)
        logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

    def _generate_publish_service_name(self):
        publish_service_name = "segment_gatherer"
        for key in sorted(self._elements):
            publish_service_name += "_" + str(key)
        return publish_service_name

    def _setup_messaging(self):
        """Set up messaging."""
        self._subject = self._config['posttroll']['publish_topic']
        topics = self._config['posttroll'].get('topics')
        addresses = self._config['posttroll'].get('addresses')
        publish_port = self._config['posttroll'].get('publish_port', 0)
        nameservers = self._config['posttroll'].get('nameservers', [])
        services = self._config['posttroll'].get('services', "")
        self._listener = ListenerContainer(topics=topics,
                                           addresses=addresses,
                                           services=services)
        # Name each segment_gatherer with the section/patterns name.
        # This way the user can subscribe to a specific segment_gatherer service instead of all.
        publish_service_name = self._generate_publish_service_name()
        self._publisher = publisher.NoisyPublisher(publish_service_name,
                                                   port=publish_port,
                                                   nameservers=nameservers)
        self._publisher.start()

    def run(self):
        """Run SegmentGatherer."""
        self._setup_messaging()

        self._loop = True
        while self._loop:
            self.triage_slots()

            # Check listener for new messages
            try:
                msg = self._listener.output_queue.get(True, 1)
            except AttributeError:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Empty:
                continue

            if msg.type in ["file", "dataset"]:
                # If a providing server is configured, skip messages not coming from it
                if self._providing_server and self._providing_server != msg.host:
                    continue
                logger.info("New message received: %s", str(msg))
                self.process(msg)

    def triage_slots(self):
        """Check if there are slots ready for publication."""
        slots = self.slots.copy()
        for slot_time, slot in slots.items():
            slot_time = str(slot_time)
            status = slot.get_status()
            if status == Status.SLOT_READY:
                # Collection ready, publish and remove
                self._reinitialize_gatherer(slot_time)
                self._clear_slot(slot_time)
            if status == Status.SLOT_READY_BUT_WAIT_FOR_MORE:
                # Collection ready; publish but wait for more
                self._reinitialize_gatherer(slot_time,
                                            missing_files_check=False)
            elif status == Status.SLOT_OBSOLETE_TIMEOUT:
                # Collection unfinished and obsolete, discard
                self._clear_slot(slot_time)
            else:
                # Collection unfinished, wait for more data
                pass

    def stop(self):
        """Stop gatherer."""
        logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            if self._listener.thread is not None:
                self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message."""
        # Find the correct parser for this file
        try:
            message = self.message_from_posttroll(msg)
            pattern = message.pattern
        except TypeError:
            logger.debug("No parser matching message, skipping.")
            return

        # Check if time of the raw is in scheduled range
        if "_start_time_pattern" in pattern:
            schedule_ok = self.check_if_time_is_in_interval(
                pattern["_start_time_pattern"], message.id_time)
            if not schedule_ok:
                logger.info(
                    "Hour pattern '%s' skip: %s" + " for start_time: %s",
                    pattern.name, message.uid(),
                    message.id_time.strftime("%H:%M"))
                return

        slot_time = self._find_time_slot(message.id_time)

        # Init metadata etc if this is the first file
        if slot_time not in self.slots:
            slot = self._create_slot(message)
        else:
            slot = self.slots[slot_time]

        slot.add_file(message)

    def message_from_posttroll(self, msg):
        """Create a message object from a posttroll message instance."""
        for pattern in self._patterns.values():
            try:
                if pattern.parser.matches(msg):
                    return Message(msg, pattern)
            except KeyError as err:
                logger.debug("No key " + str(err) + " in message.")
        raise TypeError

    def _find_time_slot(self, time_obj):
        """Find time slot and return the slot as a string.

        If no slots are close enough, return *str(time_obj)*
        """
        for slot in self.slots:
            time_slot = self.slots[slot].output_metadata[self.time_name]
            time_diff = time_obj - time_slot
            if abs(time_diff.total_seconds()) < self._time_tolerance:
                logger.debug("Found existing time slot, using that")
                return slot

        return str(time_obj)

    def _create_slot(self, message):
        """Init wanted, all and critical files."""
        timestamp = str(message.id_time)
        logger.debug("Adding new slot: %s", timestamp)

        slot = Slot(timestamp, message.filtered_metadata, self._patterns,
                    self._timeliness, self._num_files_premature_publish)
        self.slots[timestamp] = slot
        return slot

    def check_if_time_is_in_interval(self, time_range, raw_start_time):
        """Check if raw time is inside configured interval."""
        time_ok = False

        # Convert check time into int variables
        raw_time = (60 * raw_start_time.hour) + raw_start_time.minute
        if time_range["midnight"] and raw_time < time_range["start"]:
            raw_time += 24 * 60

        # Check start and end time
        if time_range["start"] <= raw_time <= time_range["end"]:
            # Raw time in range, check interval
            if ((raw_time - time_range["start"]) % time_range["delta"]) == 0:
                time_ok = True

        return time_ok
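
To make the interval check concrete, a worked example of the arithmetic in check_if_time_is_in_interval (the time_range values are hypothetical; "start", "end" and "delta" are minutes of the day):

# Every 15 minutes between 09:00 and 12:00, no midnight wrap:
time_range = {"start": 9 * 60, "end": 12 * 60, "delta": 15, "midnight": False}

raw_time = 9 * 60 + 30  # a granule starting at 09:30
in_range = time_range["start"] <= raw_time <= time_range["end"]          # True
on_grid = ((raw_time - time_range["start"]) % time_range["delta"]) == 0  # True
# 09:30 therefore passes the check; 09:20 would fail the delta test.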
Example No. 21
class GeoGatherer(object):
    """Gatherer for geostationary satellite segments"""
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        services = ""
        if config.has_option(section, 'services'):
            services = config.get(section, 'services').split()
        self._listener = ListenerContainer(topics=topics, services=services)
        self._publisher = publisher.NoisyPublisher("geo_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(
                seconds=config.getint(section, "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=20)

        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

        self.logger = logging.getLogger("geo_gatherer")
        self._loop = False

    def _clear_data(self):
        """Clear data."""
        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

    def _init_data(self, msg):
        """Init wanted, all and critical files"""
        # Init metadata struct
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                self.metadata[key] = msg.data[key]
        self.metadata['dataset'] = []

        # Critical files that are required, otherwise production will fail
        self.critical_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "critical_files"))
        # These files are wanted, but not critical for production
        self.wanted_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "wanted_files"))
        self.all_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "all_files"))

    def _compose_filenames(self, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.metadata.copy()
        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = [
                    '%06d' % i for i in range(int(segments[0]),
                                              int(segments[-1]) + 1)
                ]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.compose(meta)
                result.add(fname)

        return result
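
    # Worked illustration of the item-string format (hypothetical values):
    # 'VIS006:1-8' splits into channel_name 'VIS006' and the segment range
    # '1'-'8', which the '%06d' format above expands to '000001' ... '000008';
    # ':PRO' yields an empty channel_name and the single segment 'PRO'.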

    def _publish(self):
        """Publish file dataset and reinitialize gatherer."""

        # Diagnostic logging about delayed ...
        if len(self.delayed_files) > 0:
            file_str = ''
            for key in self.delayed_files:
                file_str += "%s %f seconds, " % (key, self.delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))
        # and missing files
        missing_files = self.all_files.difference(self.received_files)
        if len(missing_files) > 0:
            self.logger.warning("Missing files: %s", ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", self.metadata)
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        self._clear_data()

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def collection_ready(self):
        """Determine if collection is ready to be published."""
        # If no files have been collected, return False
        if len(self.received_files) == 0:
            return False
        # If all wanted files have been received, return True
        if self.wanted_files.union(self.critical_files).issubset(
                self.received_files):
            return True
        # If all critical files have been received ...
        if self.critical_files.issubset(self.received_files):
            # and timeout is reached, return True
            if self._timeout is not None and \
               self._timeout <= dt.datetime.utcnow():
                return True
            # else, set timeout if not already running
            else:
                if self._timeout is None:
                    self._timeout = dt.datetime.utcnow() + self._timeliness
                    self.logger.info("Setting timeout to %s",
                                     str(self._timeout))
                return False

        # In other cases continue gathering
        return False

    def run(self):
        """Run GeoGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if collection is ready for publication
            if self.collection_ready():
                self._publish()

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.output_queue.get(True, 1)
            except AttributeError:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        if self._providing_server and self._providing_server != msg.host:
            return

        mda = self._parser.parse(msg.data["uid"])
        if msg.data['uid'] in self.received_files:
            return
        # Init metadata etc if this is the first file
        if len(self.metadata) == 0:
            self._init_data(msg)
        # If the nominal time of the new segment is later than the
        # current metadata has, ...
        elif mda["nominal_time"] > self.metadata["nominal_time"]:
            # timeout ...
            self._timeout = dt.datetime.utcnow()
            # and check if the collection is ready and publish
            if self.collection_ready():
                self._publish()
                self._clear_data()
                self._init_data(msg)
            # or discard data and start new collection
            else:
                self.logger.warning("Collection not finished before new "
                                    "started")
                missing_files = self.all_files.difference(self.received_files)
                self.logger.warning("Missing files: %s", missing_files)
                self._clear_data()
                self._init_data(msg)

        # Add uid and uri
        self.metadata['dataset'].append({
            'uri': msg.data['uri'],
            'uid': msg.data['uid']
        })

        # If critical files have been received but the collection is
        # not complete, add the file to list of delayed files
        if self.critical_files.issubset(self.received_files):
            delay = dt.datetime.utcnow() - (self._timeout - self._timeliness)
            self.delayed_files[msg.data['uid']] = delay.total_seconds()

        # Add to received files
        self.received_files.add(msg.data['uid'])
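
A hedged sketch of wiring the gatherer up, assuming an INI-style config parsed with the standard library (file and section names hypothetical):

from configparser import ConfigParser

config = ConfigParser()
config.read('geo_gatherer.ini')
gatherer = GeoGatherer(config, 'msg_hrit')
gatherer.run()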
Example No. 22
class EndUserNotifier(Thread):
    """The Notifier class - sending mails or text messages to end users upon incoming messages."""
    def __init__(self, configfile, netrcfile=NETRCFILE):
        """Initialize the EndUserNotifier class."""
        super().__init__()
        self.configfile = configfile
        self._netrcfile = netrcfile
        self.options = {}

        config = read_config(self.configfile)
        self._set_options_from_config(config)

        self.host = socket.gethostname()
        LOG.debug("netrc file path = %s", self._netrcfile)
        self.secrets = netrc(self._netrcfile)

        self.smtp_server = self.options.get('smtp_server')
        self.domain = self.options.get('domain')
        self.sender = self.options.get('sender')
        self.subject = self.options.get('subject')

        self.recipients = RecipientDataStruct()
        self._set_recipients()

        self.max_number_of_fires_in_sms = self.options.get(
            'max_number_of_fires_in_sms', 2)
        LOG.debug("Max number of fires in SMS: %d",
                  self.max_number_of_fires_in_sms)

        self.fire_data = self.options.get('fire_data')
        self.unsubscribe_address = self.options.get('unsubscribe_address')
        self.unsubscribe_text = self.options.get('unsubscribe_text')

        if not self.domain:
            raise IOError('Missing domain specification in config!')

        self.input_topic = self.options['subscribe_topics'][0]
        LOG.debug("Input topic: %s", self.input_topic)

        self.output_topic = self.options['publish_topic']

        self.listener = None
        self.publisher = None
        self.loop = False
        self._setup_and_start_communication()

    def _set_recipients(self):
        """Set the recipients lists."""
        self.recipients._set_recipients(
            self.options.get('recipients'),
            self.options.get('recipients_attachment'))
        self.recipients.subject = self.subject

    def _setup_and_start_communication(self):
        """Set up the Posttroll communication and start the publisher."""
        LOG.debug("Input topic: %s", self.input_topic)
        self.listener = ListenerContainer(topics=[self.input_topic])
        self.publisher = NoisyPublisher("end_user_notifier")
        self.publisher.start()
        self.loop = True
        signal.signal(signal.SIGTERM, self.signal_shutdown)

    def _set_options_from_config(self, config):
        """From the configuration on disk set the option dictionary, holding all metadata for processing."""

        for item in config:
            self.options[item] = config[item]

        if isinstance(self.options.get('subscribe_topics'), str):
            subscribe_topics = self.options.get('subscribe_topics').split(',')
            # Drop empty entries; removing items from a list while
            # iterating over it would skip elements.
            subscribe_topics = [item for item in subscribe_topics if item]
            self.options['subscribe_topics'] = subscribe_topics

        if isinstance(self.options.get('publish_topics'), str):
            publish_topics = self.options.get('publish_topics').split(',')
            publish_topics = [item for item in publish_topics if item]
            self.options['publish_topics'] = publish_topics

        unsubscribe = config.get('unsubscribe')
        if unsubscribe:
            for key in unsubscribe:
                self.options['unsubscribe_' + key] = unsubscribe[key]

    def signal_shutdown(self, *args, **kwargs):
        """Shutdown the Notifier process."""
        self.close()

    def run(self):
        """Run the Notifier."""
        while self.loop:
            try:
                msg = self.listener.output_queue.get(timeout=1)
                LOG.debug("Message: %s", str(msg.data))
            except Empty:
                continue
            else:
                if msg.type in ['info']:
                    # No fires detected - no notification to send:
                    LOG.info(
                        "Message type info: No fires detected - no notification to send."
                    )
                    continue
                elif msg.type not in ['file', 'collection', 'dataset']:
                    LOG.debug("Message type not supported: %s", str(msg.type))
                    continue

                output_msg = self.notify_end_users(msg)
                if output_msg:
                    LOG.debug("Sending message: %s", str(output_msg))
                    self.publisher.send(str(output_msg))
                else:
                    LOG.debug("No message to send")

    def notify_end_users(self, msg):
        """Send notifications to configured end users (mail and text messages)."""
        LOG.debug("Start sending notifications to configured end users.")

        url = urlparse(msg.data.get('uri'))
        LOG.info('File path: %s', str(url.path))
        filename = url.path

        ffdata = read_geojson_data(filename)
        if not ffdata:
            return None

        platform_name = msg.data.get("platform_name")

        # Create the message(s).
        # Some recipients (typically via e-mail) should have the full message and an attachment
        # Other recipients (typically via SMS) should have several smaller messages and no attachment
        #
        full_message, sub_messages = self.create_message_content(
            ffdata['features'], "\n" + self.unsubscribe_text)

        username, password = self._get_mailserver_login_credentials()
        server = self._start_smtp_server(username, password, self.recipients)

        self._send_notifications_without_attachments(server, self.recipients,
                                                     sub_messages,
                                                     platform_name)
        self._send_notifications_with_attachments(server, self.recipients,
                                                  full_message, filename,
                                                  platform_name)

        return _create_output_message(msg, self.output_topic,
                                      self.recipients.recipients_all)

    def _send_notifications_with_attachments(self, server, recipients,
                                             full_message, filename,
                                             platform_name):
        """Send notifications with attachments."""

        notification = MIMEMultipart()
        notification['From'] = self.sender
        if platform_name:
            notification['Subject'] = (recipients.subject +
                                       ' Satellit = %s' % platform_name)
        else:
            notification['Subject'] = recipients.subject

        if recipients.region_name:
            full_message = recipients.region_name + ":\n" + full_message

        notification.attach(MIMEText(full_message, 'plain', 'UTF-8'))
        LOG.debug("Length of message: %d", len(full_message))

        part = MIMEBase('application', "octet-stream")
        with open(filename, 'rb') as file:
            part.set_payload(file.read())
            encoders.encode_base64(part)
        part.add_header(
            'Content-Disposition',
            'attachment; filename="{}"'.format(Path(filename).name))
        notification.attach(part)

        for recip in recipients.recipients_with_attachment:
            notification['To'] = recip
            LOG.info("Send fire notification to %s", str(recip))
            LOG.debug("Subject: %s", str(recipients.subject))
            txt = notification.as_string()
            server.sendmail(self.sender, recip, txt)
            LOG.debug("Text sent: %s", txt)

        server.quit()

    def _send_notifications_without_attachments(self, server, recipients,
                                                sub_messages, platform_name):
        """Send notifications without attachments."""

        for submsg in sub_messages:
            notification = MIMEMultipart()
            notification['From'] = self.sender
            if platform_name:
                notification['Subject'] = (recipients.subject +
                                           ' Satellit = %s' % platform_name)
            else:
                notification['Subject'] = recipients.subject

            notification.attach(MIMEText(submsg, 'plain', 'UTF-8'))

            for recip in recipients.recipients_without_attachment:
                notification['To'] = recip
                LOG.info("Send fire notification to %s", str(recip))
                LOG.debug("Subject: %s", str(recipients.subject))
                txt = notification.as_string()
                server.sendmail(self.sender, recip, txt)
                LOG.debug("Text sent: %s", txt)

    def _get_mailserver_login_credentials(self):
        """Get the login credentials for the mail server."""
        host_secrets = self.secrets.authenticators(self.host)
        if host_secrets is None:
            LOG.error("Failed getting authentication secrets for host: %s",
                      self.host)
            raise IOError("Check out the details in the netrc file: %s",
                          self._netrcfile)

        username, _, password = host_secrets

        return username, password

    def _start_smtp_server(self, username, password, recipients):
        """Start the smtp server and loging."""
        server = smtplib.SMTP(self.smtp_server)
        server.starttls()
        server.ehlo(self.domain)
        server.rcpt(recipients.recipients_all)
        server.login(username, password)

        return server

    def create_message_content(self, gjson_features, unsubscr):
        """Create the full message string and the list of sub-messages."""
        full_msg = ''
        msg_list = []
        outstr = ''
        for idx, firespot in enumerate(gjson_features):
            if idx % self.max_number_of_fires_in_sms == 0 and idx > 0:
                full_msg = full_msg + outstr
                if len(unsubscr) > 0:
                    outstr = outstr + unsubscr

                LOG.debug('%d: Sub message = <%s>', idx, outstr)
                msg_list.append(outstr)
                outstr = ''

            lonlats = firespot['geometry']['coordinates']
            outstr = outstr + '%f N, %f E\n' % (lonlats[1], lonlats[0])
            if ('observation_time' in self.fire_data
                    and 'observation_time' in firespot['properties']):
                timestr = firespot['properties']['observation_time']
                LOG.debug("Time string: %s", str(timestr))
                try:
                    dtobj = datetime.fromisoformat(timestr)
                    # Python > 3.6
                except AttributeError:
                    dtobj = datetime.strptime(
                        timestr.split('.')[0], '%Y-%m-%dT%H:%M:%S')

                outstr = outstr + '  %s\n' % dtobj.strftime('%d %b %H:%M')

            for prop in firespot['properties']:
                if prop in self.fire_data and prop not in ['observation_time']:
                    if prop in ['power', 'Power']:
                        outstr = outstr + '  FRP: %7.3f MW\n' % (
                            firespot['properties'][prop])
                    else:
                        outstr = outstr + ' FRP: %s\n' % (str(
                            firespot['properties'][prop]))

            LOG.debug("Message length so far: %d", len(outstr))
            LOG.debug("Max number of fires in sub message: %d",
                      self.max_number_of_fires_in_sms)

        if len(outstr) > 0:
            if len(unsubscr) > 0:
                outstr = outstr + unsubscr
            LOG.debug('%d: Sub message = <%s>', idx, outstr)
            msg_list.append(outstr)

        full_msg = full_msg + outstr

        LOG.debug("Full message: <%s>", full_msg)
        LOG.debug("Sub-messages: <%s>", str(msg_list))

        return full_msg, msg_list

    def close(self):
        """Shutdown the Notifier process."""
        LOG.info('Terminating the End User Notifier process.')
        self.loop = False
        try:
            self.listener.stop()
        except Exception:
            LOG.exception("Couldn't stop listener.")
        if self.publisher:
            try:
                self.publisher.stop()
            except Exception:
                LOG.exception("Couldn't stop publisher.")
Example No. 23

class SegmentGatherer(object):

    """Gatherer for geostationary satellite segments and multifile polar
    satellite granules."""

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()

        try:
            nameservers = config.get(section, 'nameserver')
            nameservers = nameservers.split()
        except (NoOptionError, ValueError):
            nameservers = []

        try:
            addresses = config.get(section, 'addresses')
            addresses = addresses.split()
        except (NoOptionError, ValueError):
            addresses = None

        try:
            publish_port = config.get(section, 'publish_port')
        except NoOptionError:
            publish_port = 0

        try:
            services = config.get(section, 'services').split()
        except (NoOptionError, ValueError):
            services = ""

        self._listener = ListenerContainer(topics=topics, addresses=addresses,
                                           services=services)
        self._publisher = publisher.NoisyPublisher("segment_gatherer",
                                                   port=publish_port,
                                                   nameservers=nameservers)
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)
        try:
            self._time_tolerance = config.getint(section, "time_tolerance")
        except NoOptionError:
            self._time_tolerance = 30
        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()

        self.time_name = config.get(section, 'time_name')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')

    def _clear_data(self, time_slot):
        """Clear data."""
        if time_slot in self.slots:
            del self.slots[time_slot]

    def _init_data(self, mda):
        """Init wanted, all and critical files"""
        # Init metadata struct
        metadata = mda.copy()
        metadata['dataset'] = []

        time_slot = str(metadata[self.time_name])
        self.logger.debug("Adding new slot: %s", time_slot)
        self.slots[time_slot] = {}
        self.slots[time_slot]['metadata'] = metadata.copy()

        # Critical files that are required, otherwise production will fail.
        # If there are no critical files, empty set([]) is used.
        try:
            critical_segments = self._config.get(self._section,
                                                 "critical_files")
            self.slots[time_slot]['critical_files'] = \
                self._compose_filenames(time_slot, critical_segments)
        except (NoOptionError, ValueError):
            self.slots[time_slot]['critical_files'] = set([])

        # These files are wanted, but not critical to production
        self.slots[time_slot]['wanted_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "wanted_files"))
        # Name of all the files
        self.slots[time_slot]['all_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "all_files"))

        self.slots[time_slot]['received_files'] = set([])
        self.slots[time_slot]['delayed_files'] = dict()
        self.slots[time_slot]['missing_files'] = set([])
        self.slots[time_slot]['timeout'] = None
        self.slots[time_slot]['files_till_premature_publish'] = \
            self._num_files_premature_publish

    def _compose_filenames(self, time_slot, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formated like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.slots[time_slot]['metadata'].copy()

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            meta = _copy_without_ignore_items(
                meta, ignored_keys=self._config.get(self._section,
                                                    'variable_tags').split(','))
        except NoOptionError:
            pass

        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                format_string = '%d'
                if len(segments[0]) > 1 and segments[0][0] == '0':
                    format_string = '%0' + str(len(segments[0])) + 'd'
                segments = [format_string % i for i in range(int(segments[0]),
                                                             int(segments[-1]) + 1)]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.globify(meta)
                result.add(fname)

        return result

    def _publish(self, time_slot, missing_files_check=True):
        """Publish file dataset and reinitialize gatherer."""

        data = self.slots[time_slot]

        # Diagnostic logging about delayed ...
        delayed_files = data['delayed_files']
        if len(delayed_files) > 0:
            file_str = ''
            for key in delayed_files:
                file_str += "%s %f seconds, " % (key, delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))

        if missing_files_check:
            # and missing files
            missing_files = data['all_files'].difference(
                data['received_files'])
            if len(missing_files) > 0:
                self.logger.warning("Missing files: %s",
                                    ', '.join(missing_files))

        # Remove tags that are not necessary for datasets
        for tag in REMOVE_TAGS:
            try:
                del data['metadata'][tag]
            except KeyError:
                pass

        msg = message.Message(self._subject, "dataset", data['metadata'])
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        # self._clear_data(time_slot)

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def update_timeout(self, slot):
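        """Update the timeout for the given slot."""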
        slot['timeout'] = dt.datetime.utcnow() + self._timeliness
        time_slot = str(slot['metadata'][self.time_name])
        self.logger.info("Setting timeout to %s for slot %s.",
                         str(slot['timeout']),
                         time_slot)

    def slot_ready(self, slot):
        """Determine if slot is ready to be published."""
        # If no files have been collected, the slot is not ready
        if len(slot['received_files']) == 0:
            return SLOT_NOT_READY

        time_slot = str(slot['metadata'][self.time_name])

        wanted_and_critical_files = slot[
            'wanted_files'].union(slot['critical_files'])
        num_wanted_and_critical_files_received = len(
            wanted_and_critical_files & slot['received_files'])

        self.logger.debug("Got %s wanted or critical files in slot %s.",
                          num_wanted_and_critical_files_received,
                          time_slot)

        if num_wanted_and_critical_files_received \
                == slot['files_till_premature_publish']:
            slot['files_till_premature_publish'] = -1
            return SLOT_READY_BUT_WAIT_FOR_MORE

        # If all wanted files have been received, the slot is ready
        if wanted_and_critical_files.issubset(
                slot['received_files']):
            self.logger.info("All files received for slot %s.",
                             time_slot)
            return SLOT_READY

        if slot['timeout'] is None:
            self.update_timeout(slot)

        if slot['timeout'] < dt.datetime.utcnow():
            if slot['critical_files'].issubset(slot['received_files']):
                # All critical files have been received
                # Timeout reached, collection ready
                self.logger.info("Timeout occured, required files received "
                                 "for slot %s.", time_slot)
                return SLOT_READY
            else:
                # Timeout reached, collection is obsolete
                self.logger.warning("Timeout occured and required files "
                                    "were not present, data discarded for "
                                    "slot %s.",
                                    time_slot)
                return SLOT_OBSOLETE_TIMEOUT

        # Timeout not reached, wait for more files
        return SLOT_NOT_READY

    def run(self):
        """Run SegmentGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if there are slots ready for publication
            slots = self.slots.copy()
            for slot in slots:
                slot = str(slot)
                status = self.slot_ready(slots[slot])
                if status == SLOT_READY:
                    # Collection ready, publish and remove
                    self._publish(slot)
                    self._clear_data(slot)
                elif status == SLOT_READY_BUT_WAIT_FOR_MORE:
                    # Collection ready, publish but wait for more files
                    self._publish(slot, missing_files_check=False)
                elif status == SLOT_OBSOLETE_TIMEOUT:
                    # Collection unfinished and obsolete, discard
                    self._clear_data(slot)
                else:
                    # Collection unfinished, wait for more data
                    pass

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.output_queue.get(True, 1)
            except AttributeError:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                if (self._providing_server and
                        self._providing_server != msg.host):
                    continue

                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        try:
            mda = self._parser.parse(msg.data["uid"])
        except ValueError:
            self.logger.debug("Unknown file, skipping.")
            return

        metadata = {}

        # Use values parsed from the filename as basis
        for key in mda:
            if key not in DO_NOT_COPY_KEYS:
                metadata[key] = mda[key]

        # Update with data given in the message
        for key in msg.data:
            if key not in DO_NOT_COPY_KEYS:
                metadata[key] = msg.data[key]

        time_slot = self._find_time_slot(metadata[self.time_name])

        # Init metadata etc if this is the first file
        if time_slot not in self.slots:
            self._init_data(metadata)
            slot = self.slots[time_slot]
            to_add = []
            for filename in slot['all_files']:
                if filename == msg.data['uid']:
                    continue
                url = urlparse(msg.data['uri'])
                path = os.path.join(os.path.dirname(url.path), filename)
                if not os.path.exists(path):
                    continue
                new_url = list(url)
                new_url[2] = path
                uri = urlunparse(new_url)

                slot['metadata']['dataset'].append({'uri': uri,
                                                    'uid': filename})
                to_add.append(filename)

            slot['received_files'].update(to_add)
            if to_add:
                self.logger.debug("Some files were already received %s",
                                  str(to_add))
                self.update_timeout(slot)

        slot = self.slots[time_slot]

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            mda = _copy_without_ignore_items(
                mda, ignored_keys=self._config.get(self._section,
                                                   'variable_tags').split(','))
        except NoOptionError:
            pass

        mask = self._parser.globify(mda)

        if mask in slot['received_files']:
            return

        # Add uid and uri
        slot['metadata']['dataset'].append({'uri': msg.data['uri'],
                                            'uid': msg.data['uid']})

        # Collect all sensors, not only the latest
        if type(msg.data["sensor"]) not in (tuple, list, set):
            msg.data["sensor"] = [msg.data["sensor"]]
        for sensor in msg.data["sensor"]:
            if "sensor" not in slot["metadata"]:
                slot["metadata"]["sensor"] = []
            if sensor not in slot["metadata"]["sensor"]:
                slot["metadata"]["sensor"].append(sensor)

        # If critical files have been received but the slot is
        # not complete, add the file to list of delayed files
        if len(slot['critical_files']) > 0 and \
           slot['critical_files'].issubset(slot['received_files']):
            delay = dt.datetime.utcnow() - (slot['timeout'] - self._timeliness)
            slot['delayed_files'][msg.data['uid']] = delay.total_seconds()

        # Add to received files
        slot['received_files'].add(mask)

    def _find_time_slot(self, time_obj):
        """Find time slot and return the slot as a string.  If no slots are
        close enough, return *str(time_obj)*"""
        for slot in self.slots:
            time_slot = self.slots[slot]['metadata'][self.time_name]
            time_diff = time_obj - time_slot
            if abs(time_diff.total_seconds()) < self._time_tolerance:
                self.logger.debug("Found existing time slot, using that")
                return str(time_slot)

        return str(time_obj)
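
# A minimal usage sketch for the INI-based SegmentGatherer above; the config
# file name and the section name are hypothetical placeholders.
from configparser import RawConfigParser

ini_config = RawConfigParser()
ini_config.read("segment_gatherer.ini")
gatherer = SegmentGatherer(ini_config, "msg-hrit")
try:
    gatherer.run()
except KeyboardInterrupt:
    gatherer.stop()
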
Example No. 24
class SegmentGatherer(object):
    """Gatherer for geostationary satellite segments and multifile polar satellite granules."""

    _listener = None
    _publisher = None

    def __init__(self, config):
        """Initialize the segment gatherer."""
        self._config = config
        self._subject = None
        self._patterns = config['patterns']

        self._time_tolerance = config.get("time_tolerance", 30)
        self._timeliness = dt.timedelta(seconds=config.get("timeliness", 1200))

        self._num_files_premature_publish = \
            config.get("num_files_premature_publish", -1)

        self.slots = OrderedDict()

        self._parsers = {
            key: Parser(self._patterns[key]['pattern'])
            for key in self._patterns
        }

        self.time_name = config.get('time_name', 'start_time')
        # Floor the scene start time to the given full minutes
        self._group_by_minutes = config.get('group_by_minutes', None)

        self._keep_parsed_keys = config.get('keep_parsed_keys', [])

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False
        self._providing_server = config.get('providing_server')

        # Convert check time into int minutes variables
        for key in self._patterns:
            if "start_time_pattern" in self._patterns[key]:
                time_conf = self._patterns[key]["start_time_pattern"]
                start_time_str = time_conf.get("start_time", "00:00")
                end_time_str = time_conf.get("end_time", "23:59")
                delta_time_str = time_conf.get("delta_time", "00:01")

                start_h, start_m = start_time_str.split(':')
                end_h, end_m = end_time_str.split(':')
                delta_h, delta_m = delta_time_str.split(':')
                interval = {}
                interval["start"] = (60 * int(start_h)) + int(start_m)
                interval["end"] = (60 * int(end_h)) + int(end_m)
                interval["delta"] = (60 * int(delta_h)) + int(delta_m)

                # Start-End time across midnight
                interval["midnight"] = False
                if interval["start"] > interval["end"]:
                    interval["end"] += 24 * 60
                    interval["midnight"] = True
                self._patterns[key]["_start_time_pattern"] = interval
                self.logger.info(
                    "Start Time pattern '%s' filter start:%s end:%s delta:%s",
                    key, start_time_str, end_time_str, delta_time_str)

    def _clear_data(self, time_slot):
        """Clear data."""
        if time_slot in self.slots:
            del self.slots[time_slot]

    def _init_data(self, mda):
        """Init wanted, all and critical files."""
        # Init metadata struct
        metadata = mda.copy()

        time_slot = str(metadata[self.time_name])
        self.logger.debug("Adding new slot: %s", time_slot)
        self.slots[time_slot] = {}
        self.slots[time_slot]['metadata'] = metadata.copy()
        self.slots[time_slot]['timeout'] = None

        # Critical files that are required, otherwise production will fail.
        # If there are no critical files, empty set([]) is used.
        patterns = self._config['patterns']
        if len(patterns) == 1:
            self.slots[time_slot]['metadata']['dataset'] = []
        else:
            self.slots[time_slot]['metadata']['collection'] = {}
        for key in patterns:
            if len(patterns) > 1:
                self.slots[time_slot]['metadata']['collection'][key] = \
                    {'dataset': [], 'sensor': []}
            self.slots[time_slot][key] = {}
            slot = self.slots[time_slot][key]
            is_critical_set = patterns[key].get("is_critical_set", False)
            slot['is_critical_set'] = is_critical_set
            slot['critical_files'] = set([])
            slot['wanted_files'] = set([])
            slot['all_files'] = set([])
            slot['received_files'] = set([])
            slot['delayed_files'] = dict()
            slot['missing_files'] = set([])
            slot['files_till_premature_publish'] = \
                self._num_files_premature_publish

            critical_segments = patterns[key].get("critical_files", None)
            fname_set = self._compose_filenames(key, time_slot,
                                                critical_segments)
            if critical_segments:
                slot['critical_files'].update(fname_set)

            else:
                if is_critical_set:
                    # If critical segments are not defined, but the
                    # file based on this pattern is required, add it
                    # to critical files
                    slot['critical_files'].update(fname_set)

                # In any case add it to the wanted and all files
                slot['wanted_files'].update(fname_set)
                slot['all_files'].update(fname_set)

            # These segments are wanted, but not critical to production
            wanted_segments = patterns[key].get("wanted_files", None)
            slot['wanted_files'].update(
                self._compose_filenames(key, time_slot, wanted_segments))

            # Name of all the files
            all_segments = patterns[key].get("all_files", None)
            slot['all_files'].update(
                self._compose_filenames(key, time_slot, all_segments))

    def _compose_filenames(self, key, time_slot, itm_str):
        """Compose filename set()s based on a pattern and item string.

        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'
        """
        # Empty set
        result = set()

        # Handle missing itm_str
        if itm_str in (None, ''):
            itm_str = ':'

        # Get copy of metadata
        meta = self.slots[time_slot]['metadata'].copy()

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        var_tags = self._config['patterns'][key].get('variable_tags', [])
        meta = _copy_without_ignore_items(meta, ignored_keys=var_tags)

        parser = self._parsers[key]

        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            if channel_name == '' and segments == '':
                # If the filename pattern has no segments/channels,
                # add the "plain" globified filename to the filename
                # set
                if ('channel_name' not in parser.fmt
                        and 'segment' not in parser.fmt):
                    result.add(parser.globify(meta))
                continue
            segments = segments.split('-')
            if len(segments) > 1:
                format_string = '%d'
                if len(segments[0]) > 1 and segments[0][0] == '0':
                    format_string = '%0' + str(len(segments[0])) + 'd'
                segments = [
                    format_string % i for i in range(int(segments[0]),
                                                     int(segments[-1]) + 1)
                ]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = parser.globify(meta)

                result.add(fname)

        return result

    def _publish(self, time_slot, missing_files_check=True):
        """Publish file dataset and reinitialize gatherer."""
        data = self.slots[time_slot]

        # Diagnostic logging about delayed ...
        delayed_files = {}
        for key in self._parsers:
            delayed_files.update(data[key]['delayed_files'])
        if len(delayed_files) > 0:
            file_str = ''
            for key in delayed_files:
                file_str += "%s %f seconds, " % (key, delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))

        # ... and missing files
        if missing_files_check:
            missing_files = set([])
            for key in self._parsers:
                missing_files.update(data[key]['all_files'].difference(
                    data[key]['received_files']))
            if len(missing_files) > 0:
                self.logger.warning("Missing files: %s",
                                    ', '.join(missing_files))

        # Remove tags that are not necessary for datasets
        for tag in REMOVE_TAGS:
            try:
                del data['metadata'][tag]
            except KeyError:
                pass

        if len(self._parsers) == 1:
            msg = message.Message(self._subject, "dataset", data['metadata'])
        else:
            msg = message.Message(self._subject, "collection",
                                  data['metadata'])
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        # self._clear_data(time_slot)

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def update_timeout(self, time_slot):
        """Update the timeout."""
        timeout = dt.datetime.utcnow() + self._timeliness
        self.slots[time_slot]['timeout'] = timeout
        self.logger.info("Setting timeout to %s for slot %s.", str(timeout),
                         time_slot)

    def slot_ready(self, time_slot):
        """Determine if slot is ready to be published."""
        slot = self.slots[time_slot]

        if slot['timeout'] is None:
            self.update_timeout(time_slot)
            return SLOT_NOT_READY

        status = {}
        num_files = {}
        for key in self._parsers:
            # Default
            status[key] = SLOT_NOT_READY
            if not slot[key]['is_critical_set']:
                status[key] = SLOT_NONCRITICAL_NOT_READY

            wanted_and_critical_files = slot[key]['wanted_files'].union(
                slot[key]['critical_files'])
            num_wanted_and_critical = len(wanted_and_critical_files
                                          & slot[key]['received_files'])

            num_files[key] = num_wanted_and_critical

            if num_wanted_and_critical == \
               slot[key]['files_till_premature_publish']:
                slot[key]['files_till_premature_publish'] = -1
                status[key] = SLOT_READY_BUT_WAIT_FOR_MORE

            if wanted_and_critical_files.issubset(slot[key]['received_files']):
                status[key] = SLOT_READY

        # Determine overall status
        return self.get_collection_status(status, slot['timeout'], time_slot)

    def get_collection_status(self, status, timeout, time_slot):
        """Determine the overall status of the collection."""
        if len(status) == 0:
            return SLOT_NOT_READY

        status_values = list(status.values())

        if all([val == SLOT_READY for val in status_values]):
            self.logger.info("Required files received "
                             "for slot %s.", time_slot)
            return SLOT_READY

        if dt.datetime.utcnow() > timeout:
            if (SLOT_NONCRITICAL_NOT_READY in status_values
                    and (SLOT_READY in status_values
                         or SLOT_READY_BUT_WAIT_FOR_MORE in status_values)):
                return SLOT_READY
            elif (SLOT_READY_BUT_WAIT_FOR_MORE in status_values
                  and SLOT_NOT_READY not in status_values):
                return SLOT_READY
            elif all(
                [val == SLOT_NONCRITICAL_NOT_READY for val in status_values]):
                for key in status.keys():
                    if len(self.slots[time_slot][key]['received_files']) > 0:
                        return SLOT_READY
                return SLOT_OBSOLETE_TIMEOUT
            else:
                self.logger.warning(
                    "Timeout occurred and required files "
                    "were not present, data discarded for "
                    "slot %s.", time_slot)
                return SLOT_OBSOLETE_TIMEOUT

        if SLOT_NOT_READY in status_values:
            return SLOT_NOT_READY
        if SLOT_NONCRITICAL_NOT_READY in status_values:
            return SLOT_NONCRITICAL_NOT_READY
        if SLOT_READY_BUT_WAIT_FOR_MORE in status_values:
            return SLOT_READY_BUT_WAIT_FOR_MORE

    def _setup_messaging(self):
        """Set up messaging."""
        self._subject = self._config['posttroll']['publish_topic']
        topics = self._config['posttroll'].get('topics')
        addresses = self._config['posttroll'].get('addresses')
        publish_port = self._config['posttroll'].get('publish_port', 0)
        nameservers = self._config['posttroll'].get('nameservers', [])
        services = self._config['posttroll'].get('services')
        self._listener = ListenerContainer(topics=topics,
                                           addresses=addresses,
                                           services=services)
        # Name each segment_gatherer with the section name.
        # This way the user can subscribe to a specific segment_gatherer service instead of all.
        publish_service_name = "segment_gatherer_" + self._config['section']
        self._publisher = publisher.NoisyPublisher(publish_service_name,
                                                   port=publish_port,
                                                   nameservers=nameservers)
        self._publisher.start()

    def run(self):
        """Run SegmentGatherer."""
        self._setup_messaging()

        self._loop = True
        while self._loop:
            # Check if there are slots ready for publication
            slots = self.slots.copy()
            for slot in slots:
                slot = str(slot)
                status = self.slot_ready(slot)
                if status == SLOT_READY:
                    # Collection ready, publish and remove
                    self._publish(slot)
                    self._clear_data(slot)
                elif status == SLOT_READY_BUT_WAIT_FOR_MORE:
                    # Collection ready, publish but wait for more files
                    self._publish(slot, missing_files_check=False)
                elif status == SLOT_OBSOLETE_TIMEOUT:
                    # Collection unfinished and obsolete, discard
                    self._clear_data(slot)
                else:
                    # Collection unfinished, wait for more data
                    pass

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.output_queue.get(True, 1)
            except AttributeError:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Empty:
                continue

            if msg.type == "file":
                # If a providing server is configured, skip messages
                # coming from other hosts
                if self._providing_server and self._providing_server != msg.host:
                    continue
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            if self._listener.thread is not None:
                self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message."""
        mda = None

        try:
            uid = msg.data['uid']
        except KeyError:
            self.logger.debug("Ignoring: %s", str(msg))
            return

        # Find the correct parser for this file
        key = self.key_from_fname(uid)
        if key is None:
            self.logger.debug("Unknown file, skipping.")
            return

        parser = self._parsers[key]
        mda = parser.parse(msg.data["uid"])
        mda = self._floor_time(mda)

        metadata = copy_metadata(mda,
                                 msg,
                                 keep_parsed_keys=self._keep_parsed_keys)

        # Check if the raw data time is within the scheduled range
        if "_start_time_pattern" in self._patterns[key]:
            schedule_ok = self.check_schedule_time(
                self._patterns[key]["_start_time_pattern"],
                metadata[self.time_name])
            if not schedule_ok:
                self.logger.info(
                    "Hour pattern '%s' skip: %s for start_time: %s:%s",
                    key, msg.data["uid"], metadata[self.time_name].hour,
                    metadata[self.time_name].minute)
                return

        time_slot = self._find_time_slot(metadata[self.time_name])

        # Init metadata etc if this is the first file
        if time_slot not in self.slots:
            self._init_data(metadata)

        # Check if this file has been received already
        self.add_file(time_slot, key, mda, msg.data)

    def _floor_time(self, mda):
        """Floor time to full minutes."""
        if self._group_by_minutes is None:
            return mda
        start_time = mda[self.time_name]
        mins = start_time.minute
        fl_mins = int(mins / self._group_by_minutes) * self._group_by_minutes
        start_time = dt.datetime(start_time.year, start_time.month,
                                 start_time.day, start_time.hour, fl_mins, 0)
        mda[self.time_name] = start_time

        return mda

    def add_file(self, time_slot, key, mda, msg_data):
        """Add file to the correct filelist."""
        uri = urlparse(msg_data['uri']).path
        uid = msg_data['uid']
        slot = self.slots[time_slot][key]
        meta = self.slots[time_slot]['metadata']

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        ignored_keys = \
            self._config['patterns'][key].get('variable_tags', [])
        mda = _copy_without_ignore_items(mda, ignored_keys=ignored_keys)

        mask = self._parsers[key].globify(mda)
        if mask in slot['received_files']:
            self.logger.debug("File already received")
            return
        if mask not in slot['all_files']:
            self.logger.debug("%s not in %s", mask, slot['all_files'])
            return

        # self.update_timeout(time_slot)
        timeout = self.slots[time_slot]['timeout']

        # Add uid and uri
        if len(self._patterns) == 1:
            meta['dataset'].append({'uri': uri, 'uid': uid})
            sensors = meta.get('sensor', [])
        else:
            meta['collection'][key]['dataset'].append({'uri': uri, 'uid': uid})
            sensors = meta['collection'][key].get('sensor', [])

        # Collect all sensors, not only the latest
        if not isinstance(msg_data["sensor"], (tuple, list, set)):
            msg_data["sensor"] = [msg_data["sensor"]]
        if not isinstance(sensors, list):
            sensors = [sensors]
        for sensor in msg_data["sensor"]:
            if sensor not in sensors:
                sensors.append(sensor)
        meta['sensor'] = sensors

        # If critical files have been received but the slot is
        # not complete, add the file to list of delayed files
        if len(slot['critical_files']) > 0 and \
           slot['critical_files'].issubset(slot['received_files']):
            delay = dt.datetime.utcnow() - (timeout - self._timeliness)
            if delay.total_seconds() > 0:
                slot['delayed_files'][uid] = delay.total_seconds()

        # Add to received files
        slot['received_files'].add(mask)
        self.logger.info("%s processed", uid)

    def key_from_fname(self, uid):
        """Get the keys from a filename."""
        for key in self._parsers:
            try:
                _ = self._parsers[key].parse(uid)
                return key
            except ValueError:
                pass

    def _find_time_slot(self, time_obj):
        """Find time slot and return the slot as a string.

        If no slots are close enough, return *str(time_obj)*
        """
        for slot in self.slots:
            time_slot = self.slots[slot]['metadata'][self.time_name]
            time_diff = time_obj - time_slot
            if abs(time_diff.total_seconds()) < self._time_tolerance:
                self.logger.debug("Found existing time slot, using that")
                return str(time_slot)

        return str(time_obj)

    def check_schedule_time(self, check_time, raw_start_time):
        """Check if raw time is inside configured interval."""
        time_ok = False

        # Convert check time into int variables
        raw_time = (60 * raw_start_time.hour) + raw_start_time.minute
        if check_time["midnight"] and raw_time < check_time["start"]:
            raw_time += 24 * 60

        # Check start and end time
        if raw_time >= check_time["start"] and raw_time <= check_time["end"]:
            # Raw time in range, check interval
            if ((raw_time - check_time["start"]) % check_time["delta"]) == 0:
                time_ok = True

        return time_ok
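
# A minimal usage sketch for the dict-configured SegmentGatherer above; every
# value below is a hypothetical placeholder.
config = {
    'section': 'msg-hrit',
    'patterns': {
        'hrit': {
            'pattern': '{channel_name}-{segment}-{start_time:%Y%m%d%H%M}',
            'critical_files': ':PRO,:EPI',
            'wanted_files': 'VIS006:1-8',
            'all_files': 'VIS006:1-8',
        },
    },
    'posttroll': {
        'topics': ['/new/file'],
        'publish_topic': '/gathered/dataset',
    },
    'timeliness': 1200,
}

gatherer = SegmentGatherer(config)
try:
    gatherer.run()
except KeyboardInterrupt:
    gatherer.stop()
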
Example No. 25
class ImageScaler(object):
    '''Class for scaling images to defined sizes.'''

    # Config options for the current received message
    out_dir = ''
    update_existing = False
    is_backup = False
    subject = None
    crops = []
    sizes = []
    tags = []
    timeliness = 10
    static_image_fname = None
    areaname = None
    in_pattern = None
    fileparts = {}
    out_pattern = None
    text_pattern = None
    text_settings = None
    area_def = None
    overlay_config = None
    filepath = None
    existing_fname_parts = {}
    time_name = 'time'
    time_slot = None
    fill_value = None

    def __init__(self, config):
        self.config = config
        topics = config.sections()
        self.listener = ListenerContainer(topics=topics)
        self._loop = True

        if GSHHS_DATA_ROOT:
            self._cw = ContourWriter(GSHHS_DATA_ROOT)
        else:
            self._cw = None
        self._force_gc = False

    def stop(self):
        '''Stop scaler before shutting down.'''
        if self._loop:
            self._loop = False
            if self.listener is not None:
                self.listener.stop()

    def run(self):
        '''Start waiting for messages.

        On message arrival, read the image, scale down to the defined
        sizes and add coastlines.
        '''

        while self._loop:
            # Wait for new messages
            try:
                msg = self.listener.output_queue.get(True, 5)
            except KeyboardInterrupt:
                self.stop()
                raise
            except queue_empty:
                continue

            logging.info("New message with topic %s", msg.subject)

            self.subject = msg.subject
            self.filepath = urlparse(msg.data["uri"]).path

            try:
                self._update_current_config()
            except (NoOptionError, NoSectionError):
                logging.warning("Skip processing for this message.")
                continue

            self.time_name = self._get_time_name(msg.data)
            # Adjust in_pattern and out_pattern to match this time_name
            self.in_pattern = adjust_pattern_time_name(self.in_pattern,
                                                       self.time_name)
            self.out_pattern = adjust_pattern_time_name(
                self.out_pattern, self.time_name)

            # parse filename parts from the incoming file
            try:
                self.fileparts = parse(self.in_pattern,
                                       os.path.basename(self.filepath))
            except ValueError:
                logging.info("Filepattern doesn't match, skipping.")
                logging.debug("in_pattern: %s", self.in_pattern)
                logging.debug("fname: %s", os.path.basename(self.filepath))
                continue
            self.fileparts['areaname'] = self.areaname
            self._tidy_platform_name()

            self.time_slot = msg.data[self.time_name]
            existing_fname_parts = \
                self._check_existing(msg.data[self.time_name])

            # There is already a matching image which isn't going to
            # be updated
            if existing_fname_parts is None:
                continue
            self.existing_fname_parts = existing_fname_parts

            # Read the image
            img = read_image(self.filepath)

            if img is None:
                logging.error("Could not read image %s", self.filepath)
                continue

            # Add overlays, if any
            img = self.add_overlays(img)

            # Save image(s)
            self.save_images(img)

            # Delete obsolete image object
            del img

            # Run garbage collection if configured
            self._gc()

    def _gc(self):
        """Run garbage collection if it is configured."""
        if self._force_gc:
            num = gc.collect()
            logging.debug("Garbage collection cleaned %s objects", num)

    def _get_time_name(self, info):
        """"Try to find the name for 'nominal' time"""
        for key in info:
            if "time" in key and "end" not in key and "proc" not in key:
                return key
        return None

    def add_overlays(self, img):
        """Add overlays to image.  Add to cache, if not already there."""
        if self.overlay_config is None:
            return img

        if self._cw is None:
            logging.warning("GSHHS_DATA_ROOT is not set, "
                            "unable to add coastlines")
            return img

        if self.area_def is None:
            logging.warning("Area definition not available, "
                            "can't add overlays!")
            return img

        return add_overlay_from_config(img, self._cw, self.overlay_config,
                                       self.area_def)

    def save_images(self, img):
        """Save image(s)"""

        # Loop through different image sizes
        num = np.max([len(self.sizes), len(self.crops), len(self.tags)])
        for i in range(num):
            img_out = img.copy()
            # Crop the image
            try:
                img_out = crop_image(img_out, self.crops[i])
                logging.debug("Applied crop: %s", str(self.crops[i]))
            except IndexError:
                logging.debug("No valid crops configured")

            # Resize the image
            try:
                img_out = resize_image(img_out, self.sizes[i])
            except IndexError:
                logging.debug("No valid sizes configured")

            # Update existing image if configured to do so
            if self.update_existing and len(self.existing_fname_parts) > 0:
                try:
                    self.existing_fname_parts['tag'] = self.tags[i]
                except IndexError:
                    pass
                fname = compose(os.path.join(self.out_dir, self.out_pattern),
                                self.existing_fname_parts)
                img_out = self._update_existing_img(img_out, fname)

                # Add text
                img_out = self._add_text(img_out, update_img=True)
            # In other case, save as a new image
            else:
                # Add text
                img_out = self._add_text(img_out, update_img=False)
                # Compose filename
                try:
                    self.fileparts['tag'] = self.tags[i]
                except IndexError:
                    pass
                fname = compose(os.path.join(self.out_dir, self.out_pattern),
                                self.fileparts)

            # Save image
            save_image(img_out,
                       fname,
                       adef=self.area_def,
                       fill_value=self.fill_value,
                       save_options=self.save_options)

            # Update static image, if given in config
            try:
                self.fileparts['tag'] = self.tags[i]
            except IndexError:
                pass
            self._update_static_img(img_out)

    def _get_save_options(self):
        """Get save options from config"""
        save_tags = self._get_conf_with_default('save_tags')
        if save_tags is not None:
            save_tags = save_tags.split()
        compress = self._get_conf_with_default('compress')
        zlevel = int(self._get_conf_with_default('zlevel'))
        tile = self._get_conf_with_default('tile') in ('1', 1, 'True', True)
        blockxsize = int(self._get_conf_with_default('blockxsize'))
        blockysize = int(self._get_conf_with_default('blockysize'))
        overviews = self._get_conf_with_default('overviews')
        if overviews is not None:
            overviews = [int(i) for i in overviews.split()]
        save_options = {
            'tags': save_tags,
            'compress': compress,
            'zlevel': zlevel,
            'tile': tile,
            'blockxsize': blockxsize,
            'blockysize': blockysize,
            'overviews': overviews
        }
        return save_options

    def _update_current_config(self):
        """Update the current config to class attributes."""

        # These are mandatory config items, so handle them first
        self._get_mandatory_config_items()

        self._parse_crops()
        self._parse_sizes()
        self._parse_tags()
        self._get_text_settings()

        # Get image save options
        self.save_options = self._get_save_options()

        self.out_dir = self._get_conf_with_default('out_dir')

        self.update_existing = self._get_bool('update_existing')

        self.is_backup = self._get_bool('only_backup')

        self.timeliness = int(self._get_conf_with_default('timeliness'))

        self.fill_value = self._get_fill_value()

        self.static_image_fname_pattern = \
            self._get_conf_with_default("static_image_fname_pattern")

        self.overlay_config = \
            self._get_conf_with_default('overlay_config_fname')
        self._force_gc = self._get_bool('force_gc')

    def _get_conf_with_default(self, item):
        """Get a config item and use a default if no value is available"""
        return _get_conf_with_default(self.config, self.subject, item)

    def _get_bool(self, key):
        """Get *key* from config and interpret it as boolean"""
        val = self._get_conf_with_default(key)
        if isinstance(val, bool):
            return val
        return val.lower() in ['yes', '1', 'true']

    def _get_fill_value(self):
        """Parse fill value"""
        fill_value = self._get_conf_with_default('fill_value')
        if not isinstance(fill_value, (int, type(None))):
            fill_value = int(fill_value)
        return fill_value

    def _get_text_settings(self):
        """Parse text overlay pattern and text settings"""
        self.text_pattern = self._get_conf_with_default('text_pattern')
        self.text_settings = _get_text_settings(self.config, self.subject)

    def _get_mandatory_config_items(self):
        """Get mandatory config items and log possible errors"""
        try:
            self.areaname = self.config.get(self.subject, 'areaname')
            try:
                self.area_def = get_area_def(self.areaname)
            except (IOError, NoOptionError):
                self.area_def = None
                logging.warning("Area definition not available")
            self.in_pattern = self.config.get(self.subject, 'in_pattern')
            self.out_pattern = self.config.get(self.subject, 'out_pattern')
        except NoOptionError:
            logging.error("Required option missing!")
            logging.error(
                "Check that 'areaname', 'in_pattern' and "
                "'out_pattern' are all defined under section %s", self.subject)
            raise KeyError("Required config item missing")
        except NoSectionError:
            logging.error("No config section for message subject %s",
                          self.subject)
            raise KeyError("Missing config section")

    def _tidy_platform_name(self):
        """Remove "-" from platform names"""
        tidy = self._get_bool('tidy_platform_name')
        if tidy:
            self.fileparts['platform_name'] = self.fileparts[
                'platform_name'].replace('-', '')

    def _parse_crops(self):
        """Parse crop settings from the raw crop config"""
        crop_conf = self._get_conf_with_default('crops')
        if isinstance(crop_conf, list):
            self.crops = crop_conf
            return

        self.crops = []
        for crop in crop_conf.split(','):
            if 'x' in crop and '+' in crop:
                # Crop strings are formatted like this:
                # <x_size>x<y_size>+<x_start>+<y_start>
                # eg. 1000x300+103+200
                # Origin (0, 0) is at top-left
                parts = crop.split('+')
                left, up = map(int, parts[1:])
                x_size, y_size = map(int, parts[0].split('x'))
                right, bottom = left + x_size, up + y_size
                crop = (left, up, right, bottom)

                self.crops.append(crop)
            else:
                self.crops.append(None)

    def _parse_sizes(self):
        """Parse crop settings from crop config"""
        size_conf = self._get_conf_with_default('sizes')
        if isinstance(size_conf, list):
            self.sizes = size_conf
            return

        self.sizes = []
        for size in size_conf.split(','):
            self.sizes.append([int(val) for val in size.split('x')])

    def _parse_tags(self):
        """Parse tags from tag config"""
        tag_conf = self._get_conf_with_default('tags')

        if isinstance(tag_conf, list):
            self.tags = tag_conf
            return
        self.tags = tag_conf.split(',')

    def _check_existing(self, start_time):
        """Check if there's an existing image that should be updated"""

        # Check if something similar has already been made:
        # checks for platform_name, areaname and
        # start_time +- timeliness minutes
        check_start_time = start_time - \
            dt.timedelta(minutes=self.timeliness)
        check_dict = self.fileparts.copy()
        try:
            check_dict["tag"] = self.tags[0]
        except IndexError:
            pass
        if self.is_backup:
            check_dict["platform_name"] = '*'
            check_dict["sat_loc"] = '*'
        # check_dict["composite"] = '*'

        first_overpass = True
        update_fname_parts = {}
        for i in range(2 * self.timeliness + 1):
            check_dict[self.time_name] = \
                check_start_time + dt.timedelta(minutes=i)
            glob_pattern = compose(
                os.path.join(self.out_dir, self.out_pattern), check_dict)
            logging.debug("Check pattern: %s", glob_pattern)
            glob_fnames = glob.glob(glob_pattern)
            if len(glob_fnames) > 0:
                fname = os.path.basename(glob_fnames[0])
                first_overpass = False
                logging.debug("Found files: %s", str(glob_fnames))
                try:
                    update_fname_parts = parse(self.out_pattern, fname)
                    update_fname_parts["composite"] = \
                        self.fileparts["composite"]
                    if not self.is_backup:
                        try:
                            update_fname_parts["platform_name"] = \
                                self.fileparts["platform_name"]
                            return update_fname_parts
                        except KeyError:
                            pass
                except ValueError:
                    logging.debug("Parsing failed for update_fname_parts.")
                    logging.debug("out_pattern: %s, basename: %s",
                                  self.out_pattern, fname)
                    update_fname_parts = {}

        # Only backup, so save only if there were no matches
        if self.is_backup and not first_overpass:
            logging.info("File already exists, no backuping needed.")
            return None
        # No existing image
        else:
            return {}

    def _update_static_img(self, img):
        """Update image with static filename"""
        if self.static_image_fname_pattern is None:
            return

        fname = compose(
            os.path.join(self.out_dir, self.static_image_fname_pattern),
            self.fileparts)
        img = self._update_existing_img(img, fname)
        img = self._add_text(img, update_img=False)

        save_image(img,
                   fname,
                   adef=self.area_def,
                   fill_value=self.fill_value,
                   save_options=self.save_options)

        logging.info("Updated image with static filename: %s", fname)

    def _add_text(self, img, update_img=False):
        """Add text to the given image"""
        if self.text_pattern is None:
            return img

        if update_img:
            text = compose(self.text_pattern, self.existing_fname_parts)
        else:
            text = compose(self.text_pattern, self.fileparts)

        return add_text(img, text, self.text_settings)

    def _update_existing_img(self, img, fname):
        """Update existing image"""
        logging.info("Updating image %s with image %s", fname, self.filepath)
        img_out = update_existing_image(fname, img, fill_value=self.fill_value)

        return img_out
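
# A minimal usage sketch for the ImageScaler above; the config file name is a
# hypothetical placeholder.  Each config section name doubles as a message
# topic to subscribe to.
from configparser import RawConfigParser

scaler_config = RawConfigParser()
scaler_config.read("image_scaler.ini")
scaler = ImageScaler(scaler_config)
try:
    scaler.run()
except KeyboardInterrupt:
    scaler.stop()
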
Example No. 26
class Dispatcher(Thread):
    """Class that dispatches files."""
    def __init__(self, config_file):
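        """Initialize the dispatcher."""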
        super().__init__()
        self.config = None
        self.topics = None
        self.listener = None
        self.loop = True
        self.config_handler = DispatchConfig(config_file, self.update_config)
        signal.signal(signal.SIGTERM, self.signal_shutdown)

    def signal_shutdown(self, *args, **kwargs):
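        """Shut down the dispatcher on SIGTERM."""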
        self.close()

    def update_config(self, new_config):
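        """Update the configuration and recreate the listener when topics change."""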
        old_config = self.config
        topics = set()
        try:
            for _client, client_config in new_config.items():
                topics |= set(
                    sum([
                        item['topics']
                        for item in client_config['dispatch_configs']
                    ], []))
            if self.topics != topics:
                if self.listener is not None:
                    # FIXME: make sure to get the last messages though
                    self.listener.stop()
                self.config = new_config
                addresses = client_config.get('subscribe_addresses', None)
                nameserver = client_config.get('nameserver', 'localhost')
                services = client_config.get('subscribe_services', '')
                self.listener = ListenerContainer(topics=topics,
                                                  addresses=addresses,
                                                  nameserver=nameserver,
                                                  services=services)
                self.topics = topics

        except KeyError as err:
            logger.warning(
                'Invalid config for %s, keeping the old one running: %s',
                _client, str(err))
            self.config = old_config

    def run(self):
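        """Run the dispatcher."""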
        while self.loop:
            try:
                msg = self.listener.output_queue.get(timeout=1)
            except Empty:
                continue
            else:
                if msg.type != 'file':
                    continue
                destinations = self.get_destinations(msg)
                if destinations:
                    dispatch(msg.data['uri'], destinations)

    def get_destinations(self, msg):
        """Get the destinations for this message."""
        destinations = []
        for client, config in self.config.items():
            for item in config['dispatch_configs']:
                for topic in item['topics']:
                    if msg.subject.startswith(topic):
                        break
                else:
                    continue
                if check_conditions(msg, item):
                    destinations.append(self.create_dest_url(
                        msg, client, item))
        return destinations

    def create_dest_url(self, msg, client, item):
        """Create the destination URL and the connection parameters."""
        defaults = self.config[client]
        info_dict = dict()
        for key in ['host', 'directory', 'filepattern']:
            try:
                info_dict[key] = item[key]
            except KeyError:
                info_dict[key] = defaults[key]
        connection_parameters = item.get('connection_parameters',
                                         defaults.get('connection_parameters'))
        host = info_dict['host']
        path = os.path.join(info_dict['directory'], info_dict['filepattern'])
        mda = msg.data.copy()
        for key, aliases in defaults.get('aliases', {}).items():
            if key in mda:
                mda[key] = aliases.get(mda[key], mda[key])
        path = compose(path, mda)
        parts = urlsplit(host)
        host_path = urlunsplit(
            (parts.scheme, parts.netloc, path, parts.query, parts.fragment))
        return host_path, connection_parameters

    def close(self):
        """Shutdown the dispatcher."""
        logger.info('Terminating dispatcher.')
        self.loop = False
        self.listener.stop()
        self.config_handler.close()
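
# A minimal usage sketch for the Dispatcher above; the config file name is a
# hypothetical placeholder.  The dispatcher runs as a thread, so start() and
# join() come from threading.Thread.
dispatcher = Dispatcher("dispatch_config.yaml")
dispatcher.start()
try:
    dispatcher.join()
except KeyboardInterrupt:
    dispatcher.close()
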
Example No. 27
class Dispatcher(Thread):
    """Class that dispatches files."""
    def __init__(self,
                 config_file,
                 publish_port=None,
                 publish_nameservers=None):
        """Initialize dispatcher class."""
        super().__init__()
        self.config = None
        self.topics = None
        self.listener = None
        self.publisher = None
        if publish_port is not None:
            self.publisher = NoisyPublisher("dispatcher",
                                            port=publish_port,
                                            nameservers=publish_nameservers)
            self.publisher.start()
        self.loop = True
        self.config_handler = DispatchConfig(config_file, self.update_config)
        signal.signal(signal.SIGTERM, self.signal_shutdown)

    def signal_shutdown(self, *args, **kwargs):
        """Shutdown dispatcher."""
        self.close()

    def update_config(self, new_config):
        """Update configuration and reload listeners."""
        old_config = self.config
        topics = set()
        try:
            for _client, client_config in new_config.items():
                topics |= set(
                    sum([
                        item['topics']
                        for item in client_config['dispatch_configs']
                    ], []))
            if self.topics != topics:
                if self.listener is not None:
                    # FIXME: make sure to get the last messages though
                    self.listener.stop()
                self.config = new_config
                addresses = client_config.get('subscribe_addresses', None)
                nameserver = client_config.get('nameserver', 'localhost')
                services = client_config.get('subscribe_services', '')
                self.listener = ListenerContainer(topics=topics,
                                                  addresses=addresses,
                                                  nameserver=nameserver,
                                                  services=services)
                self.topics = topics

        except KeyError as err:
            logger.warning(
                'Invalid config for %s, keeping the old one running: %s',
                _client, str(err))
            self.config = old_config

    def run(self):
        """Run dispatcher."""
        while self.loop:
            try:
                msg = self.listener.output_queue.get(timeout=1)
            except Empty:
                continue
            else:
                if msg.type != 'file':
                    continue
                destinations = self.get_destinations(msg)
                if destinations:
                    success = dispatch(msg.data['uri'], destinations)
                    if self.publisher:
                        self._publish(msg, destinations, success)

    def _publish(self, msg, destinations, success):
        """Publish a message.

        The URI is replaced with the URI on the target server.

        """
        for url, _params, client in destinations:
            if not success[client]:
                continue
            info = msg.data.copy()
            info["uri"] = urlsplit(url).path
            topic = self.config[client].get("publish_topic")
            if topic is None:
                logger.error("Publish topic not configured for '%s'", client)
                continue
            topic = compose(topic, info)
            # Use a distinct name so the incoming `msg` is not shadowed
            # on subsequent iterations of the loop.
            pub_msg = Message(topic, 'file', info)
            logger.debug('Publishing %s', str(pub_msg))
            self.publisher.send(str(pub_msg))

    def get_destinations(self, msg):
        """Get the destinations for this message."""
        destinations = []
        for client, config in self.config.items():
            for disp_config in config['dispatch_configs']:
                for topic in disp_config['topics']:
                    if msg.subject.startswith(topic):
                        break
                else:
                    continue
                if check_conditions(msg, disp_config):
                    destinations.append(
                        self.create_dest_url(msg, client, disp_config))
        return destinations

    def create_dest_url(self, msg, client, disp_config):
        """Create the destination URL and the connection parameters."""
        defaults = self.config[client]
        info_dict = dict()
        for key in ['host', 'directory', 'filepattern']:
            try:
                info_dict[key] = disp_config[key]
            except KeyError:
                info_dict[key] = defaults[key]
        connection_parameters = disp_config.get(
            'connection_parameters', defaults.get('connection_parameters'))
        host = info_dict['host']
        path = os.path.join(info_dict['directory'], info_dict['filepattern'])
        mda = msg.data.copy()

        for key, aliases in defaults.get('aliases', {}).items():
            # An alias entry is either a single mapping or a list of
            # mappings, each optionally carrying an "_alias_name" that
            # renames the metadata key.
            if isinstance(aliases, dict):
                aliases = [aliases]

            for alias in aliases:
                # Work on a copy so popping "_alias_name" does not
                # mutate the configuration between calls.
                alias = alias.copy()
                new_key = alias.pop("_alias_name", key)
                if key in msg.data:
                    mda[new_key] = alias.get(msg.data[key], msg.data[key])

        path = compose(path, mda)
        parts = urlsplit(host)
        host_path = urlunsplit(
            (parts.scheme, parts.netloc, path, parts.query, parts.fragment))
        return host_path, connection_parameters, client

    def close(self):
        """Shutdown the dispatcher."""
        logger.info('Terminating dispatcher.')
        self.loop = False
        try:
            self.listener.stop()
        except Exception:
            logger.exception("Couldn't stop listener.")
        if self.publisher:
            try:
                self.publisher.stop()
            except Exception:
                logger.exception("Couldn't stop publisher.")
        try:
            self.config_handler.close()
        except Exception:
            logger.exception("Couldn't stop config handler.")
Example No. 28
class ActiveFiresPostprocessing(Thread):
    """The active fires post processor."""
    def __init__(self,
                 configfile,
                 shp_boarders,
                 shp_mask,
                 regional_filtermask=None):
        """Initialize the active fires post processor class."""
        super().__init__()
        self.shp_boarders = shp_boarders
        self.shp_filtermask = shp_mask
        self.regional_filtermask = regional_filtermask
        self.configfile = configfile
        self.options = {}

        config = read_config(self.configfile)
        self._set_options_from_config(config)

        self.host = socket.gethostname()

        self.timezone = self.options.get('timezone', 'GMT')

        self.input_topic = self.options['subscribe_topics'][0]
        self.output_topic = self.options['publish_topic']
        self.infile_pattern = self.options.get('af_pattern_ibands')
        self.outfile_pattern_national = self.options.get(
            'geojson_file_pattern_national')
        self.outfile_pattern_regional = self.options.get(
            'geojson_file_pattern_regional')
        self.output_dir = self.options.get('output_dir', '/tmp')

        frmt = self.options['regional_shapefiles_format']
        self.regional_shapefiles_globstr = globify(frmt)

        self.listener = None
        self.publisher = None
        self.loop = False
        self._setup_and_start_communication()

    def _setup_and_start_communication(self):
        """Set up the Posttroll communication and start the publisher."""
        logger.debug("Starting up... Input topic: %s", self.input_topic)
        now = datetime_from_utc_to_local(datetime.now(), self.timezone)
        logger.debug("Output times for timezone: {zone} Now = {time}".format(
            zone=str(self.timezone), time=now))

        self.listener = ListenerContainer(topics=[self.input_topic])
        self.publisher = NoisyPublisher("active_fires_postprocessing")
        self.publisher.start()
        self.loop = True
        signal.signal(signal.SIGTERM, self.signal_shutdown)

    def _set_options_from_config(self, config):
        """From the configuration on disk set the option dictionary, holding all metadata for processing."""
        for item in config:
            if not isinstance(config[item], dict):
                self.options[item] = config[item]

        if isinstance(self.options.get('subscribe_topics'), str):
            subscribe_topics = self.options.get('subscribe_topics').split(',')
            # Filter out empty strings left by stray commas; filtering
            # avoids mutating the list while iterating over it.
            self.options['subscribe_topics'] = [
                item for item in subscribe_topics if len(item) > 0]

        if isinstance(self.options.get('publish_topics'), str):
            publish_topics = self.options.get('publish_topics').split(',')
            self.options['publish_topics'] = [
                item for item in publish_topics if len(item) > 0]

    def signal_shutdown(self, *args, **kwargs):
        """Shutdown the Active Fires postprocessing."""
        self.close()

    def run(self):
        """Run the AF post processing."""
        while self.loop:
            try:
                msg = self.listener.output_queue.get(timeout=1)
                logger.debug("Message: %s", str(msg.data))
            except Empty:
                continue
            else:
                if msg.type not in ['file', 'collection', 'dataset']:
                    logger.debug("Message type not supported: %s",
                                 str(msg.type))
                    continue

                platform_name = msg.data.get('platform_name')
                filename = get_filename_from_uri(msg.data.get('uri'))
                if not os.path.exists(filename):
                    logger.warning("File does not exist!")
                    continue

                file_ok = check_file_type_okay(msg.data.get('type'))
                no_fires_text = 'No fire detections for this granule'
                output_messages = self._generate_no_fires_messages(
                    msg, no_fires_text)
                if not file_ok:
                    for output_msg in output_messages:
                        logger.debug("Sending message: %s", str(output_msg))
                        self.publisher.send(str(output_msg))
                    continue

                af_shapeff = ActiveFiresShapefileFiltering(
                    filename,
                    platform_name=platform_name,
                    timezone=self.timezone)
                afdata = af_shapeff.get_af_data(self.infile_pattern)

                if len(afdata) == 0:
                    # No detections in the file: send the "no fires"
                    # messages generated above.
                    for output_msg in output_messages:
                        logger.debug("Sending message: %s", str(output_msg))
                        self.publisher.send(str(output_msg))
                    continue

                output_messages, afdata = self.fires_filtering(msg, af_shapeff)

                for output_msg in output_messages:
                    if output_msg:
                        logger.debug("Sending message: %s", str(output_msg))
                        self.publisher.send(str(output_msg))

                # Do the regional filtering now:
                if not self.regional_filtermask:
                    logger.info("No regional filtering is attempted.")
                    continue

                if len(afdata) == 0:
                    logger.debug(
                        "No fires - so no regional filtering to be done!")
                    continue

                # FIXME! If afdata is empty (len=0) then it seems all data are inside all regions!
                af_shapeff = ActiveFiresShapefileFiltering(
                    afdata=afdata,
                    platform_name=platform_name,
                    timezone=self.timezone)
                regional_fmask = af_shapeff.get_regional_filtermasks(
                    self.regional_filtermask,
                    globstr=self.regional_shapefiles_globstr)
                regional_messages = self.regional_fires_filtering_and_publishing(
                    msg, regional_fmask, af_shapeff)
                for region_msg in regional_messages:
                    logger.debug("Sending message: %s", str(region_msg))
                    self.publisher.send(str(region_msg))

    def regional_fires_filtering_and_publishing(self, msg, regional_fmask,
                                                afsff_obj):
        """From the regional-fires-filter-mask and the fire detection data send regional messages."""
        logger.debug(
            "Perform regional masking on VIIRS AF detections and publish accordingly."
        )

        afdata = afsff_obj.get_af_data()
        fmda = afsff_obj.metadata

        fmda['platform'] = afsff_obj.platform_name

        pout = Parser(self.outfile_pattern_regional)

        output_messages = []
        regions_with_detections = 0
        for region_name in regional_fmask:
            if not regional_fmask[region_name]['some_inside_test_area']:
                continue

            regions_with_detections += 1
            fmda['region_name'] = regional_fmask[region_name]['attributes'][
                'Kod_omr']

            out_filepath = os.path.join(self.output_dir, pout.compose(fmda))
            logger.debug("Output file path = %s", out_filepath)
            data_in_region = afdata[regional_fmask[region_name]['mask']]
            filepath = store_geojson(out_filepath,
                                     data_in_region,
                                     platform_name=fmda['platform'])
            if not filepath:
                logger.warning(
                    "Something went wrong storing regional data to "
                    "GeoJSON - area: %s", str(region_name))
                continue

            outmsg = self._generate_output_message(filepath, msg,
                                                   regional_fmask[region_name])
            output_messages.append(outmsg)
            logger.info("Geojson file created! Number of fires in region = %d",
                        len(data_in_region))

        logger.debug(
            "Regional masking done. Number of regions with fire " +
            "detections on this granule: %s", str(regions_with_detections))
        return output_messages

    def fires_filtering(self, msg, af_shapeff):
        """Read Active Fire data and perform spatial filtering removing false detections.

        Do the national filtering first, and then filter out potential false
        detections by the special mask for that.

        """
        logger.debug(
            "Read VIIRS AF detections and perform quality control and spatial filtering"
        )

        fmda = af_shapeff.metadata
        # The metadata contains the time and more, but it is not being
        # transferred to the dataframe.attrs.

        pout = Parser(self.outfile_pattern_national)
        out_filepath = os.path.join(self.output_dir, pout.compose(fmda))
        logger.debug("Output file path = %s", out_filepath)

        # National filtering:
        af_shapeff.fires_filtering(self.shp_boarders)
        # Metadata should be transferred here!
        afdata_ff = af_shapeff.get_af_data()

        if len(afdata_ff) > 0:
            af_shapeff.fires_filtering(self.shp_filtermask,
                                       start_geometries_index=0,
                                       inside=False)
            afdata_ff = af_shapeff.get_af_data()

        filepath = store_geojson(out_filepath,
                                 afdata_ff,
                                 platform_name=af_shapeff.platform_name)
        out_messages = self.get_output_messages(filepath, msg, len(afdata_ff))

        return out_messages, afdata_ff

    def get_output_messages(self, filepath, msg, number_of_data):
        """Generate the adequate output message(s) depending on if an output file was created or not."""
        if filepath:
            logger.info(
                "geojson file created! Number of fires after filtering = %d",
                number_of_data)
            return [self._generate_output_message(filepath, msg)]
        else:
            logger.info(
                "No geojson file created, number of fires after filtering = %d",
                number_of_data)
            return self._generate_no_fires_messages(
                msg, 'No true fire detections inside national borders')

    def _generate_output_message(self, filepath, input_msg, region=None):
        """Create the output message to publish."""

        output_topic = generate_posttroll_topic(self.output_topic, region)
        to_send = prepare_posttroll_message(input_msg, region)
        to_send['uri'] = ('ssh://%s/%s' % (self.host, filepath))
        to_send['uid'] = os.path.basename(filepath)
        to_send['type'] = 'GEOJSON-filtered'
        to_send['format'] = 'geojson'
        to_send['product'] = 'afimg'
        pubmsg = Message(output_topic, 'file', to_send)
        return pubmsg

    def _generate_no_fires_messages(self, input_msg, msg_string):
        """Create the output messages to publish."""

        to_send = prepare_posttroll_message(input_msg)
        to_send['info'] = msg_string
        publish_messages = []
        for ext in ['National', 'Regional']:
            topic = self.output_topic + '/' + ext
            publish_messages.append(Message(topic, 'info', to_send))

        return publish_messages

    def close(self):
        """Shutdown the Active Fires postprocessing."""
        logger.info('Terminating Active Fires post processing.')
        self.loop = False
        try:
            self.listener.stop()
        except Exception:
            logger.exception("Couldn't stop listener.")
        if self.publisher:
            try:
                self.publisher.stop()
            except Exception:
                logger.exception("Couldn't stop publisher.")
Example No. 29
class WorldCompositeDaemon(object):
    """Daemon that collects area composites and builds global mosaics."""

    logger = logging.getLogger(__name__)
    publish_topic = "/global/mosaic/{areaname}"
    nameservers = None
    port = 0
    aliases = None
    broadcast_interval = 2

    def __init__(self, config):
        self.config = config
        self.slots = {}
        # Structure of self.slots is:
        # slots = {datetime(): {composite: {"fnames": [],
        #                                   "num": 0,
        #                                   "timeout": datetime()}}}
        self._parse_settings()
        self._listener = ListenerContainer(topics=config["topics"])
        self._set_message_settings()
        self._publisher = \
            NoisyPublisher("WorldCompositePublisher",
                           port=self.port,
                           aliases=self.aliases,
                           broadcast_interval=self.broadcast_interval,
                           nameservers=self.nameservers)
        self._publisher.start()
        self._loop = False
        if isinstance(config["area_def"], str):
            self.adef = get_area_def(config["area_def"])
        else:
            self.adef = config["area_def"]

    def run(self):
        """Listen to messages and make global composites"""
        self._loop = True

        while self._loop:
            if self._check_timeouts_and_save():
                num = gc.collect()
                self.logger.debug("%d objects garbage collected", num)

            # Get new messages from the listener
            msg = None
            try:
                msg = self._listener.output_queue.get(True, 1)
            except KeyboardInterrupt:
                self._loop = False
                break
            except queue_empty:
                continue

            if msg is not None and msg.type == "file":
                self._handle_message(msg)

        self._listener.stop()
        self._publisher.stop()

    def _set_message_settings(self):
        """Set message settings from config"""
        if "message_settings" not in self.config:
            return

        self.publish_topic = \
            self.config["message_settings"].get("publish_topic",
                                                "/global/mosaic/{areaname}")
        self.nameservers = \
            self.config["message_settings"].get("nameservers", None)
        self.port = self.config["message_settings"].get("port", 0)
        self.aliases = self.config["message_settings"].get("aliases", None)
        self.broadcast_interval = \
            self.config["message_settings"].get("broadcast_interval", 2)

    def _handle_message(self, msg):
        """Insert file from the message to correct time slot and composite"""
        # Check which time should be used as basis for timeout:
        # - "message" = time of message sending
        # - "nominal_time" = time of satellite data, read from message data
        # - "receive" = current time when message is read from queue
        # Default to use slot nominal time
        timeout_epoch = self.config.get("timeout_epoch", "nominal_time")

        self.logger.debug("New message received: %s", str(msg.data))
        fname = msg.data["uri"]
        tslot = msg.data["nominal_time"]
        composite = msg.data["productname"]
        if tslot not in self.slots:
            self.slots[tslot] = {}
            self.logger.debug("Adding new timeslot: %s", str(tslot))
        if composite not in self.slots[tslot]:
            if timeout_epoch == "message":
                epoch = msg.time
            elif timeout_epoch == "receive":
                epoch = dt.datetime.utcnow()
            else:
                epoch = tslot
            self.slots[tslot][composite] = \
                {"fnames": [], "num": 0,
                 "timeout": epoch +
                 dt.timedelta(minutes=self.config["timeout"])}
            self.logger.debug("Adding new composite to slot %s: %s",
                              str(tslot), composite)
        self.logger.debug("Adding file to slot %s/%s: %s",
                          str(tslot), composite, fname)
        self.slots[tslot][composite]["fnames"].append(fname)
        self.slots[tslot][composite]["num"] += 1

    def _check_timeouts_and_save(self):
        """Check timeouts, save completed images, and cleanup slots."""
        # Number of expected images
        num_expected = self.config["num_expected"]

        # Check timeouts and completed composites
        check_time = dt.datetime.utcnow()

        saved = False
        empty_slots = []
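        # Iterate over a shallow copy, since _create_global_mosaic()
        # deletes completed composites from self.slots during the loop.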
        slots = self.slots.copy()
        for slot in slots:
            composites = tuple(slots[slot].keys())
            for composite in composites:
                if (check_time > slots[slot][composite]["timeout"] or
                        slots[slot][composite]["num"] == num_expected):
                    fnames = slots[slot][composite]["fnames"]
                    self._create_global_mosaic(fnames, slot, composite)
                    saved = True

            # Collect empty slots
            if len(slots[slot]) == 0:
                empty_slots.append(slot)

        for slot in empty_slots:
            self.logger.debug("Removing empty time slot: %s",
                              str(slot))
            del self.slots[slot]

        return saved

    def _parse_settings(self):
        """Parse static settings from config"""
        lon_limits = LON_LIMITS.copy()
        try:
            lon_limits.update(self.config["lon_limits"])
        except KeyError:
            pass
        except TypeError:
            lon_limits = None
        self.config["lon_limits"] = lon_limits

        # Get image save options
        try:
            save_kwargs = self.config["save_settings"]
        except KeyError:
            save_kwargs = {}
        self.config["save_settings"] = save_kwargs

    def _create_global_mosaic(self, fnames, slot, composite):
        """Create and save global mosaic."""
        self.logger.info("Building composite %s for slot %s",
                         composite, str(slot))
        scn = Scene()
        file_parts = self._get_fname_parts(slot, composite)
        fname_out = file_parts["uri"]

        img = self._get_existing_image(fname_out)

        self.logger.info("Creating composite")
        scn['img'] = create_world_composite(fnames,
                                            self.adef,
                                            self.config["lon_limits"],
                                            img=img,
                                            logger=self.logger)
        self.logger.info("Saving %s", fname_out)
        scn.save_dataset('img', filename=fname_out,
                         **self.config["save_settings"])
        self._send_message(file_parts)
        del self.slots[slot][composite]

    def _get_fname_parts(self, slot, composite):
        """Get filename part dictionary"""
        file_parts = {'composite': composite,
                      'nominal_time': slot,
                      'areaname': self.adef.area_id}

        fname_out = compose(self.config["out_pattern"],
                            file_parts)
        file_parts['uri'] = fname_out
        file_parts['uid'] = os.path.basename(fname_out)

        return file_parts

    def _get_existing_image(self, fname_out):
        """Read an existing image and return it, or None if it doesn't exist."""
        # Check if we already have an image with this filename
        if os.path.exists(fname_out):
            img = read_image(fname_out, self.adef.area_id)
            self.logger.info("Existing image was read: %s", fname_out)
        else:
            img = None

        return img

    def _send_message(self, file_parts):
        """Send a message"""
        msg = Message(compose(self.publish_topic, file_parts),
                      "file", file_parts)
        self.logger.info("Sending message: %s", str(msg))
        self._publisher.send(str(msg))

    def stop(self):
        """Stop"""
        self.logger.info("Stopping WorldCompositor")
        self._listener.stop()
        self._publisher.stop()

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger