def run(prod_list, topics=None, test_message=None):
    tmessage = get_test_message(test_message)
    if tmessage:
        from threading import Thread as Process
        from posttroll.message import Message
    else:
        from multiprocessing import Process
    with open(prod_list) as fid:
        config = yaml.load(fid.read(), Loader=BaseLoader)
    topics = topics or config['product_list'].pop('subscribe_topics', None)
    if not tmessage:
        listener = ListenerContainer(topics=topics)
    while True:
        try:
            if tmessage:
                msg = Message(rawstr=tmessage)
            else:
                msg = listener.output_queue.get(True, 5)
        except KeyboardInterrupt:
            if not tmessage:
                listener.stop()
            return
        except queue_empty:
            continue
        proc = Process(target=process, args=(msg, prod_list))
        proc.start()
        proc.join()
        if tmessage:
            break
def run(prod_list, topics=None, test_message=None): """Spawn one or multiple subprocesses to run the jobs from the product list.""" tmessage = get_test_message(test_message) if tmessage: from threading import Thread as Process from posttroll.message import Message else: from multiprocessing import Process with open(prod_list) as fid: config = yaml.load(fid.read(), Loader=BaseLoader) topics = topics or config['product_list'].pop('subscribe_topics', None) if not tmessage: listener = ListenerContainer(topics=topics) while True: try: if tmessage: msg = Message(rawstr=tmessage) else: msg = listener.output_queue.get(True, 5) except KeyboardInterrupt: if not tmessage: listener.stop() return except Empty: continue q = Queue() proc = Process(target=process, args=(msg, prod_list, q)) proc.start() proc.join() while not q.empty(): list_of_files = [] x = q.get() if os.path.isfile(x): list_of_files.extend((x, os.path.getsize(x))) for result in list_of_files: if result in list_of_files: print(result) LOG.info("Data published") else: print("Files missing") LOG.info("Data missing") if tmessage: break return list_of_files
def run(prod_list, topics=None, test_message=None, nameserver='localhost', addresses=None):
    """Spawn one or multiple subprocesses to run the jobs from the product list."""
    LOG.info("Launching trollflow2")
    tmessage = get_test_message(test_message)
    if tmessage:
        from threading import Thread as Process
        from six.moves.queue import Queue
        from posttroll.message import Message
    else:
        from multiprocessing import Process, Queue
    with open(prod_list) as fid:
        config = yaml.load(fid.read(), Loader=BaseLoader)
    topics = topics or config['product_list'].pop('subscribe_topics', None)
    if not tmessage:
        listener = ListenerContainer(topics=topics, nameserver=nameserver,
                                     addresses=addresses)
    while True:
        try:
            if tmessage:
                msg = Message(rawstr=tmessage)
            else:
                msg = listener.output_queue.get(True, 5)
        except KeyboardInterrupt:
            if not tmessage:
                listener.stop()
            return
        except Empty:
            continue
        produced_files = Queue()
        proc = Process(target=process, args=(msg, prod_list, produced_files))
        start_time = datetime.now()
        proc.start()
        proc.join()
        try:
            exitcode = proc.exitcode
        except AttributeError:
            exitcode = 0
        check_results(produced_files, start_time, exitcode)
        if tmessage:
            break
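A minimal sketch of how this run() variant could be wired up as a command-line entry point; the argument parser and flag names below are assumptions that simply mirror the function's parameters and are not part of the library.

# Hypothetical entry point for run(); flag names are illustrative only.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Run trollflow2 on incoming messages")
    parser.add_argument("product_list", help="Path to the product list YAML file")
    parser.add_argument("-t", "--topics", nargs="*", default=None,
                        help="Topics to subscribe to (overrides the product list)")
    parser.add_argument("-m", "--test-message", default=None,
                        help="Process a single raw posttroll message and exit")
    parser.add_argument("-n", "--nameserver", default="localhost",
                        help="Nameserver to use for the listener")
    args = parser.parse_args()

    run(args.product_list, topics=args.topics, test_message=args.test_message,
        nameserver=args.nameserver)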
def run(topics, prod_list):
    listener = ListenerContainer(topics=topics)
    while True:
        try:
            msg = listener.output_queue.get(True, 5)
        except KeyboardInterrupt:
            listener.stop()
            return
        except queue_empty:
            continue
        proc = Process(target=process, args=(msg, prod_list))
        proc.start()
        proc.join()
        time.sleep(5)
def test_listener_container(self):
    """Test listener container."""
    pub = NoisyPublisher("test")
    pub.start()
    sub = ListenerContainer(topics=["/counter"])
    time.sleep(2)
    for counter in range(5):
        tested = False
        msg_out = Message("/counter", "info", str(counter))
        pub.send(str(msg_out))
        msg_in = sub.output_queue.get(True, 1)
        if msg_in is not None:
            self.assertEqual(str(msg_in), str(msg_out))
            tested = True
        self.assertTrue(tested)
    pub.stop()
    sub.stop()
def test_listener_container(self):
    """Test listener container."""
    from posttroll.message import Message
    from posttroll.publisher import NoisyPublisher
    from posttroll.listener import ListenerContainer

    pub = NoisyPublisher("test")
    pub.start()
    sub = ListenerContainer(topics=["/counter"])
    time.sleep(2)
    for counter in range(5):
        tested = False
        msg_out = Message("/counter", "info", str(counter))
        pub.send(str(msg_out))
        msg_in = sub.output_queue.get(True, 1)
        if msg_in is not None:
            self.assertEqual(str(msg_in), str(msg_out))
            tested = True
        self.assertTrue(tested)
    pub.stop()
    sub.stop()
class SegmentGatherer(object): """Gatherer for geostationary satellite segments and multifile polar satellite granules.""" def __init__(self, config, section): self._config = config self._section = section topics = config.get(section, 'topics').split() try: nameservers = config.get(section, 'nameserver') nameservers = nameservers.split() except (NoOptionError, ValueError): nameservers = [] try: addresses = config.get(section, 'addresses') addresses = addresses.split() except (NoOptionError, ValueError): addresses = None try: publish_port = config.get(section, 'publish_port') except NoOptionError: publish_port = 0 try: services = config.get(section, 'services').split() except (NoOptionError, ValueError): services = "" self._listener = ListenerContainer(topics=topics, addresses=addresses services=services) self._publisher = publisher.NoisyPublisher("segment_gatherer", port=publish_port, nameservers=nameservers) self._subject = config.get(section, "publish_topic") self._pattern = config.get(section, 'pattern') self._parser = Parser(self._pattern) try: self._time_tolerance = config.getint(section, "time_tolerance") except NoOptionError: self._time_tolerance = 30 try: self._timeliness = dt.timedelta(seconds=config.getint(section, "timeliness")) except (NoOptionError, ValueError): self._timeliness = dt.timedelta(seconds=1200) try: self._num_files_premature_publish = \ config.getint(section, "num_files_premature_publish") except (NoOptionError, ValueError): self._num_files_premature_publish = -1 self.slots = OrderedDict() self.time_name = config.get(section, 'time_name') self.logger = logging.getLogger("segment_gatherer") self._loop = False self._providing_server = None if config.has_option(section, 'providing_server'): self._providing_server = config.get(section, 'providing_server') def _clear_data(self, time_slot): """Clear data.""" if time_slot in self.slots: del self.slots[time_slot] def _init_data(self, mda): """Init wanted, all and critical files""" # Init metadata struct metadata = mda.copy() metadata['dataset'] = [] time_slot = str(metadata[self.time_name]) self.logger.debug("Adding new slot: %s", time_slot) self.slots[time_slot] = {} self.slots[time_slot]['metadata'] = metadata.copy() # Critical files that are required, otherwise production will fail. # If there are no critical files, empty set([]) is used. try: critical_segments = self._config.get(self._section, "critical_files") self.slots[time_slot]['critical_files'] = \ self._compose_filenames(time_slot, critical_segments) except (NoOptionError, ValueError): self.slots[time_slot]['critical_files'] = set([]) # These files are wanted, but not critical to production self.slots[time_slot]['wanted_files'] = \ self._compose_filenames(time_slot, self._config.get(self._section, "wanted_files")) # Name of all the files self.slots[time_slot]['all_files'] = \ self._compose_filenames(time_slot, self._config.get(self._section, "all_files")) self.slots[time_slot]['received_files'] = set([]) self.slots[time_slot]['delayed_files'] = dict() self.slots[time_slot]['missing_files'] = set([]) self.slots[time_slot]['timeout'] = None self.slots[time_slot]['files_till_premature_publish'] = \ self._num_files_premature_publish def _compose_filenames(self, time_slot, itm_str): """Compose filename set()s based on a pattern and item string. 
itm_str is formated like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'""" # Empty set result = set() # Get copy of metadata meta = self.slots[time_slot]['metadata'].copy() # Replace variable tags (such as processing time) with # wildcards, as these can't be forecasted. try: meta = _copy_without_ignore_items( meta, ignored_keys=self._config.get(self._section, 'variable_tags').split(',')) except NoOptionError: pass for itm in itm_str.split(','): channel_name, segments = itm.split(':') segments = segments.split('-') if len(segments) > 1: format_string = '%d' if len(segments[0]) > 1 and segments[0][0] == '0': format_string = '%0' + str(len(segments[0])) + 'd' segments = [format_string % i for i in range(int(segments[0]), int(segments[-1]) + 1)] meta['channel_name'] = channel_name for seg in segments: meta['segment'] = seg fname = self._parser.globify(meta) result.add(fname) return result def _publish(self, time_slot, missing_files_check=True): """Publish file dataset and reinitialize gatherer.""" data = self.slots[time_slot] # Diagnostic logging about delayed ... delayed_files = data['delayed_files'] if len(delayed_files) > 0: file_str = '' for key in delayed_files: file_str += "%s %f seconds, " % (key, delayed_files[key]) self.logger.warning("Files received late: %s", file_str.strip(', ')) if missing_files_check: # and missing files missing_files = data['all_files'].difference( data['received_files']) if len(missing_files) > 0: self.logger.warning("Missing files: %s", ', '.join(missing_files)) # Remove tags that are not necessary for datasets for tag in REMOVE_TAGS: try: del data['metadata'][tag] except KeyError: pass msg = message.Message(self._subject, "dataset", data['metadata']) self.logger.info("Sending: %s", str(msg)) self._publisher.send(str(msg)) # self._clear_data(time_slot) def set_logger(self, logger): """Set logger.""" self.logger = logger def update_timeout(self, slot): slot['timeout'] = dt.datetime.utcnow() + self._timeliness time_slot = str(slot['metadata'][self.time_name]) self.logger.info("Setting timeout to %s for slot %s.", str(slot['timeout']), time_slot) def slot_ready(self, slot): """Determine if slot is ready to be published.""" # If no files have been collected, return False if len(slot['received_files']) == 0: return SLOT_NOT_READY time_slot = str(slot['metadata'][self.time_name]) wanted_and_critical_files = slot[ 'wanted_files'].union(slot['critical_files']) num_wanted_and_critical_files_received = len( wanted_and_critical_files & slot['received_files']) self.logger.debug("Got %s wanted or critical files in slot %s.", num_wanted_and_critical_files_received, time_slot) if num_wanted_and_critical_files_received \ == slot['files_till_premature_publish']: slot['files_till_premature_publish'] = -1 return SLOT_READY_BUT_WAIT_FOR_MORE # If all wanted files have been received, return True if wanted_and_critical_files.issubset( slot['received_files']): self.logger.info("All files received for slot %s.", time_slot) return SLOT_READY if slot['timeout'] is None: self.update_timeout(slot) if slot['timeout'] < dt.datetime.utcnow(): if slot['critical_files'].issubset(slot['received_files']): # All critical files have been received # Timeout reached, collection ready self.logger.info("Timeout occured, required files received " "for slot %s.", time_slot) return SLOT_READY else: # Timeout reached, collection is obsolete self.logger.warning("Timeout occured and required files " "were not present, data discarded for " "slot %s.", time_slot) return SLOT_OBSOLETE_TIMEOUT # Timeout not reached, 
wait for more files return SLOT_NOT_READY def run(self): """Run SegmentGatherer""" self._publisher.start() self._loop = True while self._loop: # Check if there are slots ready for publication slots = self.slots.copy() for slot in slots: slot = str(slot) status = self.slot_ready(slots[slot]) if status == SLOT_READY: # Collection ready, publish and remove self._publish(slot) self._clear_data(slot) if status == SLOT_READY_BUT_WAIT_FOR_MORE: # Collection ready, publish and but wait for more self._publish(slot, missing_files_check=False) elif status == SLOT_OBSOLETE_TIMEOUT: # Collection unfinished and obslote, discard self._clear_data(slot) else: # Collection unfinished, wait for more data pass # Check listener for new messages msg = None try: msg = self._listener.output_queue.get(True, 1) except AttributeError: msg = self._listener.queue.get(True, 1) except KeyboardInterrupt: self.stop() continue except Queue.Empty: continue if msg.type == "file": if (self._providing_server and self._providing_server != msg.host): continue self.logger.info("New message received: %s", str(msg)) self.process(msg) def stop(self): """Stop gatherer.""" self.logger.info("Stopping gatherer.") self._loop = False if self._listener is not None: self._listener.stop() if self._publisher is not None: self._publisher.stop() def process(self, msg): """Process message""" try: mda = self._parser.parse(msg.data["uid"]) except ValueError: self.logger.debug("Unknown file, skipping.") return metadata = {} # Use values parsed from the filename as basis for key in mda: if key not in DO_NOT_COPY_KEYS: metadata[key] = mda[key] # Update with data given in the message for key in msg.data: if key not in DO_NOT_COPY_KEYS: metadata[key] = msg.data[key] time_slot = self._find_time_slot(metadata[self.time_name]) # Init metadata etc if this is the first file if time_slot not in self.slots: self._init_data(metadata) slot = self.slots[time_slot] to_add = [] for filename in slot['all_files']: if filename == msg.data['uid']: continue url = urlparse(msg.data['uri']) path = os.path.join(os.path.dirname(url.path), filename) if not os.path.exists(path): continue new_url = list(url) new_url[2] = path uri = urlunparse(new_url) slot['metadata']['dataset'].append({'uri': uri, 'uid': filename}) to_add.append(filename) slot['received_files'].update(to_add) if to_add: self.logger.debug("Some files were already received %s", str(to_add)) self.update_timeout(slot) slot = self.slots[time_slot] # Replace variable tags (such as processing time) with # wildcards, as these can't be forecasted. 
try: mda = _copy_without_ignore_items( mda, ignored_keys=self._config.get(self._section, 'variable_tags').split(',')) except NoOptionError: pass mask = self._parser.globify(mda) if mask in slot['received_files']: return # Add uid and uri slot['metadata']['dataset'].append({'uri': msg.data['uri'], 'uid': msg.data['uid']}) # Collect all sensors, not only the latest if type(msg.data["sensor"]) not in (tuple, list, set): msg.data["sensor"] = [msg.data["sensor"]] for sensor in msg.data["sensor"]: if "sensor" not in slot["metadata"]: slot["metadata"]["sensor"] = [] if sensor not in slot["metadata"]["sensor"]: slot["metadata"]["sensor"].append(sensor) # If critical files have been received but the slot is # not complete, add the file to list of delayed files if len(slot['critical_files']) > 0 and \ slot['critical_files'].issubset(slot['received_files']): delay = dt.datetime.utcnow() - (slot['timeout'] - self._timeliness) slot['delayed_files'][msg.data['uid']] = delay.total_seconds() # Add to received files slot['received_files'].add(mask) def _find_time_slot(self, time_obj): """Find time slot and return the slot as a string. If no slots are close enough, return *str(time_obj)*""" for slot in self.slots: time_slot = self.slots[slot]['metadata'][self.time_name] time_diff = time_obj - time_slot if abs(time_diff.total_seconds()) < self._time_tolerance: self.logger.debug("Found existing time slot, using that") return str(time_slot) return str(time_obj)
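A minimal sketch of the kind of INI section this configparser-based SegmentGatherer reads; only the option names come from the code above, while the section name, pattern and segment lists are hypothetical placeholders.

# Hypothetical configuration; option names match the config.get() calls above.
from configparser import RawConfigParser

example_ini = """
[msg_segments]
topics = /foo/bar
publish_topic = /segment/collection
pattern = {channel_name}_{segment}_{start_time:%Y%m%d%H%M}.raw
time_name = start_time
timeliness = 1200
time_tolerance = 30
critical_files = :PRO,:EPI
wanted_files = VIS006:1-8
all_files = VIS006:1-8
"""

config = RawConfigParser()
config.read_string(example_ini)
gatherer = SegmentGatherer(config, "msg_segments")
# gatherer.run()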
class SegmentGatherer(object): """Gatherer for geostationary satellite segments and multifile polar satellite granules.""" _listener = None _publisher = None def __init__(self, config): """Initialize the segment gatherer.""" self._config = config self._subject = None self._patterns = config['patterns'] self._time_tolerance = config.get("time_tolerance", 30) self._timeliness = dt.timedelta(seconds=config.get("timeliness", 1200)) self._num_files_premature_publish = \ config.get("num_files_premature_publish", -1) self.slots = OrderedDict() self._parsers = { key: Parser(self._patterns[key]['pattern']) for key in self._patterns } self.time_name = config.get('time_name', 'start_time') # Floor the scene start time to the given full minutes self._group_by_minutes = config.get('group_by_minutes', None) self._keep_parsed_keys = config.get('keep_parsed_keys', []) self.logger = logging.getLogger("segment_gatherer") self._loop = False self._providing_server = config.get('providing_server') # Convert check time into int minutes variables for key in self._patterns: if "start_time_pattern" in self._patterns[key]: time_conf = self._patterns[key]["start_time_pattern"] start_time_str = time_conf.get("start_time", "00:00") end_time_str = time_conf.get("end_time", "23:59") delta_time_str = time_conf.get("delta_time", "00:01") start_h, start_m = start_time_str.split(':') end_h, end_m = end_time_str.split(':') delta_h, delta_m = delta_time_str.split(':') interval = {} interval["start"] = (60 * int(start_h)) + int(start_m) interval["end"] = (60 * int(end_h)) + int(end_m) interval["delta"] = (60 * int(delta_h)) + int(delta_m) # Start-End time across midnight interval["midnight"] = False if interval["start"] > interval["end"]: interval["end"] += 24 * 60 interval["midnight"] = True self._patterns[key]["_start_time_pattern"] = interval self.logger.info( "Start Time pattern '%s' " + "filter start:%s end:%s delta:%s", key, start_time_str, end_time_str, delta_time_str) def _clear_data(self, time_slot): """Clear data.""" if time_slot in self.slots: del self.slots[time_slot] def _init_data(self, mda): """Init wanted, all and critical files.""" # Init metadata struct metadata = mda.copy() time_slot = str(metadata[self.time_name]) self.logger.debug("Adding new slot: %s", time_slot) self.slots[time_slot] = {} self.slots[time_slot]['metadata'] = metadata.copy() self.slots[time_slot]['timeout'] = None # Critical files that are required, otherwise production will fail. # If there are no critical files, empty set([]) is used. 
patterns = self._config['patterns'] if len(patterns) == 1: self.slots[time_slot]['metadata']['dataset'] = [] else: self.slots[time_slot]['metadata']['collection'] = {} for key in patterns: if len(patterns) > 1: self.slots[time_slot]['metadata']['collection'][key] = \ {'dataset': [], 'sensor': []} self.slots[time_slot][key] = {} slot = self.slots[time_slot][key] is_critical_set = patterns[key].get("is_critical_set", False) slot['is_critical_set'] = is_critical_set slot['critical_files'] = set([]) slot['wanted_files'] = set([]) slot['all_files'] = set([]) slot['received_files'] = set([]) slot['delayed_files'] = dict() slot['missing_files'] = set([]) slot['files_till_premature_publish'] = \ self._num_files_premature_publish critical_segments = patterns[key].get("critical_files", None) fname_set = self._compose_filenames(key, time_slot, critical_segments) if critical_segments: slot['critical_files'].update(fname_set) else: if is_critical_set: # If critical segments are not defined, but the # file based on this pattern is required, add it # to critical files slot['critical_files'].update(fname_set) # In any case add it to the wanted and all files slot['wanted_files'].update(fname_set) slot['all_files'].update(fname_set) # These segments are wanted, but not critical to production wanted_segments = patterns[key].get("wanted_files", None) slot['wanted_files'].update( self._compose_filenames(key, time_slot, wanted_segments)) # Name of all the files all_segments = patterns[key].get("all_files", None) slot['all_files'].update( self._compose_filenames(key, time_slot, all_segments)) def _compose_filenames(self, key, time_slot, itm_str): """Compose filename set()s based on a pattern and item string. itm_str is formated like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...' """ # Empty set result = set() # Handle missing itm_str if itm_str in (None, ''): itm_str = ':' # Get copy of metadata meta = self.slots[time_slot]['metadata'].copy() # Replace variable tags (such as processing time) with # wildcards, as these can't be forecasted. var_tags = self._config['patterns'][key].get('variable_tags', []) meta = _copy_without_ignore_items(meta, ignored_keys=var_tags) parser = self._parsers[key] for itm in itm_str.split(','): channel_name, segments = itm.split(':') if channel_name == '' and segments == '': # If the filename pattern has no segments/channels, # add the "plain" globified filename to the filename # set if ('channel_name' not in parser.fmt and 'segment' not in parser.fmt): result.add(parser.globify(meta)) continue segments = segments.split('-') if len(segments) > 1: format_string = '%d' if len(segments[0]) > 1 and segments[0][0] == '0': format_string = '%0' + str(len(segments[0])) + 'd' segments = [ format_string % i for i in range(int(segments[0]), int(segments[-1]) + 1) ] meta['channel_name'] = channel_name for seg in segments: meta['segment'] = seg fname = parser.globify(meta) result.add(fname) return result def _publish(self, time_slot, missing_files_check=True): """Publish file dataset and reinitialize gatherer.""" data = self.slots[time_slot] # Diagnostic logging about delayed ... delayed_files = {} for key in self._parsers: delayed_files.update(data[key]['delayed_files']) if len(delayed_files) > 0: file_str = '' for key in delayed_files: file_str += "%s %f seconds, " % (key, delayed_files[key]) self.logger.warning("Files received late: %s", file_str.strip(', ')) # ... 
and missing files if missing_files_check: missing_files = set([]) for key in self._parsers: missing_files = data[key]['all_files'].difference( data[key]['received_files']) if len(missing_files) > 0: self.logger.warning("Missing files: %s", ', '.join(missing_files)) # Remove tags that are not necessary for datasets for tag in REMOVE_TAGS: try: del data['metadata'][tag] except KeyError: pass if len(self._parsers) == 1: msg = message.Message(self._subject, "dataset", data['metadata']) else: msg = message.Message(self._subject, "collection", data['metadata']) self.logger.info("Sending: %s", str(msg)) self._publisher.send(str(msg)) # self._clear_data(time_slot) def set_logger(self, logger): """Set logger.""" self.logger = logger def update_timeout(self, time_slot): """Update the timeout.""" timeout = dt.datetime.utcnow() + self._timeliness self.slots[time_slot]['timeout'] = timeout self.logger.info("Setting timeout to %s for slot %s.", str(timeout), time_slot) def slot_ready(self, time_slot): """Determine if slot is ready to be published.""" slot = self.slots[time_slot] if slot['timeout'] is None: self.update_timeout(time_slot) return SLOT_NOT_READY status = {} num_files = {} for key in self._parsers: # Default status[key] = SLOT_NOT_READY if not slot[key]['is_critical_set']: status[key] = SLOT_NONCRITICAL_NOT_READY wanted_and_critical_files = slot[key]['wanted_files'].union( slot[key]['critical_files']) num_wanted_and_critical = len(wanted_and_critical_files & slot[key]['received_files']) num_files[key] = num_wanted_and_critical if num_wanted_and_critical == \ slot[key]['files_till_premature_publish']: slot[key]['files_till_premature_publish'] = -1 status[key] = SLOT_READY_BUT_WAIT_FOR_MORE if wanted_and_critical_files.issubset(slot[key]['received_files']): status[key] = SLOT_READY # Determine overall status return self.get_collection_status(status, slot['timeout'], time_slot) def get_collection_status(self, status, timeout, time_slot): """Determine the overall status of the collection.""" if len(status) == 0: return SLOT_NOT_READY status_values = list(status.values()) if all([val == SLOT_READY for val in status_values]): self.logger.info("Required files received " "for slot %s.", time_slot) return SLOT_READY if dt.datetime.utcnow() > timeout: if (SLOT_NONCRITICAL_NOT_READY in status_values and (SLOT_READY in status_values or SLOT_READY_BUT_WAIT_FOR_MORE in status_values)): return SLOT_READY elif (SLOT_READY_BUT_WAIT_FOR_MORE in status_values and SLOT_NOT_READY not in status_values): return SLOT_READY elif all( [val == SLOT_NONCRITICAL_NOT_READY for val in status_values]): for key in status.keys(): if len(self.slots[time_slot][key]['received_files']) > 0: return SLOT_READY return SLOT_OBSOLETE_TIMEOUT else: self.logger.warning( "Timeout occured and required files " "were not present, data discarded for " "slot %s.", time_slot) return SLOT_OBSOLETE_TIMEOUT if SLOT_NOT_READY in status_values: return SLOT_NOT_READY if SLOT_NONCRITICAL_NOT_READY in status_values: return SLOT_NONCRITICAL_NOT_READY if SLOT_READY_BUT_WAIT_FOR_MORE in status_values: return SLOT_READY_BUT_WAIT_FOR_MORE def _setup_messaging(self): """Set up messaging.""" self._subject = self._config['posttroll']['publish_topic'] topics = self._config['posttroll'].get('topics') addresses = self._config['posttroll'].get('addresses') publish_port = self._config['posttroll'].get('publish_port', 0) nameservers = self._config['posttroll'].get('nameservers', []) services = self._config['posttroll'].get('services') self._listener = 
ListenerContainer(topics=topics, addresses=addresses, services=services) # Name each segment_gatherer with the section name. # This way the user can subscribe to a specific segment_gatherer service instead of all. publish_service_name = "segment_gatherer_" + self._config['section'] self._publisher = publisher.NoisyPublisher(publish_service_name, port=publish_port, nameservers=nameservers) self._publisher.start() def run(self): """Run SegmentGatherer.""" self._setup_messaging() self._loop = True while self._loop: # Check if there are slots ready for publication slots = self.slots.copy() for slot in slots: slot = str(slot) status = self.slot_ready(slot) if status == SLOT_READY: # Collection ready, publish and remove self._publish(slot) self._clear_data(slot) if status == SLOT_READY_BUT_WAIT_FOR_MORE: # Collection ready, publish and but wait for more self._publish(slot, missing_files_check=False) elif status == SLOT_OBSOLETE_TIMEOUT: # Collection unfinished and obslote, discard self._clear_data(slot) else: # Collection unfinished, wait for more data pass # Check listener for new messages msg = None try: msg = self._listener.output_queue.get(True, 1) except AttributeError: msg = self._listener.queue.get(True, 1) except KeyboardInterrupt: self.stop() continue except Empty: continue if msg.type == "file": # If providing server is configured skip message if not from providing server if self._providing_server and self._providing_server != msg.host: continue self.logger.info("New message received: %s", str(msg)) self.process(msg) def stop(self): """Stop gatherer.""" self.logger.info("Stopping gatherer.") self._loop = False if self._listener is not None: if self._listener.thread is not None: self._listener.stop() if self._publisher is not None: self._publisher.stop() def process(self, msg): """Process message.""" mda = None try: uid = msg.data['uid'] except KeyError: self.logger.debug("Ignoring: %s", str(msg)) return # Find the correct parser for this file key = self.key_from_fname(uid) if key is None: self.logger.debug("Unknown file, skipping.") return parser = self._parsers[key] mda = parser.parse(msg.data["uid"]) mda = self._floor_time(mda) metadata = copy_metadata(mda, msg, keep_parsed_keys=self._keep_parsed_keys) # Check if time of the raw is in scheduled range if "_start_time_pattern" in self._patterns[key]: schedule_ok = self.check_schedule_time( self._patterns[key]["_start_time_pattern"], metadata[self.time_name]) if not schedule_ok: self.logger.info( "Hour pattern '%s' skip: %s" + " for start_time: %s:%s", key, msg.data["uid"], metadata[self.time_name].hour, metadata[self.time_name].minute) return time_slot = self._find_time_slot(metadata[self.time_name]) # Init metadata etc if this is the first file if time_slot not in self.slots: self._init_data(metadata) # Check if this file has been received already self.add_file(time_slot, key, mda, msg.data) def _floor_time(self, mda): """Floor time to full minutes.""" if self._group_by_minutes is None: return mda start_time = mda[self.time_name] mins = start_time.minute fl_mins = int(mins / self._group_by_minutes) * self._group_by_minutes start_time = dt.datetime(start_time.year, start_time.month, start_time.day, start_time.hour, fl_mins, 0) mda[self.time_name] = start_time return mda def add_file(self, time_slot, key, mda, msg_data): """Add file to the correct filelist.""" uri = urlparse(msg_data['uri']).path uid = msg_data['uid'] slot = self.slots[time_slot][key] meta = self.slots[time_slot]['metadata'] # Replace variable tags (such as processing 
time) with # wildcards, as these can't be forecasted. ignored_keys = \ self._config['patterns'][key].get('variable_tags', []) mda = _copy_without_ignore_items(mda, ignored_keys=ignored_keys) mask = self._parsers[key].globify(mda) if mask in slot['received_files']: self.logger.debug("File already received") return if mask not in slot['all_files']: self.logger.debug("%s not in %s", mask, slot['all_files']) return # self.update_timeout(time_slot) timeout = self.slots[time_slot]['timeout'] # Add uid and uri if len(self._patterns) == 1: meta['dataset'].append({'uri': uri, 'uid': uid}) sensors = meta.get('sensor', []) else: meta['collection'][key]['dataset'].append({'uri': uri, 'uid': uid}) sensors = meta['collection'][key].get('sensor', []) # Collect all sensors, not only the latest if not isinstance(msg_data["sensor"], (tuple, list, set)): msg_data["sensor"] = [msg_data["sensor"]] if not isinstance(sensors, list): sensors = [sensors] for sensor in msg_data["sensor"]: if sensor not in sensors: sensors.append(sensor) meta['sensor'] = sensors # If critical files have been received but the slot is # not complete, add the file to list of delayed files if len(slot['critical_files']) > 0 and \ slot['critical_files'].issubset(slot['received_files']): delay = dt.datetime.utcnow() - (timeout - self._timeliness) if delay.total_seconds() > 0: slot['delayed_files'][uid] = delay.total_seconds() # Add to received files slot['received_files'].add(mask) self.logger.info("%s processed", uid) def key_from_fname(self, uid): """Get the keys from a filename.""" for key in self._parsers: try: _ = self._parsers[key].parse(uid) return key except ValueError: pass def _find_time_slot(self, time_obj): """Find time slot and return the slot as a string. If no slots are close enough, return *str(time_obj)* """ for slot in self.slots: time_slot = self.slots[slot]['metadata'][self.time_name] time_diff = time_obj - time_slot if abs(time_diff.total_seconds()) < self._time_tolerance: self.logger.debug("Found existing time slot, using that") return str(time_slot) return str(time_obj) def check_schedule_time(self, check_time, raw_start_time): """Check if raw time is inside configured interval.""" time_ok = False # Convert check time into int variables raw_time = (60 * raw_start_time.hour) + raw_start_time.minute if check_time["midnight"] and raw_time < check_time["start"]: raw_time += 24 * 60 # Check start and end time if raw_time >= check_time["start"] and raw_time <= check_time["end"]: # Raw time in range, check interval if ((raw_time - check_time["start"]) % check_time["delta"]) == 0: time_ok = True return time_ok
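A minimal sketch, with placeholder values, of the dict-based configuration this newer SegmentGatherer expects; only the key names are taken from the lookups in the code above, the pattern, topics and numbers are hypothetical.

# Hypothetical configuration dict; key names follow the config.get() calls above.
example_config = {
    "section": "msg_segments",
    "time_name": "start_time",
    "timeliness": 1200,
    "time_tolerance": 30,
    "patterns": {
        "msg": {
            "pattern": "{channel_name}_{segment}_{start_time:%Y%m%d%H%M}.raw",
            "critical_files": ":PRO,:EPI",
            "wanted_files": "VIS006:1-8",
            "all_files": "VIS006:1-8",
            "is_critical_set": True,
            "variable_tags": [],
        },
    },
    "posttroll": {
        "topics": ["/foo/bar"],
        "publish_topic": "/segment/collection",
        "publish_port": 0,
        "nameservers": [],
    },
}

gatherer = SegmentGatherer(example_config)
# gatherer.run()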
class Dispatcher(Thread):
    """Class that dispatches files."""

    def __init__(self, config_file, publish_port=None, publish_nameservers=None):
        """Initialize dispatcher class."""
        super().__init__()
        self.config = None
        self.topics = None
        self.listener = None
        self.publisher = None
        if publish_port is not None:
            self.publisher = NoisyPublisher("dispatcher", port=publish_port,
                                            nameservers=publish_nameservers)
            self.publisher.start()
        self.loop = True
        self.config_handler = DispatchConfig(config_file, self.update_config)
        signal.signal(signal.SIGTERM, self.signal_shutdown)

    def signal_shutdown(self, *args, **kwargs):
        """Shutdown dispatcher."""
        self.close()

    def update_config(self, new_config):
        """Update configuration and reload listeners."""
        old_config = self.config
        topics = set()
        try:
            for _client, client_config in new_config.items():
                topics |= set(
                    sum([item['topics']
                         for item in client_config['dispatch_configs']], []))
            if self.topics != topics:
                if self.listener is not None:
                    # FIXME: make sure to get the last messages though
                    self.listener.stop()
                self.config = new_config
                addresses = client_config.get('subscribe_addresses', None)
                nameserver = client_config.get('nameserver', 'localhost')
                services = client_config.get('subscribe_services', '')
                self.listener = ListenerContainer(topics=topics,
                                                  addresses=addresses,
                                                  nameserver=nameserver,
                                                  services=services)
                self.topics = topics
        except KeyError as err:
            logger.warning(
                'Invalid config for %s, keeping the old one running: %s',
                _client, str(err))
            self.config = old_config

    def run(self):
        """Run dispatcher."""
        while self.loop:
            try:
                msg = self.listener.output_queue.get(timeout=1)
            except Empty:
                continue
            else:
                if msg.type != 'file':
                    continue
                destinations = self.get_destinations(msg)
                if destinations:
                    success = dispatch(msg.data['uri'], destinations)
                    if self.publisher:
                        self._publish(msg, destinations, success)

    def _publish(self, msg, destinations, success):
        """Publish a message.

        The URI is replaced with the URI on the target server.
        """
        for url, params, client in destinations:
            if not success[client]:
                continue
            del params
            info = msg.data.copy()
            info["uri"] = urlsplit(url).path
            topic = self.config[client].get("publish_topic")
            if topic is None:
                logger.error("Publish topic not configured for '%s'", client)
                continue
            topic = compose(topic, info)
            msg = Message(topic, 'file', info)
            logger.debug('Publishing %s', str(msg))
            self.publisher.send(str(msg))

    def get_destinations(self, msg):
        """Get the destinations for this message."""
        destinations = []
        for client, config in self.config.items():
            for disp_config in config['dispatch_configs']:
                for topic in disp_config['topics']:
                    if msg.subject.startswith(topic):
                        break
                else:
                    continue
                if check_conditions(msg, disp_config):
                    destinations.append(
                        self.create_dest_url(msg, client, disp_config))
        return destinations

    def create_dest_url(self, msg, client, disp_config):
        """Create the destination URL and the connection parameters."""
        defaults = self.config[client]
        info_dict = dict()
        for key in ['host', 'directory', 'filepattern']:
            try:
                info_dict[key] = disp_config[key]
            except KeyError:
                info_dict[key] = defaults[key]
        connection_parameters = disp_config.get(
            'connection_parameters', defaults.get('connection_parameters'))
        host = info_dict['host']
        path = os.path.join(info_dict['directory'], info_dict['filepattern'])
        mda = msg.data.copy()
        for key, aliases in defaults.get('aliases', {}).items():
            if isinstance(aliases, dict):
                aliases = [aliases]
            for alias in aliases:
                new_key = alias.pop("_alias_name", key)
                if key in msg.data:
                    mda[new_key] = alias.get(msg.data[key], msg.data[key])
        path = compose(path, mda)
        parts = urlsplit(host)
        host_path = urlunsplit(
            (parts.scheme, parts.netloc, path, parts.query, parts.fragment))
        return host_path, connection_parameters, client

    def close(self):
        """Shutdown the dispatcher."""
        logger.info('Terminating dispatcher.')
        self.loop = False
        try:
            self.listener.stop()
        except Exception:
            logger.exception("Couldn't stop listener.")
        if self.publisher:
            try:
                self.publisher.stop()
            except Exception:
                logger.exception("Couldn't stop publisher.")
        try:
            self.config_handler.close()
        except Exception:
            logger.exception("Couldn't stop config handler.")
class ActiveFiresPostprocessing(Thread): """The active fires post processor.""" def __init__(self, configfile, shp_boarders, shp_mask, regional_filtermask=None): """Initialize the active fires post processor class.""" super().__init__() self.shp_boarders = shp_boarders self.shp_filtermask = shp_mask self.regional_filtermask = regional_filtermask self.configfile = configfile self.options = {} config = read_config(self.configfile) self._set_options_from_config(config) self.host = socket.gethostname() self.timezone = self.options.get('timezone', 'GMT') self.input_topic = self.options['subscribe_topics'][0] self.output_topic = self.options['publish_topic'] self.infile_pattern = self.options.get('af_pattern_ibands') self.outfile_pattern_national = self.options.get( 'geojson_file_pattern_national') self.outfile_pattern_regional = self.options.get( 'geojson_file_pattern_regional') self.output_dir = self.options.get('output_dir', '/tmp') frmt = self.options['regional_shapefiles_format'] self.regional_shapefiles_globstr = globify(frmt) self.listener = None self.publisher = None self.loop = False self._setup_and_start_communication() def _setup_and_start_communication(self): """Set up the Posttroll communication and start the publisher.""" logger.debug("Starting up... Input topic: %s", self.input_topic) now = datetime_from_utc_to_local(datetime.now(), self.timezone) logger.debug("Output times for timezone: {zone} Now = {time}".format( zone=str(self.timezone), time=now)) self.listener = ListenerContainer(topics=[self.input_topic]) self.publisher = NoisyPublisher("active_fires_postprocessing") self.publisher.start() self.loop = True signal.signal(signal.SIGTERM, self.signal_shutdown) def _set_options_from_config(self, config): """From the configuration on disk set the option dictionary, holding all metadata for processing.""" for item in config: if not isinstance(config[item], dict): self.options[item] = config[item] if isinstance(self.options.get('subscribe_topics'), str): subscribe_topics = self.options.get('subscribe_topics').split(',') for item in subscribe_topics: if len(item) == 0: subscribe_topics.remove(item) self.options['subscribe_topics'] = subscribe_topics if isinstance(self.options.get('publish_topics'), str): publish_topics = self.options.get('publish_topics').split(',') for item in publish_topics: if len(item) == 0: publish_topics.remove(item) self.options['publish_topics'] = publish_topics def signal_shutdown(self, *args, **kwargs): """Shutdown the Active Fires postprocessing.""" self.close() def run(self): """Run the AF post processing.""" while self.loop: try: msg = self.listener.output_queue.get(timeout=1) logger.debug("Message: %s", str(msg.data)) except Empty: continue else: if msg.type not in ['file', 'collection', 'dataset']: logger.debug("Message type not supported: %s", str(msg.type)) continue platform_name = msg.data.get('platform_name') filename = get_filename_from_uri(msg.data.get('uri')) if not os.path.exists(filename): logger.warning("File does not exist!") continue file_ok = check_file_type_okay(msg.data.get('type')) no_fires_text = 'No fire detections for this granule' output_messages = self._generate_no_fires_messages( msg, no_fires_text) if not file_ok: for output_msg in output_messages: logger.debug("Sending message: %s", str(output_msg)) self.publisher.send(str(output_msg)) continue af_shapeff = ActiveFiresShapefileFiltering( filename, platform_name=platform_name, timezone=self.timezone) afdata = af_shapeff.get_af_data(self.infile_pattern) if len(afdata) == 0: 
logger.debug("Sending message: %s", str(output_msg)) self.publisher.send(str(output_msg)) continue output_messages, afdata = self.fires_filtering(msg, af_shapeff) for output_msg in output_messages: if output_msg: logger.debug("Sending message: %s", str(output_msg)) self.publisher.send(str(output_msg)) # Do the regional filtering now: if not self.regional_filtermask: logger.info("No regional filtering is attempted.") continue if len(afdata) == 0: logger.debug( "No fires - so no regional filtering to be done!") continue # FIXME! If afdata is empty (len=0) then it seems all data are inside all regions! af_shapeff = ActiveFiresShapefileFiltering( afdata=afdata, platform_name=platform_name, timezone=self.timezone) regional_fmask = af_shapeff.get_regional_filtermasks( self.regional_filtermask, globstr=self.regional_shapefiles_globstr) regional_messages = self.regional_fires_filtering_and_publishing( msg, regional_fmask, af_shapeff) for region_msg in regional_messages: logger.debug("Sending message: %s", str(region_msg)) self.publisher.send(str(region_msg)) def regional_fires_filtering_and_publishing(self, msg, regional_fmask, afsff_obj): """From the regional-fires-filter-mask and the fire detection data send regional messages.""" logger.debug( "Perform regional masking on VIIRS AF detections and publish accordingly." ) afdata = afsff_obj.get_af_data() fmda = afsff_obj.metadata fmda['platform'] = afsff_obj.platform_name pout = Parser(self.outfile_pattern_regional) output_messages = [] regions_with_detections = 0 for region_name in regional_fmask: if not regional_fmask[region_name]['some_inside_test_area']: continue regions_with_detections = regions_with_detections + 1 fmda['region_name'] = regional_fmask[region_name]['attributes'][ 'Kod_omr'] out_filepath = os.path.join(self.output_dir, pout.compose(fmda)) logger.debug("Output file path = %s", out_filepath) data_in_region = afdata[regional_fmask[region_name]['mask']] filepath = store_geojson(out_filepath, data_in_region, platform_name=fmda['platform']) if not filepath: logger.warning( "Something wrong happended storing regional " + "data to Geojson - area: {name}".format(str(region_name))) continue outmsg = self._generate_output_message(filepath, msg, regional_fmask[region_name]) output_messages.append(outmsg) logger.info("Geojson file created! Number of fires in region = %d", len(data_in_region)) logger.debug( "Regional masking done. Number of regions with fire " + "detections on this granule: %s", str(regions_with_detections)) return output_messages def fires_filtering(self, msg, af_shapeff): """Read Active Fire data and perform spatial filtering removing false detections. Do the national filtering first, and then filter out potential false detections by the special mask for that. """ logger.debug( "Read VIIRS AF detections and perform quality control and spatial filtering" ) fmda = af_shapeff.metadata # metdata contains time and everything but it is not being transfered to the dataframe.attrs pout = Parser(self.outfile_pattern_national) out_filepath = os.path.join(self.output_dir, pout.compose(fmda)) logger.debug("Output file path = %s", out_filepath) # National filtering: af_shapeff.fires_filtering(self.shp_boarders) # Metadata should be transfered here! 
afdata_ff = af_shapeff.get_af_data() if len(afdata_ff) > 0: af_shapeff.fires_filtering(self.shp_filtermask, start_geometries_index=0, inside=False) afdata_ff = af_shapeff.get_af_data() filepath = store_geojson(out_filepath, afdata_ff, platform_name=af_shapeff.platform_name) out_messages = self.get_output_messages(filepath, msg, len(afdata_ff)) return out_messages, afdata_ff def get_output_messages(self, filepath, msg, number_of_data): """Generate the adequate output message(s) depending on if an output file was created or not.""" if filepath: logger.info( "geojson file created! Number of fires after filtering = %d", number_of_data) return [self._generate_output_message(filepath, msg)] else: logger.info( "No geojson file created, number of fires after filtering = %d", number_of_data) return self._generate_no_fires_messages( msg, 'No true fire detections inside National boarders') def _generate_output_message(self, filepath, input_msg, region=None): """Create the output message to publish.""" output_topic = generate_posttroll_topic(self.output_topic, region) to_send = prepare_posttroll_message(input_msg, region) to_send['uri'] = ('ssh://%s/%s' % (self.host, filepath)) to_send['uid'] = os.path.basename(filepath) to_send['type'] = 'GEOJSON-filtered' to_send['format'] = 'geojson' to_send['product'] = 'afimg' pubmsg = Message(output_topic, 'file', to_send) return pubmsg def _generate_no_fires_messages(self, input_msg, msg_string): """Create the output messages to publish.""" to_send = prepare_posttroll_message(input_msg) to_send['info'] = msg_string publish_messages = [] for ext in ['National', 'Regional']: topic = self.output_topic + '/' + ext publish_messages.append(Message(topic, 'info', to_send)) return publish_messages def close(self): """Shutdown the Active Fires postprocessing.""" logger.info('Terminating Active Fires post processing.') self.loop = False try: self.listener.stop() except Exception: logger.exception("Couldn't stop listener.") if self.publisher: try: self.publisher.stop() except Exception: logger.exception("Couldn't stop publisher.")
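A minimal sketch of the options the post processor reads from its configuration file; only the option names are taken from the self.options lookups above, the topics, patterns and paths are placeholders.

# Hypothetical contents of the dict returned by read_config(); values are placeholders.
example_options = {
    "subscribe_topics": ["/VIIRS/L2/Fires/PP"],
    "publish_topic": "/VIIRS/L2/Fires/PP",
    "af_pattern_ibands": "AFIMG_{platform:s}_d{start_time:%Y%m%d_t%H%M%S}.txt",
    "geojson_file_pattern_national": "AFIMG_{platform:s}_{start_time:%Y%m%d_%H%M%S}.geojson",
    "geojson_file_pattern_regional": "AFIMG_{platform:s}_{start_time:%Y%m%d_%H%M%S}_{region_name:s}.geojson",
    "regional_shapefiles_format": "region_{region_code:s}.{ext:s}",
    "output_dir": "/path/to/geojson/output",
    "timezone": "Europe/Stockholm",
}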
class ImageScaler(object): '''Class for scaling images to defined sizes.''' # Config options for the current received message out_dir = '' update_existing = False is_backup = False subject = None crops = [] sizes = [] tags = [] timeliness = 10 static_image_fname = None areaname = None in_pattern = None fileparts = {} out_pattern = None text_pattern = None text_settings = None area_def = None overlay_config = None filepath = None existing_fname_parts = {} time_name = 'time' time_slot = None fill_value = None def __init__(self, config): self.config = config topics = config.sections() self.listener = ListenerContainer(topics=topics) self._loop = True if GSHHS_DATA_ROOT: self._cw = ContourWriter(GSHHS_DATA_ROOT) else: self._cw = None self._force_gc = False def stop(self): '''Stop scaler before shutting down.''' if self._loop: self._loop = False if self.listener is not None: self.listener.stop() def run(self): '''Start waiting for messages. On message arrival, read the image, scale down to the defined sizes and add coastlines. ''' while self._loop: # Wait for new messages try: msg = self.listener.output_queue.get(True, 5) except KeyboardInterrupt: self.stop() raise except queue_empty: continue logging.info("New message with topic %s", msg.subject) self.subject = msg.subject self.filepath = urlparse(msg.data["uri"]).path try: self._update_current_config() except (NoOptionError, NoSectionError): logging.warning("Skip processing for this message.") continue self.time_name = self._get_time_name(msg.data) # Adjust in_pattern and out_pattern to match this time_name self.in_pattern = adjust_pattern_time_name(self.in_pattern, self.time_name) self.out_pattern = adjust_pattern_time_name( self.out_pattern, self.time_name) # parse filename parts from the incoming file try: self.fileparts = parse(self.in_pattern, os.path.basename(self.filepath)) except ValueError: logging.info("Filepattern doesn't match, skipping.") logging.debug("in_pattern: %s", self.in_pattern) logging.debug("fname: %s", os.path.basename(self.filepath)) continue self.fileparts['areaname'] = self.areaname self._tidy_platform_name() self.time_slot = msg.data[self.time_name] existing_fname_parts = \ self._check_existing(msg.data[self.time_name]) # There is already a matching image which isn't going to # be updated if existing_fname_parts is None: continue self.existing_fname_parts = existing_fname_parts # Read the image img = read_image(self.filepath) if img is None: logging.error("Could not read image %s", self.filepath) continue # Add overlays, if any img = self.add_overlays(img) # Save image(s) self.save_images(img) # Delete obsolete image object del img # Run garbage collection if configured self._gc() def _gc(self): """Run garbage collection if it is configured.""" if self._force_gc: num = gc.collect() logging.debug("Garbage collection cleaned %s objects", num) def _get_time_name(self, info): """"Try to find the name for 'nominal' time""" for key in info: if "time" in key and "end" not in key and "proc" not in key: return key return None def add_overlays(self, img): """Add overlays to image. 
Add to cache, if not already there.""" if self.overlay_config is None: return img if self._cw is None: logging.warning("GSHHS_DATA_ROOT is not set, " "unable to add coastlines") return img if self.area_def is None: logging.warning("Area definition not available, " "can't add overlays!") else: return add_overlay_from_config(img, self._cw, self.overlay_config, self.area_def) def save_images(self, img): """Save image(s)""" # Loop through different image sizes num = np.max([len(self.sizes), len(self.crops), len(self.tags)]) for i in range(num): img_out = img.copy() # Crop the image try: img_out = crop_image(img_out, self.crops[i]) logging.debug("Applied crop: %s", str(self.crops[i])) except IndexError: logging.debug("No valid crops configured") # Resize the image try: img_out = resize_image(img_out, self.sizes[i]) except IndexError: logging.debug("No valid sizes configured") # Update existing image if configured to do so if self.update_existing and len(self.existing_fname_parts) > 0: try: self.existing_fname_parts['tag'] = self.tags[i] except IndexError: pass fname = compose(os.path.join(self.out_dir, self.out_pattern), self.existing_fname_parts) img_out = self._update_existing_img(img_out, fname) # Add text img_out = self._add_text(img_out, update_img=True) # In other case, save as a new image else: # Add text img_out = self._add_text(img_out, update_img=False) # Compose filename try: self.fileparts['tag'] = self.tags[i] except IndexError: pass fname = compose(os.path.join(self.out_dir, self.out_pattern), self.fileparts) # Save image save_image(img_out, fname, adef=self.area_def, fill_value=self.fill_value, save_options=self.save_options) # Update static image, if given in config try: self.fileparts['tag'] = self.tags[i] except IndexError: pass self._update_static_img(img_out) def _get_save_options(self): """Get save options from config""" save_tags = self._get_conf_with_default('save_tags') if save_tags is not None: save_tags = save_tags.split() compress = self._get_conf_with_default('compress') zlevel = int(self._get_conf_with_default('zlevel')) tile = self._get_conf_with_default('tile') in ('1', 1, 'True', True) blockxsize = int(self._get_conf_with_default('blockxsize')) blockysize = int(self._get_conf_with_default('blockysize')) overviews = self._get_conf_with_default('overviews') if overviews is not None: overviews = [int(i) for i in overviews.split()] save_options = { 'tags': save_tags, 'compress': compress, 'zlevel': zlevel, 'tile': tile, 'blocxksize': blockxsize, 'blockysize': blockysize, 'overviews': overviews } return save_options def _update_current_config(self): """Update the current config to class attributes.""" # These are mandatory config items, so handle them first self._get_mandatory_config_items() self._parse_crops() self._parse_sizes() self._parse_tags() self._get_text_settings() # Get image save options self.save_options = self._get_save_options() self.out_dir = self._get_conf_with_default('out_dir') self.update_existing = self._get_bool('update_existing') self.is_backup = self._get_bool('only_backup') self.timeliness = int(self._get_conf_with_default('timeliness')) self.fill_value = self._get_fill_value() self.static_image_fname_pattern = \ self._get_conf_with_default("static_image_fname_pattern") self.overlay_config = \ self._get_conf_with_default('overlay_config_fname') self._force_gc = self._get_bool('force_gc') def _get_conf_with_default(self, item): """Get a config item and use a default if no value is available""" return _get_conf_with_default(self.config, 
self.subject, item) def _get_bool(self, key): """Get *key* from config and interpret it as boolean""" val = self._get_conf_with_default(key) if isinstance(val, bool): return val return val.lower() in ['yes', '1', 'true'] def _get_fill_value(self): """Parse fill value""" fill_value = self._get_conf_with_default('fill_value') if not isinstance(fill_value, (int, type(None))): fill_value = int(fill_value) return fill_value def _get_text_settings(self): """Parse text overlay pattern and text settings""" self.text_pattern = self._get_conf_with_default('text_pattern') self.text_settings = _get_text_settings(self.config, self.subject) def _get_mandatory_config_items(self): """Get mandatory config items and log possible errors""" try: self.areaname = self.config.get(self.subject, 'areaname') try: self.area_def = get_area_def(self.areaname) except (IOError, NoOptionError): self.area_def = None logging.warning("Area definition not available") self.in_pattern = self.config.get(self.subject, 'in_pattern') self.out_pattern = self.config.get(self.subject, 'out_pattern') except NoOptionError: logging.error("Required option missing!") logging.error( "Check that 'areaname', 'in_pattern' and " "'out_pattern' are all defined under section %s", self.subject) raise KeyError("Required config item missing") except NoSectionError: logging.error("No config section for message subject %s", self.subject) raise KeyError("Missing config section") def _tidy_platform_name(self): """Remove "-" from platform names""" tidy = self._get_bool('tidy_platform_name') if tidy: self.fileparts['platform_name'] = self.fileparts[ 'platform_name'].replace('-', '') def _parse_crops(self): """Parse crop settings from the raw crop config""" crop_conf = self._get_conf_with_default('crops') if isinstance(crop_conf, list): self.crops = crop_conf return self.crops = [] for crop in crop_conf.split(','): if 'x' in crop and '+' in crop: # Crop strings are formated like this: # <x_size>x<y_size>+<x_start>+<y_start> # eg. 
1000x300+103+200 # Origin (0, 0) is at top-left parts = crop.split('+') left, up = map(int, parts[1:]) x_size, y_size = map(int, parts[0].split('x')) right, bottom = left + x_size, up + y_size crop = (left, up, right, bottom) self.crops.append(crop) else: self.crops.append(None) def _parse_sizes(self): """Parse crop settings from crop config""" size_conf = self._get_conf_with_default('sizes') if isinstance(size_conf, list): self.sizes = size_conf return self.sizes = [] for size in size_conf.split(','): self.sizes.append([int(val) for val in size.split('x')]) def _parse_tags(self): """Parse tags from tag config""" tag_conf = self._get_conf_with_default('tags') if isinstance(tag_conf, list): self.tags = tag_conf return self.tags = [tag for tag in tag_conf.split(',')] def _check_existing(self, start_time): """Check if there's an existing image that should be updated""" # check if something silmiar has already been made: # checks for: platform_name, areaname and # start_time +- timeliness minutes check_start_time = start_time - \ dt.timedelta(minutes=self.timeliness) check_dict = self.fileparts.copy() try: check_dict["tag"] = self.tags[0] except IndexError: pass if self.is_backup: check_dict["platform_name"] = '*' check_dict["sat_loc"] = '*' # check_dict["composite"] = '*' first_overpass = True update_fname_parts = {} for i in range(2 * self.timeliness + 1): check_dict[self.time_name] = \ check_start_time + dt.timedelta(minutes=i) glob_pattern = compose( os.path.join(self.out_dir, self.out_pattern), check_dict) logging.debug("Check pattern: %s", glob_pattern) glob_fnames = glob.glob(glob_pattern) if len(glob_fnames) > 0: fname = os.path.basename(glob_fnames[0]) first_overpass = False logging.debug("Found files: %s", str(glob_fnames)) try: update_fname_parts = parse(self.out_pattern, fname) update_fname_parts["composite"] = \ self.fileparts["composite"] if not self.is_backup: try: update_fname_parts["platform_name"] = \ self.fileparts["platform_name"] return update_fname_parts except KeyError: pass except ValueError: logging.debug("Parsing failed for update_fname_parts.") logging.debug("out_pattern: %s, basename: %s", self.out_pattern, fname) update_fname_parts = {} # Only backup, so save only if there were no matches if self.is_backup and not first_overpass: logging.info("File already exists, no backuping needed.") return None # No existing image else: return {} def _update_static_img(self, img): """Update image with static filename""" if self.static_image_fname_pattern is None: return fname = compose( os.path.join(self.out_dir, self.static_image_fname_pattern), self.fileparts) img = self._update_existing_img(img, fname) img = self._add_text(img, update_img=False) save_image(img, fname, adef=self.area_def, fill_value=self.fill_value, save_options=self.save_options) logging.info("Updated image with static filename: %s", fname) def _add_text(self, img, update_img=False): """Add text to the given image""" if self.text_pattern is None: return img if update_img: text = compose(self.text_pattern, self.existing_fname_parts) else: text = compose(self.text_pattern, self.fileparts) return add_text(img, text, self.text_settings) def _update_existing_img(self, img, fname): """Update existing image""" logging.info("Updating image %s with image %s", fname, self.filepath) img_out = update_existing_image(fname, img, fill_value=self.fill_value) return img_out
class WorldCompositeDaemon(object): logger = logging.getLogger(__name__) publish_topic = "/global/mosaic/{areaname}" nameservers = None port = 0 aliases = None broadcast_interval = 2 def __init__(self, config): self.config = config self.slots = {} # Structure of self.slots is: # slots = {datetime(): {composite: {"img": None, # "num": 0}, # "timeout": None}} self._parse_settings() self._listener = ListenerContainer(topics=config["topics"]) self._set_message_settings() self._publisher = \ NoisyPublisher("WorldCompositePublisher", port=self.port, aliases=self.aliases, broadcast_interval=self.broadcast_interval, nameservers=self.nameservers) self._publisher.start() self._loop = False if isinstance(config["area_def"], str): self.adef = get_area_def(config["area_def"]) else: self.adef = config["area_def"] def run(self): """Listen to messages and make global composites""" self._loop = True while self._loop: if self._check_timeouts_and_save(): num = gc.collect() self.logger.debug("%d objects garbage collected", num) # Get new messages from the listener msg = None try: msg = self._listener.output_queue.get(True, 1) except KeyboardInterrupt: self._loop = False break except queue_empty: continue if msg is not None and msg.type == "file": self._handle_message(msg) self._listener.stop() self._publisher.stop() def _set_message_settings(self): """Set message settings from config""" if "message_settings" not in self.config: return self.publish_topic = \ self.config["message_settings"].get("publish_topic", "/global/mosaic/{areaname}") self.nameservers = \ self.config["message_settings"].get("nameservers", None) self.port = self.config["message_settings"].get("port", 0) self.aliases = self.config["message_settings"].get("aliases", None) self.broadcast_interval = \ self.config["message_settings"].get("broadcast_interval", 2) def _handle_message(self, msg): """Insert file from the message to correct time slot and composite""" # Check which time should be used as basis for timeout: # - "message" = time of message sending # - "nominal_time" = time of satellite data, read from message data # - "receive" = current time when message is read from queue # Default to use slot nominal time timeout_epoch = self.config.get("timeout_epoch", "nominal_time") self.logger.debug("New message received: %s", str(msg.data)) fname = msg.data["uri"] tslot = msg.data["nominal_time"] composite = msg.data["productname"] if tslot not in self.slots: self.slots[tslot] = {} self.logger.debug("Adding new timeslot: %s", str(tslot)) if composite not in self.slots[tslot]: if timeout_epoch == "message": epoch = msg.time elif timeout_epoch == "receive": epoch = dt.datetime.utcnow() else: epoch = tslot self.slots[tslot][composite] = \ {"fnames": [], "num": 0, "timeout": epoch + dt.timedelta(minutes=self.config["timeout"])} self.logger.debug("Adding new composite to slot %s: %s", str(tslot), composite) self.logger.debug("Adding file to slot %s/%s: %s", str(tslot), composite, fname) self.slots[tslot][composite]["fnames"].append(fname) self.slots[tslot][composite]["num"] += 1 def _check_timeouts_and_save(self): """Check timeouts, save completed images, and cleanup slots.""" # Number of expected images num_expected = self.config["num_expected"] # Check timeouts and completed composites check_time = dt.datetime.utcnow() saved = False empty_slots = [] slots = self.slots.copy() for slot in slots: composites = tuple(slots[slot].keys()) for composite in composites: if (check_time > slots[slot][composite]["timeout"] or slots[slot][composite]["num"] == 
num_expected): fnames = slots[slot][composite]["fnames"] self._create_global_mosaic(fnames, slot, composite) saved = True # Collect empty slots if len(slots[slot]) == 0: empty_slots.append(slot) for slot in empty_slots: self.logger.debug("Removing empty time slot: %s", str(slot)) del self.slots[slot] return saved def _parse_settings(self): """Parse static settings from config""" lon_limits = LON_LIMITS.copy() try: lon_limits.update(self.config["lon_limits"]) except KeyError: pass except TypeError: lon_limits = None self.config["lon_limits"] = lon_limits # Get image save options try: save_kwargs = self.config["save_settings"] except KeyError: save_kwargs = {} self.config["save_settings"] = save_kwargs def _create_global_mosaic(self, fnames, slot, composite): """Create and save global mosaic.""" self.logger.info("Building composite %s for slot %s", composite, str(slot)) scn = Scene() file_parts = self._get_fname_parts(slot, composite) fname_out = file_parts["uri"] img = self._get_existing_image(fname_out) self.logger.info("Creating composite") scn['img'] = create_world_composite(fnames, self.adef, self.config["lon_limits"], img=img, logger=self.logger) self.logger.info("Saving %s", fname_out) scn.save_dataset('img', filename=fname_out, **self.config["save_settings"]) self._send_message(file_parts) del self.slots[slot][composite] def _get_fname_parts(self, slot, composite): """Get filename part dictionary""" file_parts = {'composite': composite, 'nominal_time': slot, 'areaname': self.adef.area_id} fname_out = compose(self.config["out_pattern"], file_parts) file_parts['uri'] = fname_out file_parts['uid'] = os.path.basename(fname_out) return file_parts def _get_existing_image(self, fname_out): """Read an existing image and return it. If the image doesn't exist, return None""" # Check if we already have an image with this filename if os.path.exists(fname_out): img = read_image(fname_out, self.adef.area_id) self.logger.info("Existing image was read: %s", fname_out) else: img = None return img def _send_message(self, file_parts): """Send a message""" msg = Message(compose(self.publish_topic, file_parts), "file", file_parts) self._publisher.send(str(msg)) self.logger.info("Sending message: %s", str(msg)) def stop(self): """Stop""" self.logger.info("Stopping WorldCompositor") self._listener.stop() self._publisher.stop() def set_logger(self, logger): """Set logger.""" self.logger = logger
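For orientation, a hypothetical configuration dictionary covering the keys that WorldCompositeDaemon reads; the key names are taken from the class above, while every value is an invented example (the message_settings entries simply mirror the class-level defaults), not a documented default:

# Illustrative only: key names follow the lookups in WorldCompositeDaemon, values are made up.
example_config = {
    "topics": ["/mosaic/input"],             # topics handed to ListenerContainer
    "area_def": "worldeqc3km",               # area name resolved with get_area_def(), or an AreaDefinition
    "timeout": 45,                           # minutes to wait per slot/composite before saving
    "timeout_epoch": "nominal_time",         # "message", "receive" or "nominal_time"
    "num_expected": 6,                       # images needed to complete a composite
    "out_pattern": "/data/mosaic/{composite}_{nominal_time:%Y%m%d_%H%M}_{areaname}.tif",
    "lon_limits": {"Meteosat-11": [-37.5, 20.75]},   # optional per-platform limits merged into the defaults
    "save_settings": {},                     # kwargs passed on to Scene.save_dataset()
    "message_settings": {
        "publish_topic": "/global/mosaic/{areaname}",
        "nameservers": None,
        "port": 0,
        "aliases": None,
        "broadcast_interval": 2,
    },
}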
class Dispatcher(Thread): """Class that dispatches files.""" def __init__(self, config_file): super().__init__() self.config = None self.topics = None self.listener = None self.loop = True self.config_handler = DispatchConfig(config_file, self.update_config) signal.signal(signal.SIGTERM, self.signal_shutdown) def signal_shutdown(self, *args, **kwargs): self.close() def update_config(self, new_config): old_config = self.config topics = set() try: for _client, client_config in new_config.items(): topics |= set( sum([ item['topics'] for item in client_config['dispatch_configs'] ], [])) if self.topics != topics: if self.listener is not None: # FIXME: make sure to get the last messages though self.listener.stop() self.config = new_config addresses = client_config.get('subscribe_addresses', None) nameserver = client_config.get('nameserver', 'localhost') services = client_config.get('subscribe_services', '') self.listener = ListenerContainer(topics=topics, addresses=addresses, nameserver=nameserver, services=services) self.topics = topics except KeyError as err: logger.warning( 'Invalid config for %s, keeping the old one running: %s', _client, str(err)) self.config = old_config def run(self): while self.loop: try: msg = self.listener.output_queue.get(timeout=1) except Empty: continue else: if msg.type != 'file': continue destinations = self.get_destinations(msg) if destinations: dispatch(msg.data['uri'], destinations) def get_destinations(self, msg): """Get the destinations for this message.""" destinations = [] for client, config in self.config.items(): for item in config['dispatch_configs']: for topic in item['topics']: if msg.subject.startswith(topic): break else: continue if check_conditions(msg, item): destinations.append(self.create_dest_url( msg, client, item)) return destinations def create_dest_url(self, msg, client, item): """Create the destination URL and the connection parameters.""" defaults = self.config[client] info_dict = dict() for key in ['host', 'directory', 'filepattern']: try: info_dict[key] = item[key] except KeyError: info_dict[key] = defaults[key] connection_parameters = item.get('connection_parameters', defaults.get('connection_parameters')) host = info_dict['host'] path = os.path.join(info_dict['directory'], info_dict['filepattern']) mda = msg.data.copy() for key, aliases in defaults.get('aliases', {}).items(): if key in mda: mda[key] = aliases.get(mda[key], mda[key]) path = compose(path, mda) parts = urlsplit(host) host_path = urlunsplit( (parts.scheme, parts.netloc, path, parts.query, parts.fragment)) return host_path, connection_parameters def close(self): """Shutdown the dispatcher.""" logger.info('Terminating dispatcher.') self.loop = False self.listener.stop() self.config_handler.close()
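The shape of the configuration handed to update_config() can be read off the lookups above; a hypothetical example, with all concrete values invented:

# Hypothetical dispatch configuration; key names follow the lookups in
# update_config(), get_destinations() and create_dest_url(), values are invented.
example_dispatch_config = {
    "client1": {
        "host": "ftp://ftp.example.com",
        "directory": "/incoming/{platform_name}",
        "filepattern": "{start_time:%Y%m%d_%H%M}_{productname}.tif",
        "aliases": {"platform_name": {"npp": "Suomi-NPP"}},   # value remapping applied to message metadata
        "subscribe_addresses": None,         # passed to ListenerContainer
        "nameserver": "localhost",
        "subscribe_services": "",
        "dispatch_configs": [
            {
                "topics": ["/level2/viirs"],
                # per-item overrides of host/directory/filepattern are also honoured,
                # and each item is passed to check_conditions() for filtering
            },
        ],
    },
}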
class EndUserNotifier(Thread): """The Notifier class - sending mails or text messages to end users upon incoming messages.""" def __init__(self, configfile, netrcfile=NETRCFILE): """Initialize the EndUserNotifier class.""" super().__init__() self.configfile = configfile self._netrcfile = netrcfile self.options = {} config = read_config(self.configfile) self._set_options_from_config(config) self.host = socket.gethostname() LOG.debug("netrc file path = %s", self._netrcfile) self.secrets = netrc(self._netrcfile) self.smtp_server = self.options.get('smtp_server') self.domain = self.options.get('domain') self.sender = self.options.get('sender') self.subject = self.options.get('subject') self.recipients = RecipientDataStruct() self._set_recipients() self.max_number_of_fires_in_sms = self.options.get( 'max_number_of_fires_in_sms', 2) LOG.debug("Max number of fires in SMS: %d", self.max_number_of_fires_in_sms) self.fire_data = self.options.get('fire_data') self.unsubscribe_address = self.options.get('unsubscribe_address') self.unsubscribe_text = self.options.get('unsubscribe_text') if not self.domain: raise IOError('Missing domain specification in config!') self.input_topic = self.options['subscribe_topics'][0] LOG.debug("Input topic: %s", self.input_topic) self.output_topic = self.options['publish_topic'] self.listener = None self.publisher = None self.loop = False self._setup_and_start_communication() def _set_recipients(self): """Set the recipients lists.""" self.recipients._set_recipients( self.options.get('recipients'), self.options.get('recipients_attachment')) self.recipients.subject = self.subject def _setup_and_start_communication(self): """Set up the Posttroll communication and start the publisher.""" LOG.debug("Input topic: %s", self.input_topic) self.listener = ListenerContainer(topics=[self.input_topic]) self.publisher = NoisyPublisher("end_user_notifier") self.publisher.start() self.loop = True signal.signal(signal.SIGTERM, self.signal_shutdown) def _set_options_from_config(self, config): """From the configuration on disk set the option dictionary, holding all metadata for processing.""" for item in config: self.options[item] = config[item] if isinstance(self.options.get('subscribe_topics'), str): subscribe_topics = self.options.get('subscribe_topics').split(',') for item in subscribe_topics: if len(item) == 0: subscribe_topics.remove(item) self.options['subscribe_topics'] = subscribe_topics if isinstance(self.options.get('publish_topics'), str): publish_topics = self.options.get('publish_topics').split(',') for item in publish_topics: if len(item) == 0: publish_topics.remove(item) self.options['publish_topics'] = publish_topics unsubscribe = config.get('unsubscribe') if unsubscribe: for key in unsubscribe: self.options['unsubscribe_' + key] = unsubscribe[key] def signal_shutdown(self, *args, **kwargs): """Shutdown the Notifier process.""" self.close() def run(self): """Run the Notifier.""" while self.loop: try: msg = self.listener.output_queue.get(timeout=1) LOG.debug("Message: %s", str(msg.data)) except Empty: continue else: if msg.type in [ 'info', ]: # No fires detected - no notification to send: LOG.info( "Message type info: No fires detected - no notification to send." 
) continue elif msg.type not in ['file', 'collection', 'dataset']: LOG.debug("Message type not supported: %s", str(msg.type)) continue output_msg = self.notify_end_users(msg) if output_msg: LOG.debug("Sending message: %s", str(output_msg)) self.publisher.send(str(output_msg)) else: LOG.debug("No message to send") def notify_end_users(self, msg): """Send notifications to configured end users (mail and text messages).""" LOG.debug("Start sending notifications to configured end users.") url = urlparse(msg.data.get('uri')) LOG.info('File path: %s', str(url.path)) filename = url.path ffdata = read_geojson_data(filename) if not ffdata: return None platform_name = msg.data.get("platform_name") # Create the message(s). # Some recipients (typically via e-mail) should have the full message and an attachment # Other recipients (typically via SMS) should have several smaller messages and no attachment # full_message, sub_messages = self.create_message_content( ffdata['features'], "\n" + self.unsubscribe_text) username, password = self._get_mailserver_login_credentials() server = self._start_smtp_server(username, password, self.recipients) self._send_notifications_without_attachments(server, self.recipients, sub_messages, platform_name) self._send_notifications_with_attachments(server, self.recipients, full_message, filename, platform_name) return _create_output_message(msg, self.output_topic, self.recipients.recipients_all) def _send_notifications_with_attachments(self, server, recipients, full_message, filename, platform_name): """Send notifications with attachments.""" notification = MIMEMultipart() notification['From'] = self.sender if platform_name: notification[ 'Subject'] = recipients.subject + ' Satellit = %s' % platform_name else: notification['Subject'] = recipients.subject if recipients.region_name: full_message = recipients.region_name + ":\n" + full_message notification.attach(MIMEText(full_message, 'plain', 'UTF-8')) LOG.debug("Length of message: %d", len(full_message)) part = MIMEBase('application', "octet-stream") with open(filename, 'rb') as file: part.set_payload(file.read()) encoders.encode_base64(part) part.add_header( 'Content-Disposition', 'attachment; filename="{}"'.format(Path(filename).name)) notification.attach(part) for recip in recipients.recipients_with_attachment: notification['To'] = recip LOG.info("Send fire notification to %s", str(recip)) LOG.debug("Subject: %s", str(recipients.subject)) txt = notification.as_string() server.sendmail(self.sender, recip, txt) LOG.debug("Text sent: %s", txt) server.quit() def _send_notifications_without_attachments(self, server, recipients, sub_messages, platform_name): """Send notifications without attachments.""" for submsg in sub_messages: notification = MIMEMultipart() notification['From'] = self.sender if platform_name: notification[ 'Subject'] = recipients.subject + ' Satellit = %s' % platform_name else: notification['Subject'] = recipients.subject notification.attach(MIMEText(submsg, 'plain', 'UTF-8')) for recip in recipients.recipients_without_attachment: notification['To'] = recip LOG.info("Send fire notification to %s", str(recip)) LOG.debug("Subject: %s", str(recipients.subject)) txt = notification.as_string() server.sendmail(self.sender, recip, txt) LOG.debug("Text sent: %s", txt) def _get_mailserver_login_credentials(self): """Get the login credentials for the mail server.""" host_secrets = self.secrets.authenticators(self.host) if host_secrets is None: LOG.error("Failed getting authentication secrets for host: %s", 
self.host) raise IOError("Check out the details in the netrc file: %s" % self._netrcfile) username, _, password = host_secrets return username, password def _start_smtp_server(self, username, password, recipients): """Start the SMTP server and log in.""" server = smtplib.SMTP(self.smtp_server) server.starttls() server.ehlo(self.domain) server.rcpt(recipients.recipients_all) server.login(username, password) return server def create_message_content(self, gjson_features, unsubscr): """Create the full message string and the list of sub-messages.""" full_msg = '' msg_list = [] outstr = '' for idx, firespot in enumerate(gjson_features): if idx % self.max_number_of_fires_in_sms == 0 and idx > 0: full_msg = full_msg + outstr if len(unsubscr) > 0: outstr = outstr + unsubscr LOG.debug('%d: Sub message = <%s>', idx, outstr) msg_list.append(outstr) outstr = '' lonlats = firespot['geometry']['coordinates'] outstr = outstr + '%f N, %f E\n' % (lonlats[1], lonlats[0]) if ('observation_time' in self.fire_data and 'observation_time' in firespot['properties']): timestr = firespot['properties']['observation_time'] LOG.debug("Time string: %s", str(timestr)) try: dtobj = datetime.fromisoformat(timestr) # Python > 3.6 except AttributeError: dtobj = datetime.strptime( timestr.split('.')[0], '%Y-%m-%dT%H:%M:%S') outstr = outstr + ' %s\n' % dtobj.strftime('%d %b %H:%M') for prop in firespot['properties']: if prop in self.fire_data and prop not in ['observation_time']: if prop in ['power', 'Power']: outstr = outstr + ' FRP: %7.3f MW\n' % ( firespot['properties'][prop]) else: outstr = outstr + ' FRP: %s\n' % (str( firespot['properties'][prop])) LOG.debug("Message length so far: %d", len(outstr)) LOG.debug("Max number of fires in sub message: %d", self.max_number_of_fires_in_sms) if len(outstr) > 0: if len(unsubscr) > 0: outstr = outstr + unsubscr LOG.debug('%d: Sub message = <%s>', idx, outstr) msg_list.append(outstr) full_msg = full_msg + outstr LOG.debug("Full message: <%s>", full_msg) LOG.debug("Sub-messages: <%s>", str(msg_list)) return full_msg, msg_list def close(self): """Shutdown the Notifier process.""" LOG.info('Terminating the End User Notifier process.') self.loop = False try: self.listener.stop() except Exception: LOG.exception("Couldn't stop listener.") if self.publisher: try: self.publisher.stop() except Exception: LOG.exception("Couldn't stop publisher.")
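The GeoJSON features that create_message_content() iterates over are plain dicts; a hypothetical example matching the lookups above (geometry coordinates, observation_time, power), with all values invented:

# Hypothetical input data in the shape create_message_content() expects.
example_features = [
    {
        "geometry": {"coordinates": [16.25, 62.85]},    # [lon, lat]
        "properties": {
            "observation_time": "2021-06-01T11:42:10.300000",
            "power": 4.312,                             # FRP in MW
        },
    },
]
# With fire_data = ['observation_time', 'power'], each feature contributes roughly:
#   "62.850000 N, 16.250000 E\n 01 Jun 11:42\n FRP:   4.312 MW\n"
# and every max_number_of_fires_in_sms features are grouped into one sub-message.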
class GeoGatherer(object): """Gatherer for geostationary satellite segments""" def __init__(self, config, section): self._config = config self._section = section topics = config.get(section, 'topics').split() services = "" if config.has_option(section, 'services'): services = config.get(section, 'services').split() self._listener = ListenerContainer(topics=topics, services=services) self._publisher = publisher.NoisyPublisher("geo_gatherer") self._subject = config.get(section, "publish_topic") self._pattern = config.get(section, 'pattern') self._providing_server = None if config.has_option(section, 'providing_server'): self._providing_server = config.get(section, 'providing_server') self._parser = Parser(self._pattern) try: self._timeliness = dt.timedelta( seconds=config.getint(section, "timeliness")) except (NoOptionError, ValueError): self._timeliness = dt.timedelta(seconds=20) self._timeout = None self.metadata = {} self.received_files = set() self.wanted_files = set() self.all_files = set() self.critical_files = set() self.delayed_files = OrderedDict() self.logger = logging.getLogger("geo_gatherer") self._loop = False def _clear_data(self): """Clear data.""" self._timeout = None self.metadata = {} self.received_files = set() self.wanted_files = set() self.all_files = set() self.critical_files = set() self.delayed_files = OrderedDict() def _init_data(self, msg): """Init wanted, all and critical files""" # Init metadata struct for key in msg.data: if key not in ("uid", "uri", "channel_name", "segment"): self.metadata[key] = msg.data[key] self.metadata['dataset'] = [] # Critical files that are required, otherwise production will fail self.critical_files = \ self._compose_filenames(self._config.get(self._section, "critical_files")) # These files are wanted, but not critical for production self.wanted_files = \ self._compose_filenames(self._config.get(self._section, "wanted_files")) self.all_files = \ self._compose_filenames(self._config.get(self._section, "all_files")) def _compose_filenames(self, itm_str): """Compose filename set()s based on a pattern and item string. itm_str is formated like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'""" # Empty set result = set() # Get copy of metadata meta = self.metadata.copy() for itm in itm_str.split(','): channel_name, segments = itm.split(':') segments = segments.split('-') if len(segments) > 1: segments = [ '%06d' % i for i in range(int(segments[0]), int(segments[-1]) + 1) ] meta['channel_name'] = channel_name for seg in segments: meta['segment'] = seg fname = self._parser.compose(meta) result.add(fname) return result def _publish(self): """Publish file dataset and reinitialize gatherer.""" # Diagnostic logging about delayed ... 
if len(self.delayed_files) > 0: file_str = '' for key in self.delayed_files: file_str += "%s %f seconds, " % (key, self.delayed_files[key]) self.logger.warning("Files received late: %s", file_str.strip(', ')) # and missing files missing_files = self.all_files.difference(self.received_files) if len(missing_files) > 0: self.logger.warning("Missing files: %s", ', '.join(missing_files)) msg = message.Message(self._subject, "dataset", self.metadata) self.logger.info("Sending: %s", str(msg)) self._publisher.send(str(msg)) self._clear_data() def set_logger(self, logger): """Set logger.""" self.logger = logger def collection_ready(self): """Determine if collection is ready to be published.""" # If no files have been collected, return False if len(self.received_files) == 0: return False # If all wanted files have been received, return True if self.wanted_files.union(self.critical_files).issubset( self.received_files): return True # If all critical files have been received ... if self.critical_files.issubset(self.received_files): # and timeout is reached, return True if self._timeout is not None and \ self._timeout <= dt.datetime.utcnow(): return True # else, set timeout if not already running else: if self._timeout is None: self._timeout = dt.datetime.utcnow() + self._timeliness self.logger.info("Setting timeout to %s", str(self._timeout)) return False # In other cases continue gathering return False def run(self): """Run GeoGatherer""" self._publisher.start() self._loop = True while self._loop: # Check if collection is ready for publication if self.collection_ready(): self._publish() # Check listener for new messages msg = None try: msg = self._listener.output_queue.get(True, 1) except AttributeError: msg = self._listener.queue.get(True, 1) except KeyboardInterrupt: self.stop() continue except Queue.Empty: continue if msg.type == "file": self.logger.info("New message received: %s", str(msg)) self.process(msg) def stop(self): """Stop gatherer.""" self.logger.info("Stopping gatherer.") self._loop = False if self._listener is not None: self._listener.stop() if self._publisher is not None: self._publisher.stop() def process(self, msg): """Process message""" if self._providing_server and self._providing_server != msg.host: return mda = self._parser.parse(msg.data["uid"]) if msg.data['uid'] in self.received_files: return # Init metadata etc if this is the first file if len(self.metadata) == 0: self._init_data(msg) # If the nominal time of the new segment is later than the # current metadata has, ... elif mda["nominal_time"] > self.metadata["nominal_time"]: # timeout ... self._timeout = dt.datetime.utcnow() # and check if the collection is ready and publish if self.collection_ready(): self._publish() self._clear_data() self._init_data(msg) # or discard data and start new collection else: self.logger.warning("Collection not finished before new " "started") missing_files = self.all_files.difference(self.received_files) self.logger.warning("Missing files: %s", missing_files) self._clear_data() self._init_data(msg) # Add uid and uri self.metadata['dataset'].append({ 'uri': msg.data['uri'], 'uid': msg.data['uid'] }) # If critical files have been received but the collection is # not complete, add the file to list of delayed files if self.critical_files.issubset(self.received_files): delay = dt.datetime.utcnow() - (self._timeout - self._timeliness) self.delayed_files[msg.data['uid']] = delay.total_seconds() # Add to received files self.received_files.add(msg.data['uid'])
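The item strings consumed by _compose_filenames() above expand channel/segment specifications before the filenames are composed with the parser. A standalone sketch of just that expansion step, with a hypothetical helper name; the real method goes on to compose a filename per pair:

def expand_segments(itm_str):
    """Expand an item string like ':PRO,:EPI' or 'VIS006:1-8' into
    (channel_name, segment) pairs, mirroring _compose_filenames() above."""
    result = []
    for itm in itm_str.split(','):
        channel_name, segments = itm.split(':')
        segments = segments.split('-')
        if len(segments) > 1:
            # Ranges are zero-padded to six digits, single segments are kept as-is
            segments = ['%06d' % i
                        for i in range(int(segments[0]), int(segments[-1]) + 1)]
        for seg in segments:
            result.append((channel_name, seg))
    return result

# 'VIS006:7-8' -> [('VIS006', '000007'), ('VIS006', '000008')]
# ':PRO'       -> [('', 'PRO')]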
class SegmentGatherer(object): """Gatherer for geostationary satellite segments and multifile polar satellite granules.""" _listener = None _publisher = None def __init__(self, config): """Initialize the segment gatherer.""" self._config = config.copy() self._pattern_configs = self._config.pop('patterns') self._subject = None self._timeliness = dt.timedelta(seconds=config.get("timeliness", 1200)) # This get the 'keep_parsed_keys' valid for all patterns self._keep_parsed_keys = self._config.get('keep_parsed_keys', []) self._patterns = self._create_patterns() self._elements = list(self._patterns.keys()) self._time_tolerance = self._config.get("time_tolerance", 30) self._bundle_datasets = self._config.get("bundle_datasets", False) self._num_files_premature_publish = self._config.get( "num_files_premature_publish", -1) self.slots = OrderedDict() self.time_name = self._config.get('time_name', 'start_time') # Floor the scene start time to the given full minutes self._group_by_minutes = self._config.get('group_by_minutes', None) self._loop = False self._providing_server = self._config.get('providing_server') def _create_patterns(self): return { key: Pattern(key, pattern_config, self._config) for key, pattern_config in self._pattern_configs.items() } def _clear_slot(self, time_slot): """Clear data.""" if time_slot in self.slots: del self.slots[time_slot] def _reinitialize_gatherer(self, time_slot, missing_files_check=True): """Publish file dataset and reinitialize gatherer.""" slot = self.slots[time_slot] # Diagnostic logging about delayed ... delayed_files = {} for key in self._elements: delayed_files.update(slot[key]['delayed_files']) if len(delayed_files) > 0: file_str = '' for key, value in delayed_files.items(): file_str += "%s %f seconds, " % (key, value) logger.warning("Files received late: %s", file_str.strip(', ')) # ... 
and missing files if missing_files_check: missing_files = set([]) for key in self._elements: missing_files = slot[key]['all_files'].difference( slot[key]['received_files']) if len(missing_files) > 0: logger.warning( "Missing files: %s", ', '.join( (str(missing) for missing in missing_files))) # Remove tags that are not necessary for datasets for tag in REMOVE_TAGS: try: del slot.output_metadata[tag] except KeyError: pass output_metadata = slot.output_metadata.copy() if self._bundle_datasets and "dataset" not in output_metadata: output_metadata["dataset"] = [] for collection in output_metadata["collection"].values(): output_metadata["dataset"].extend(collection['dataset']) del output_metadata["collection"] self._publish(output_metadata) def _publish(self, metadata): if "dataset" in metadata: msg = pmessage.Message(self._subject, "dataset", metadata) else: msg = pmessage.Message(self._subject, "collection", metadata) logger.info("Sending: %s", str(msg)) self._publisher.send(str(msg)) def _generate_publish_service_name(self): publish_service_name = "segment_gatherer" for key in sorted(self._elements): publish_service_name += "_" + str(key) return publish_service_name def _setup_messaging(self): """Set up messaging.""" self._subject = self._config['posttroll']['publish_topic'] topics = self._config['posttroll'].get('topics') addresses = self._config['posttroll'].get('addresses') publish_port = self._config['posttroll'].get('publish_port', 0) nameservers = self._config['posttroll'].get('nameservers', []) services = self._config['posttroll'].get('services', "") self._listener = ListenerContainer(topics=topics, addresses=addresses, services=services) # Name each segment_gatherer with the section/patterns name. # This way the user can subscribe to a specific segment_gatherer service instead of all. 
publish_service_name = self._generate_publish_service_name() self._publisher = publisher.NoisyPublisher(publish_service_name, port=publish_port, nameservers=nameservers) self._publisher.start() def run(self): """Run SegmentGatherer.""" self._setup_messaging() self._loop = True while self._loop: self.triage_slots() # Check listener for new messages try: msg = self._listener.output_queue.get(True, 1) except AttributeError: msg = self._listener.queue.get(True, 1) except KeyboardInterrupt: self.stop() continue except Empty: continue if msg.type in ["file", "dataset"]: # If providing server is configured skip message if not from providing server if self._providing_server and self._providing_server != msg.host: continue logger.info("New message received: %s", str(msg)) self.process(msg) def triage_slots(self): """Check if there are slots ready for publication.""" slots = self.slots.copy() for slot_time, slot in slots.items(): slot_time = str(slot_time) status = slot.get_status() if status == Status.SLOT_READY: # Collection ready, publish and remove self._reinitialize_gatherer(slot_time) self._clear_slot(slot_time) if status == Status.SLOT_READY_BUT_WAIT_FOR_MORE: # Collection ready, publish and but wait for more self._reinitialize_gatherer(slot_time, missing_files_check=False) elif status == Status.SLOT_OBSOLETE_TIMEOUT: # Collection unfinished and obsolete, discard self._clear_slot(slot_time) else: # Collection unfinished, wait for more data pass def stop(self): """Stop gatherer.""" logger.info("Stopping gatherer.") self._loop = False if self._listener is not None: if self._listener.thread is not None: self._listener.stop() if self._publisher is not None: self._publisher.stop() def process(self, msg): """Process message.""" # Find the correct parser for this file try: message = self.message_from_posttroll(msg) pattern = message.pattern except TypeError: logger.debug("No parser matching message, skipping.") return # Check if time of the raw is in scheduled range if "_start_time_pattern" in pattern: schedule_ok = self.check_if_time_is_in_interval( pattern["_start_time_pattern"], message.id_time) if not schedule_ok: logger.info( "Hour pattern '%s' skip: %s" + " for start_time: %s", pattern.name, message.uid(), message.id_time.strftime("%H:%M")) return slot_time = self._find_time_slot(message.id_time) # Init metadata etc if this is the first file if slot_time not in self.slots: slot = self._create_slot(message) else: slot = self.slots[slot_time] slot.add_file(message) def message_from_posttroll(self, msg): """Create a message object from a posttroll message instance.""" for pattern in self._patterns.values(): try: if pattern.parser.matches(msg): return Message(msg, pattern) except KeyError as err: logger.debug("No key " + str(err) + " in message.") raise TypeError def _find_time_slot(self, time_obj): """Find time slot and return the slot as a string. 
If no slots are close enough, return *str(time_obj)* """ for slot in self.slots: time_slot = self.slots[slot].output_metadata[self.time_name] time_diff = time_obj - time_slot if abs(time_diff.total_seconds()) < self._time_tolerance: logger.debug("Found existing time slot, using that") return slot return str(time_obj) def _create_slot(self, message): """Init wanted, all and critical files.""" timestamp = str(message.id_time) logger.debug("Adding new slot: %s", timestamp) slot = Slot(timestamp, message.filtered_metadata, self._patterns, self._timeliness, self._num_files_premature_publish) self.slots[timestamp] = slot return slot def check_if_time_is_in_interval(self, time_range, raw_start_time): """Check if raw time is inside configured interval.""" time_ok = False # Convert check time into int variables raw_time = (60 * raw_start_time.hour) + raw_start_time.minute if time_range["midnight"] and raw_time < time_range["start"]: raw_time += 24 * 60 # Check start and end time if time_range["start"] <= raw_time <= time_range["end"]: # Raw time in range, check interval if ((raw_time - time_range["start"]) % time_range["delta"]) == 0: time_ok = True return time_ok
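The schedule check in check_if_time_is_in_interval() works in minutes since midnight, with the "midnight" flag marking a range that wraps past 24:00. A standalone sketch of the same logic with an invented interval (the helper name and the range values are illustrative only):

import datetime as dt

def time_in_interval(time_range, raw_start_time):
    """Mirror of check_if_time_is_in_interval(): minutes since midnight, wrapping ranges."""
    raw_time = 60 * raw_start_time.hour + raw_start_time.minute
    if time_range["midnight"] and raw_time < time_range["start"]:
        raw_time += 24 * 60
    if time_range["start"] <= raw_time <= time_range["end"]:
        return ((raw_time - time_range["start"]) % time_range["delta"]) == 0
    return False

# Accept every 15 minutes between 22:00 and 02:00 (wrapping midnight):
rng = {"start": 22 * 60, "end": 26 * 60, "delta": 15, "midnight": True}
assert time_in_interval(rng, dt.datetime(2021, 6, 1, 1, 30))        # 01:30 is on a 15 min step
assert not time_in_interval(rng, dt.datetime(2021, 6, 1, 1, 40))    # 01:40 is not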
class Dispatcher(Thread): """Class that dispatches files.""" def __init__(self, config_file, publish_port=None, publish_nameservers=None): """Initialize dispatcher class.""" super().__init__() self.config = None self.topics = None self.listener = None self._publish_port = publish_port self._publish_nameservers = publish_nameservers self.publisher = None self.host = socket.gethostname() self._create_publisher() self.loop = True self.config_handler = DispatchConfig(config_file, self.update_config) signal.signal(signal.SIGTERM, self.signal_shutdown) def _create_publisher(self): if self._publish_port is not None: self.publisher = NoisyPublisher("dispatcher", port=self._publish_port, nameservers=self._publish_nameservers) self.publisher.start() def signal_shutdown(self, *args, **kwargs): """Shutdown dispatcher.""" self.close() def update_config(self, new_config): """Update configuration and reload listeners.""" old_config = self.config topics = set() try: for _client, client_config in new_config.items(): topics |= set(sum([item['topics'] for item in client_config['dispatch_configs']], [])) if self.topics != topics: self.config = new_config self._create_listener(client_config, topics) except KeyError as err: logger.warning('Invalid config for %s, keeping the old one running: %s', _client, str(err)) self.config = old_config def _create_listener(self, client_config, topics): if self.listener is not None: # FIXME: make sure to get the last messages though self.listener.stop() addresses = client_config.get('subscribe_addresses', None) nameserver = client_config.get('nameserver', 'localhost') services = client_config.get('subscribe_services', '') self.listener = ListenerContainer(topics=topics, addresses=addresses, nameserver=nameserver, services=services) self.topics = topics def run(self): """Run dispatcher.""" while self.loop: try: msg = self.listener.output_queue.get(timeout=1) except Empty: continue if msg.type != 'file': continue self._dispatch_from_message(msg) def _dispatch_from_message(self, msg): destinations = self.get_destinations(msg) if destinations: # Check if the url are on another host: url = urlparse(msg.data['uri']) _check_file_locality(url, self.host) success = dispatch(url.path, destinations) if self.publisher: self._publish(msg, destinations, success) def _publish(self, msg, destinations, success): """Publish a message. The URI is replaced with the URI on the target server. 
""" for url, _, client in destinations: if not success[client]: continue msg = self._get_new_message(msg, url, client) if msg is None: continue logger.debug('Publishing %s', str(msg)) self.publisher.send(str(msg)) def _get_new_message(self, msg, url, client): info = self._get_message_info(msg, url) topic = self._get_topic(client, info) if topic is None: return None return Message(topic, 'file', info) def _get_message_info(self, msg, url): info = msg.data.copy() info["uri"] = urlsplit(url).path return info def _get_topic(self, client, info): topic = self.config[client].get("publish_topic") if topic is None: logger.error("Publish topic not configured for '%s'", client) return None return compose(topic, info) def get_destinations(self, msg): """Get the destinations for this message.""" destinations = [] for client, config in self.config.items(): for dispatch_config in config['dispatch_configs']: destination = self._get_destination(dispatch_config, msg, client) if destination is None: continue destinations.append(destination) return destinations def _get_destination(self, dispatch_config, msg, client): destination = None if _has_correct_topic(dispatch_config, msg): if check_conditions(msg, dispatch_config): destination = self.create_dest_url(msg, client, dispatch_config) return destination def create_dest_url(self, msg, client, conf): """Create the destination URL and the connection parameters.""" config = self.config[client].copy() _verify_filepattern(config, msg) config.update(conf) connection_parameters = config.get('connection_parameters') host = config['host'] metadata = _get_metadata_with_aliases(msg, config) path = compose( os.path.join(config['directory'], config['filepattern']), metadata) parts = urlsplit(host) host_path = urlunsplit((parts.scheme, parts.netloc, path, parts.query, parts.fragment)) return host_path, connection_parameters, client def close(self): """Shutdown the dispatcher.""" logger.info('Terminating dispatcher.') self.loop = False try: self.listener.stop() except Exception: logger.exception("Couldn't stop listener.") if self.publisher: try: self.publisher.stop() except Exception: logger.exception("Couldn't stop publisher.") try: self.config_handler.close() except Exception: logger.exception("Couldn't stop config handler.")