def create_filename(self, fname_pattern, dir_pattern, params=None):
    """Compose the full output path for saving from the directory and filename patterns."""
    fname = os.path.join(dir_pattern, fname_pattern)
    par = Parser(fname)
    fname = par.compose(params)
    return fname
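# A minimal usage sketch for the composition above, assuming trollsift's
# Parser; the pattern and parameter values here are hypothetical:
from datetime import datetime

from trollsift import Parser

_pattern = "/data/{platform_name}/af_{start_time:%Y%m%d_%H%M}.geojson"
_params = {"platform_name": "NOAA-20",
           "start_time": datetime(2023, 5, 1, 12, 0)}
print(Parser(_pattern).compose(_params))
# -> /data/NOAA-20/af_20230501_1200.geojson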
def _resolve(self, params, ref_params):
    """Recursively resolve pattern strings in *params* against *ref_params*."""
    resolved_params = dict()
    for key, val in params.items():
        # Only string parameters are treated as patterns
        if isinstance(val, str):
            par = Parser(val)
            resolved_params[key] = par.compose(ref_params)
        elif isinstance(val, dict):
            resolved_params[key] = self._resolve(val, ref_params)
        else:
            resolved_params[key] = val
    return resolved_params
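# Behaviour sketch for _resolve as a standalone function, assuming trollsift's
# Parser; the parameter names and values below are hypothetical:
from trollsift import Parser

def resolve(parameters, ref_params):
    # String values are treated as trollsift patterns and composed against
    # ref_params; dicts are resolved recursively; everything else is kept.
    out = {}
    for key, val in parameters.items():
        if isinstance(val, str):
            out[key] = Parser(val).compose(ref_params)
        elif isinstance(val, dict):
            out[key] = resolve(val, ref_params)
        else:
            out[key] = val
    return out

_ref = {"platform_name": "Suomi-NPP"}
_params = {"output": "/out/{platform_name}",
           "nested": {"tag": "{platform_name}"},
           "count": 3}
print(resolve(_params, _ref))
# -> {'output': '/out/Suomi-NPP', 'nested': {'tag': 'Suomi-NPP'}, 'count': 3}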
def regional_fires_filtering_and_publishing(self, msg, regional_fmask, afsff_obj):
    """From the regional-fires-filter-mask and the fire detection data send regional messages."""
    logger.debug("Perform regional masking on VIIRS AF detections and publish accordingly.")

    afdata = afsff_obj.get_af_data()
    fmda = afsff_obj.metadata
    fmda['platform'] = afsff_obj.platform_name

    pout = Parser(self.outfile_pattern_regional)

    output_messages = []
    regions_with_detections = 0
    for region_name in regional_fmask:
        if not regional_fmask[region_name]['some_inside_test_area']:
            continue

        regions_with_detections += 1
        fmda['region_name'] = regional_fmask[region_name]['attributes']['Kod_omr']

        out_filepath = os.path.join(self.output_dir, pout.compose(fmda))
        logger.debug("Output file path = %s", out_filepath)
        data_in_region = afdata[regional_fmask[region_name]['mask']]
        filepath = store_geojson(out_filepath, data_in_region,
                                 platform_name=fmda['platform'])
        if not filepath:
            logger.warning("Something went wrong storing regional "
                           "data to Geojson - area: %s", str(region_name))
            continue

        outmsg = self._generate_output_message(filepath, msg,
                                               regional_fmask[region_name])
        output_messages.append(outmsg)
        logger.info("Geojson file created! Number of fires in region = %d",
                    len(data_in_region))

    logger.debug("Regional masking done. Number of regions with fire "
                 "detections on this granule: %s", str(regions_with_detections))
    return output_messages
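# A sketch of the regional_fmask structure consumed above, inferred from the
# keys the code references; the region name and concrete values here are
# hypothetical:
import numpy as np

regional_fmask_example = {
    "Norrbotten": {
        # True if any detections fall inside this region's test area
        "some_inside_test_area": True,
        # Region attributes; 'Kod_omr' is the region code used in filenames
        "attributes": {"Kod_omr": "2584"},
        # Boolean mask selecting this region's rows from the AF data
        "mask": np.array([True, False, True]),
    },
}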
class GeoGatherer(object):
    """Gatherer for geostationary satellite segments."""

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        services = ""
        if config.has_option(section, 'services'):
            services = config.get(section, 'services').split()
        self._listener = ListenerContainer(topics=topics, services=services)
        self._publisher = publisher.NoisyPublisher("geo_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(
                seconds=config.getint(section, "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=20)
        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

        self.logger = logging.getLogger("geo_gatherer")
        self._loop = False

    def _clear_data(self):
        """Clear the collected data."""
        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

    def _init_data(self, msg):
        """Initialize the sets of wanted, critical and all files."""
        # Init metadata struct
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                self.metadata[key] = msg.data[key]
        self.metadata['dataset'] = []

        # Critical files that are required, otherwise production will fail
        self.critical_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "critical_files"))
        # These files are wanted, but not critical for production
        self.wanted_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "wanted_files"))
        self.all_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "all_files"))

    def _compose_filenames(self, itm_str):
        """Compose filename sets based on a pattern and an item string.

        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'
        """
        # Empty set
        result = set()

        # Get a copy of the metadata
        meta = self.metadata.copy()
        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = ['%06d' % i
                            for i in range(int(segments[0]),
                                           int(segments[-1]) + 1)]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.compose(meta)
                result.add(fname)

        return result

    def _publish(self):
        """Publish the file dataset and reinitialize the gatherer."""
        # Diagnostic logging about delayed ...
        if len(self.delayed_files) > 0:
            file_str = ''
            for key in self.delayed_files:
                file_str += "%s %f seconds, " % (key, self.delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))
        # ... and missing files
        missing_files = self.all_files.difference(self.received_files)
        if len(missing_files) > 0:
            self.logger.warning("Missing files: %s", ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", self.metadata)
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        self._clear_data()

    def set_logger(self, logger):
        """Set the logger."""
        self.logger = logger

    def collection_ready(self):
        """Determine if the collection is ready to be published."""
        # If no files have been collected, return False
        if len(self.received_files) == 0:
            return False
        # If all wanted files have been received, return True
        if self.wanted_files.union(self.critical_files).issubset(
                self.received_files):
            return True
        # If all critical files have been received ...
        if self.critical_files.issubset(self.received_files):
            # ... and the timeout has been reached, return True
            if self._timeout is not None and \
               self._timeout <= dt.datetime.utcnow():
                return True
            # ... otherwise start the timeout if it is not already running
            if self._timeout is None:
                self._timeout = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s", str(self._timeout))
            return False

        # In all other cases, continue gathering
        return False

    def run(self):
        """Run the GeoGatherer."""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if the collection is ready for publication
            if self.collection_ready():
                self._publish()

            # Check the listener for new messages
            msg = None
            try:
                msg = self._listener.output_queue.get(True, 1)
            except AttributeError:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop the gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process a new message."""
        # Ignore messages that don't come from the configured providing server
        if self._providing_server and self._providing_server != msg.host:
            return

        mda = self._parser.parse(msg.data["uid"])
        if msg.data['uid'] in self.received_files:
            return
        # Init metadata etc. if this is the first file
        if len(self.metadata) == 0:
            self._init_data(msg)
        # If the nominal time of the new segment is later than the one in
        # the current metadata, ...
        elif mda["nominal_time"] > self.metadata["nominal_time"]:
            # ... force the timeout ...
            self._timeout = dt.datetime.utcnow()
            # ... and publish if the collection is ready
            if self.collection_ready():
                self._publish()
                self._clear_data()
                self._init_data(msg)
            # ... or discard the data and start a new collection
            else:
                self.logger.warning("Collection not finished before a new "
                                    "one started")
                missing_files = self.all_files.difference(self.received_files)
                self.logger.warning("Missing files: %s", missing_files)
                self._clear_data()
                self._init_data(msg)

        # Add uid and uri
        self.metadata['dataset'].append({'uri': msg.data['uri'],
                                         'uid': msg.data['uid']})

        # If the critical files have been received but the collection is
        # not complete, add the file to the list of delayed files
        if self.critical_files.issubset(self.received_files):
            delay = dt.datetime.utcnow() - (self._timeout - self._timeliness)
            self.delayed_files[msg.data['uid']] = delay.total_seconds()

        # Add to received files
        self.received_files.add(msg.data['uid'])
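# The SLOT_* status codes used by SegmentGatherer below are not defined in
# this excerpt; a minimal sketch, assuming they are distinct module-level
# sentinels (the concrete values here are hypothetical):
SLOT_NOT_READY = 0
SLOT_READY = 1
SLOT_READY_BUT_WAIT_FOR_MORE = 2
SLOT_OBSOLETE_TIMEOUT = 3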
class SegmentGatherer(object):
    """Gatherer for geostationary satellite segments and multifile polar
    satellite granules."""

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("segment_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(
                seconds=config.getint(section, "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()

        self.time_name = config.get(section, 'time_name')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False

    def _clear_data(self, time_slot):
        """Remove the given time slot."""
        if time_slot in self.slots:
            del self.slots[time_slot]

    def _init_data(self, msg, mda):
        """Initialize the sets of wanted, critical and all files."""
        # Init metadata struct
        metadata = {}
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                metadata[key] = msg.data[key]
        metadata['dataset'] = []

        # Also use the metadata parsed from the filenames
        metadata.update(mda)

        time_slot = str(metadata[self.time_name])
        self.slots[time_slot] = {}
        self.slots[time_slot]['metadata'] = metadata.copy()

        # Critical files that are required, otherwise production will fail.
        # If there are no critical files, an empty set is used.
        try:
            critical_segments = self._config.get(self._section,
                                                 "critical_files")
            self.slots[time_slot]['critical_files'] = \
                self._compose_filenames(time_slot, critical_segments)
        except (NoOptionError, ValueError):
            self.slots[time_slot]['critical_files'] = set([])

        # These files are wanted, but not critical for production
        self.slots[time_slot]['wanted_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "wanted_files"))
        # Names of all the files
        self.slots[time_slot]['all_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "all_files"))

        self.slots[time_slot]['received_files'] = set([])
        self.slots[time_slot]['delayed_files'] = dict()
        self.slots[time_slot]['missing_files'] = set([])
        self.slots[time_slot]['timeout'] = None
        self.slots[time_slot]['files_till_premature_publish'] = \
            self._num_files_premature_publish

    def _compose_filenames(self, time_slot, itm_str):
        """Compose filename sets based on a pattern and an item string.

        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'
        """
        # Empty set
        result = set()

        # Get a copy of the metadata
        meta = self.slots[time_slot]['metadata'].copy()

        # Replace variable tags (such as processing time) with a wildcard,
        # as these can't be forecasted.
        try:
            for tag in self._config.get(self._section,
                                        'variable_tags').split(','):
                meta[tag] = '*'
        except NoOptionError:
            pass

        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = ['%d' % i
                            for i in range(int(segments[0]),
                                           int(segments[-1]) + 1)]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.compose(meta)
                result.add(fname)

        return result

    def _publish(self, time_slot, missing_files_check=True):
        """Publish the file dataset and reinitialize the gatherer."""
        data = self.slots[time_slot]

        # Diagnostic logging about delayed ...
        delayed_files = data['delayed_files']
        if len(delayed_files) > 0:
            file_str = ''
            for key in delayed_files:
                file_str += "%s %f seconds, " % (key, delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))

        # ... and missing files
        if missing_files_check:
            missing_files = data['all_files'].difference(
                data['received_files'])
            if len(missing_files) > 0:
                self.logger.warning("Missing files: %s",
                                    ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", data['metadata'])
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        # Clearing the slot is left to the caller (see run())
        # self._clear_data(time_slot)

    def set_logger(self, logger):
        """Set the logger."""
        self.logger = logger

    def slot_ready(self, slot):
        """Determine if the slot is ready to be published."""
        # If no files have been collected, the slot is not ready
        if len(slot['received_files']) == 0:
            return SLOT_NOT_READY

        time_slot = str(slot['metadata'][self.time_name])

        wanted_and_critical_files = \
            slot['wanted_files'].union(slot['critical_files'])
        num_wanted_and_critical_files_received = \
            len(wanted_and_critical_files & slot['received_files'])

        self.logger.debug("Got %s wanted or critical files in slot %s.",
                          num_wanted_and_critical_files_received,
                          time_slot)

        if num_wanted_and_critical_files_received \
                == slot['files_till_premature_publish']:
            slot['files_till_premature_publish'] = -1
            return SLOT_READY_BUT_WAIT_FOR_MORE

        # If all wanted files have been received, the slot is ready
        if wanted_and_critical_files.issubset(slot['received_files']):
            self.logger.info("All files received for slot %s.", time_slot)
            return SLOT_READY

        if slot['critical_files'].issubset(slot['received_files']):
            # All critical files have been received
            if slot['timeout'] is None:
                # Set the timeout
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s.",
                                 str(slot['timeout']), time_slot)
                return SLOT_NOT_READY
            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection ready
                self.logger.info("Timeout occurred, required files received "
                                 "for slot %s.", time_slot)
                return SLOT_READY
        else:
            if slot['timeout'] is None:
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s",
                                 str(slot['timeout']), time_slot)
                return SLOT_NOT_READY
            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection is obsolete
                self.logger.warning("Timeout occurred and required files "
                                    "were not present, data discarded for "
                                    "slot %s.", time_slot)
                return SLOT_OBSOLETE_TIMEOUT

        # Timeout not reached, wait for more files
        return SLOT_NOT_READY

    def run(self):
        """Run the SegmentGatherer."""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if there are slots ready for publication
            slots = self.slots.copy()
            for slot in slots:
                slot = str(slot)
                status = self.slot_ready(slots[slot])
                if status == SLOT_READY:
                    # Collection ready, publish and remove
                    self._publish(slot)
                    self._clear_data(slot)
                elif status == SLOT_READY_BUT_WAIT_FOR_MORE:
                    # Collection ready, publish but wait for more files
                    self._publish(slot, missing_files_check=False)
                elif status == SLOT_OBSOLETE_TIMEOUT:
                    # Collection unfinished and obsolete, discard
                    self._clear_data(slot)
                else:
                    # Collection unfinished, wait for more data
                    pass

            # Check the listener for new messages
            msg = None
            try:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop the gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process a new message."""
        try:
            mda = self._parser.parse(msg.data["uid"])
        except ValueError:
            self.logger.debug("Unknown file, skipping.")
            return

        time_slot = str(mda[self.time_name])

        # Init metadata etc. if this is the first file for this time slot
        if time_slot not in self.slots:
            self._init_data(msg, mda)

        slot = self.slots[time_slot]

        # Replace variable tags (such as processing time) with a wildcard,
        # as these can't be forecasted.
        try:
            for tag in self._config.get(self._section,
                                        'variable_tags').split(','):
                mda[tag] = '*'
        except NoOptionError:
            pass

        mask = self._parser.compose(mda)

        if mask in slot['received_files']:
            return

        # Add uid and uri
        slot['metadata']['dataset'].append({'uri': msg.data['uri'],
                                            'uid': msg.data['uid']})

        # If the critical files have been received but the slot is not
        # complete, add the file to the list of delayed files
        if len(slot['critical_files']) > 0 and \
           slot['critical_files'].issubset(slot['received_files']):
            delay = dt.datetime.utcnow() - (slot['timeout'] - self._timeliness)
            slot['delayed_files'][msg.data['uid']] = delay.total_seconds()

        # Add to received files
        slot['received_files'].add(mask)
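# A hedged sketch of the configuration such a gatherer section might read;
# the option names are taken from the code above, while the section name and
# values are hypothetical. RawConfigParser avoids interpolating the '%'
# characters in the trollsift pattern:
from configparser import RawConfigParser

_raw = """
[msg_hrit]
topics = /foo/hrit/file
publish_topic = /gathered/msg_hrit
pattern = H-000-{platform_name}-{channel_name}-{segment}-{nominal_time:%Y%m%d%H%M}-__
timeliness = 1200
num_files_premature_publish = 10
time_name = nominal_time
critical_files = :PRO,:EPI
wanted_files = VIS006:1-8
all_files = VIS006:1-8,:PRO,:EPI
"""
_config = RawConfigParser()
_config.read_string(_raw)
print(_config.get("msg_hrit", "pattern"))
print(_config.getint("msg_hrit", "timeliness"))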