def save_images(self, img):
    """Save image(s)."""
    # Loop through the different image sizes
    num = np.max([len(self.sizes), len(self.crops), len(self.tags)])

    for i in range(num):
        img_out = img.copy()

        # Crop the image
        try:
            img_out = crop_image(img_out, self.crops[i])
            logging.debug("Applied crop: %s", str(self.crops[i]))
        except IndexError:
            logging.debug("No valid crops configured")

        # Resize the image
        try:
            img_out = resize_image(img_out, self.sizes[i])
        except IndexError:
            logging.debug("No valid sizes configured")

        # Update an existing image if configured to do so
        if self.update_existing and len(self.existing_fname_parts) > 0:
            try:
                self.existing_fname_parts['tag'] = self.tags[i]
            except IndexError:
                pass
            fname = compose(os.path.join(self.out_dir, self.out_pattern),
                            self.existing_fname_parts)
            img_out = self._update_existing_img(img_out, fname)

            # Add text
            img_out = self._add_text(img_out, update_img=True)
        # Otherwise, save as a new image
        else:
            # Add text
            img_out = self._add_text(img_out, update_img=False)

            # Compose the filename
            try:
                self.fileparts['tag'] = self.tags[i]
            except IndexError:
                pass
            fname = compose(os.path.join(self.out_dir, self.out_pattern),
                            self.fileparts)

        # Save the image
        save_image(img_out, fname, adef=self.area_def,
                   fill_value=self.fill_value,
                   save_options=self.save_options)

        # Update the static image, if given in config
        try:
            self.fileparts['tag'] = self.tags[i]
        except IndexError:
            pass
        self._update_static_img(img_out)

def test_compose(self):
    """Test the compose method's custom conversion options."""
    from trollsift import compose
    key_vals = {'a': 'this Is A-Test b_test c test'}

    new_str = compose("{a!c}", key_vals)
    self.assertEqual(new_str, 'This is a-test b_test c test')
    new_str = compose("{a!h}", key_vals)
    self.assertEqual(new_str, 'thisisatestbtestctest')
    new_str = compose("{a!H}", key_vals)
    self.assertEqual(new_str, 'THISISATESTBTESTCTEST')
    new_str = compose("{a!l}", key_vals)
    self.assertEqual(new_str, 'this is a-test b_test c test')
    new_str = compose("{a!R}", key_vals)
    self.assertEqual(new_str, 'thisIsATestbtestctest')
    new_str = compose("{a!t}", key_vals)
    self.assertEqual(new_str, 'This Is A-Test B_Test C Test')
    new_str = compose("{a!u}", key_vals)
    self.assertEqual(new_str, 'THIS IS A-TEST B_TEST C TEST')
    # builtin repr
    new_str = compose("{a!r}", key_vals)
    self.assertEqual(new_str, '\'this Is A-Test b_test c test\'')
    # no formatting
    new_str = compose("{a}", key_vals)
    self.assertEqual(new_str, 'this Is A-Test b_test c test')
    # bad formatter
    self.assertRaises(ValueError, compose, "{a!X}", key_vals)
    self.assertEqual(new_str, 'this Is A-Test b_test c test')

def _add_text(self, img, update_img=False):
    """Add text to the given image."""
    if self.text_pattern is None:
        return img

    if update_img:
        text = compose(self.text_pattern, self.existing_fname_parts)
    else:
        text = compose(self.text_pattern, self.fileparts)

    return add_text(img, text, self.text_settings)

def _prepare_filename_and_directory(fmat):
    """Compose the directory and filename (returned in that order) from *fmat*."""
    # Compose the filename
    fname_pattern = fmat['fname_pattern']
    directory = compose(fmat.get('output_dir', ''), fmat)
    filename = os.path.join(directory, compose(fname_pattern, fmat))

    # Create the directory if needed
    if directory and not os.path.exists(directory):
        os.makedirs(directory)

    return directory, filename

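# A minimal, standalone sketch of the mechanism _prepare_filename_and_directory
# relies on: trollsift.compose fills a pattern string from a metadata dictionary,
# including datetime format specs. The pattern and metadata values below are
# illustrative assumptions, not taken from any of the modules in this collection.
import datetime as dt
import os.path
from trollsift import compose

fmat = {"output_dir": "/data/out/{platform_name}",
        "platform_name": "NOAA-20",
        "product": "overview",
        "start_time": dt.datetime(2024, 1, 1, 12, 0)}
directory = compose(fmat["output_dir"], fmat)   # '/data/out/NOAA-20'
filename = os.path.join(directory,
                        compose("{product}_{start_time:%Y%m%d_%H%M}.tif", fmat))
print(filename)  # /data/out/NOAA-20/overview_20240101_1200.tif
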
def _create_layers(config, cat, property_file):
    """Create all configured layers."""
    workspace = config["workspace"]
    time_dim = config["time_dimension"]

    # Make sure the workspace exists
    create_workspace(workspace, cat)

    layer_directories = utils.get_exposed_layer_directories(config)

    # Create all the configured layers and add the time dimension
    for layer_config in config["layers"]:
        meta = _collect_layer_metadata(config, layer_config)

        layer_name = trollsift.compose(
            meta.get("name", meta.get("layer_pattern")), meta)
        if layer_name is None:
            logger.error("No layer name defined!")
            logger.error("Config items: %s", str(meta))
            continue

        # Write WKT to .prj files for all existing files before creating the
        # layer. This is optional, and can help with files that have no
        # embedded projection metadata, or whose embedded metadata is in a
        # format Geoserver doesn't understand.
        utils.write_wkt_for_files(config, layer_directories[layer_name])

        if _create_layer(cat, workspace, layer_name, property_file):
            if not add_layer_metadata(cat, workspace, layer_name, time_dim, meta):
                continue
            # Delete the empty image from the database (does not remove the file)
            for fname in config["properties"].get("files", []):
                delete_granule(cat, workspace, layer_name, fname)

def create_message(fmat, mda):
    """Create a message topic and mda."""
    topic_pattern = fmat["publish_topic"]
    file_mda = mda.copy()
    file_mda.update(fmat.get('extra_metadata', {}))

    file_mda['uri'] = os.path.abspath(fmat['filename'])
    file_mda['uid'] = os.path.basename(fmat['filename'])
    file_mda['product'] = fmat['product']
    file_mda['area'] = fmat['area']
    for key in ['productname', 'areaname', 'format']:
        try:
            file_mda[key] = fmat[key]
        except KeyError:
            pass
    for extra_info in ['area_coverage_percent',
                       'area_sunlight_coverage_percent']:
        try:
            file_mda[extra_info] = fmat[extra_info]
        except KeyError:
            pass

    topic = compose(topic_pattern, fmat)
    return topic, file_mda

def create_dest_url(self, msg, client, disp_config):
    """Create the destination URL and the connection parameters."""
    defaults = self.config[client]
    info_dict = dict()
    for key in ['host', 'directory', 'filepattern']:
        try:
            info_dict[key] = disp_config[key]
        except KeyError:
            info_dict[key] = defaults[key]
    connection_parameters = disp_config.get(
        'connection_parameters', defaults.get('connection_parameters'))
    host = info_dict['host']
    path = os.path.join(info_dict['directory'], info_dict['filepattern'])
    mda = msg.data.copy()

    for key, aliases in defaults.get('aliases', {}).items():
        if isinstance(aliases, dict):
            aliases = [aliases]
        for alias in aliases:
            new_key = alias.pop("_alias_name", key)
            if key in msg.data:
                mda[new_key] = alias.get(msg.data[key], msg.data[key])

    path = compose(path, mda)
    parts = urlsplit(host)
    host_path = urlunsplit((parts.scheme, parts.netloc, path,
                            parts.query, parts.fragment))
    return host_path, connection_parameters, client

def _get_topic(self, mda):
    if self.publish_topic is not None:
        logger.debug("Composing topic.")
        subject = compose(self.publish_topic, mda)
    else:
        logger.debug("Using default topic.")
        subject = "/".join(("", mda["format"], mda["data_processing_level"], ''))
    return subject

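# A small usage sketch of the topic composition done above when a publish topic
# is configured. The pattern and metadata values are hypothetical examples.
from trollsift import compose

publish_topic = "/{platform_name}/{sensor}/{data_processing_level}"
mda = {"platform_name": "Metop-B", "sensor": "avhrr-3",
       "data_processing_level": "1b"}
print(compose(publish_topic, mda))  # /Metop-B/avhrr-3/1b
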
def save_text(self, fname=None):
    """Save forest fires."""
    if self.data is None:
        return
    if fname is None:
        if "text_fname_pattern" in self.config:
            fname = self.config["text_fname_pattern"]

    try:
        template = self.config['text_template']
    except KeyError:
        logging.warning("No output template given, using default: %s",
                        DEFAULT_TEMPLATE)
        template = DEFAULT_TEMPLATE
    try:
        header = self.config["text_header"]
    except KeyError:
        header = DEFAULT_HEADER

    minimum_quality_level = self.config.get("minimum_quality_level",
                                            QUALITY_NOT_FIRE)

    output_text = []
    for itm in self.fires:
        if self.fires[itm]['quality'] >= minimum_quality_level:
            output_text.append(compose(template, self.fires[itm]))
        else:
            logging.warning("Item filtered based on 'quality': %s",
                            str(self.fires[itm]))
    output_text = ''.join(output_text)

    if fname is None:
        print(output_text)
    else:
        fname = compose(fname, self.metadata)
        with open(fname, 'w') as fid:
            if header is not None:
                fid.write(header)
                if not header.endswith('\n'):
                    fid.write('\n')
            fid.write(output_text)
        logging.info("Output written to %s", fname)
        if "text_publish_topic" in self.config:
            self.send_message(self.config["text_publish_topic"], fname)

def _create_message(self, area, fname, scn_metadata, productname):
    """Create a message and add it to self.messages."""
    # No messaging without a topic
    if self._topic is None:
        return

    try:
        area_data = {"name": area.name,
                     "area_id": area.area_id,
                     "proj_id": area.proj_id,
                     "proj4": area.proj4_string,
                     "shape": (area.x_size, area.y_size)}
    except AttributeError:
        area_data = None

    # Create the message metadata dictionary
    to_send = utils.select_dict_items(scn_metadata, self._publish_vars)
    to_send_fix = {"nominal_time": scn_metadata["start_time"],
                   "uid": os.path.basename(fname),
                   "uri": os.path.abspath(fname),
                   "area": area_data,
                   "productname": productname}
    to_send.update(to_send_fix)

    # Compose the topic with area information
    topic = self._topic
    if area_data is not None:
        tmp = to_send.copy()
        del tmp["area"]
        area_data.update(tmp)
        topic = compose(topic, area_data)
    else:
        topic = compose(topic, {'area_id': 'satproj'})

    # Create the message
    msg = Message(topic, "file", to_send)
    self.messages.append(msg)

def _check_existing(self, start_time):
    """Check if there's an existing image that should be updated."""
    # Check if something similar has already been made: checks for
    # platform_name, areaname and start_time +- timeliness minutes
    check_start_time = start_time - dt.timedelta(minutes=self.timeliness)
    check_dict = self.fileparts.copy()
    try:
        check_dict["tag"] = self.tags[0]
    except IndexError:
        pass
    if self.is_backup:
        check_dict["platform_name"] = '*'
        check_dict["sat_loc"] = '*'
        # check_dict["composite"] = '*'

    first_overpass = True
    update_fname_parts = {}
    for i in range(2 * self.timeliness + 1):
        check_dict[self.time_name] = check_start_time + dt.timedelta(minutes=i)
        glob_pattern = compose(os.path.join(self.out_dir, self.out_pattern),
                               check_dict)
        logging.debug("Check pattern: %s", glob_pattern)
        glob_fnames = glob.glob(glob_pattern)
        if len(glob_fnames) > 0:
            fname = os.path.basename(glob_fnames[0])
            first_overpass = False
            logging.debug("Found files: %s", str(glob_fnames))
            try:
                update_fname_parts = parse(self.out_pattern, fname)
                update_fname_parts["composite"] = self.fileparts["composite"]
                if not self.is_backup:
                    try:
                        update_fname_parts["platform_name"] = \
                            self.fileparts["platform_name"]
                        return update_fname_parts
                    except KeyError:
                        pass
            except ValueError:
                logging.debug("Parsing failed for update_fname_parts.")
                logging.debug("out_pattern: %s, basename: %s",
                              self.out_pattern, fname)
                update_fname_parts = {}

    # Only a backup, so save only if there were no matches
    if self.is_backup and not first_overpass:
        logging.info("File already exists, no backing up needed.")
        return None

    # No existing image
    return {}

def save_hdf5(self, fname=None):
    """Save self.fires to an HDF5 file."""
    if self.data is None:
        return
    if fname is None:
        fname = self.config["hdf5_fname_pattern"]
    fname = compose(fname, self.metadata)
    utils.save_hdf5(fname, self.fires)
    logging.info("Output written to %s", fname)
    if "hdf5_publish_topic" in self.config:
        self.send_message(self.config["hdf5_publish_topic"], fname)

def _get_fname_parts(self, slot, composite):
    """Get the filename part dictionary."""
    file_parts = {'composite': composite,
                  'nominal_time': slot,
                  'areaname': self.adef.area_id}

    fname_out = compose(self.config["out_pattern"], file_parts)
    file_parts['uri'] = fname_out
    file_parts['uid'] = os.path.basename(fname_out)

    return file_parts

def test_filepublisher_without_compose(self):
    from trollflow2.plugins import FilePublisher
    from trollflow2.dict_tools import plist_iter
    from trollsift import compose
    import os.path
    with mock.patch('trollflow2.plugins.Message') as message, \
            mock.patch('trollflow2.plugins.NoisyPublisher'):
        pub = FilePublisher()
        pub.pub.start.assert_called_once()
        product_list = self.product_list.copy()
        product_list['product_list']['publish_topic'] = '/static_topic'
        job = {'product_list': product_list, 'input_mda': self.input_mda}
        topic_pattern = job['product_list']['product_list']['publish_topic']
        topics = []
        # Create the filenames and topics
        for fmat, fmat_config in plist_iter(job['product_list']['product_list'],
                                            job['input_mda'].copy()):
            fname_pattern = fmat['fname_pattern']
            filename = compose(os.path.join(fmat['output_dir'], fname_pattern),
                               fmat)
            fmat.pop('format', None)
            fmat_config['filename'] = filename
            topics.append(compose(topic_pattern, fmat))
        pub(job)
        message.assert_called()
        pub.pub.send.assert_called()
        pub.__del__()
        pub.pub.stop.assert_called()
        i = 0
        for area in job['product_list']['product_list']['areas']:
            for prod in job['product_list']['product_list']['areas'][area]:
                # Skip calls to __str__
                if 'call().__str__()' != str(message.mock_calls[i]):
                    self.assertTrue(topics[i] in str(message.mock_calls[i]))
                i += 1

def save_datasets(job):
    """Save the datasets (and trigger the computation).

    If the `use_tmp_file` option is provided in the product list and is set to
    True, the file will first be saved under a temporary name before being
    renamed. This is useful when other processes are waiting for the file to
    be present to start their work, but would crash on incomplete files.
    """
    scns = job['resampled_scenes']
    objs = []
    base_config = job['input_mda'].copy()
    base_config.pop('dataset', None)
    renames = {}
    for fmat, fmat_config in plist_iter(job['product_list']['product_list'],
                                        base_config):
        fname_pattern = fmat['fname_pattern']
        filename = compose(os.path.join(fmat['output_dir'], fname_pattern), fmat)
        directory = fmat['output_dir']
        if not os.path.exists(directory):
            os.makedirs(directory)
        if fmat.get('use_tmp_file', False):
            file_object = NamedTemporaryFile(delete=False, dir=directory)
            tmp_filename = file_object.name
            file_object.close()
            os.chmod(tmp_filename, 0o644)
            renames[tmp_filename] = filename
            filename = tmp_filename
        fmat.pop('format', None)
        fmat.pop('filename', None)
        try:
            # TODO: make these DatasetIDs take resolution into account
            res = fmat.get('resolution', None)
            dsid = DatasetID(name=fmat['product'], resolution=res, modifiers=None)
            objs.append(scns[fmat['area']].save_dataset(dsid,
                                                        filename=filename,
                                                        compute=False,
                                                        **fmat_config))
        except KeyError as err:
            LOG.info('Skipping %s: %s', fmat['productname'], str(err))
        else:
            fmat_config['filename'] = renames.get(filename, filename)
    compute_writer_results(objs)
    for tmp_name, actual_name in renames.items():
        os.rename(tmp_name, actual_name)

def save_datasets(job):
    scns = job['resampled_scenes']
    objs = []
    base_config = job['input_mda'].copy()
    base_config.update(job['product_list']['common'])
    base_config.pop('dataset', None)
    for fmat, fmat_config in plist_iter(job['product_list']['product_list'],
                                        base_config):
        fname_pattern = fmat['fname_pattern']
        outdir = fmat['output_dir']
        filename = compose(os.path.join(outdir, fname_pattern), fmat)
        fmat.pop('format', None)
        objs.append(scns[fmat['areaname']].save_dataset(fmat['productname'],
                                                        filename=filename,
                                                        compute=False,
                                                        **fmat))
        fmat_config['filename'] = filename
    compute_writer_results(objs)

def _update_static_img(self, img):
    """Update the image with a static filename."""
    if self.static_image_fname_pattern is None:
        return

    fname = compose(os.path.join(self.out_dir, self.static_image_fname_pattern),
                    self.fileparts)
    img = self._update_existing_img(img, fname)
    img = self._add_text(img, update_img=False)

    save_image(img, fname, adef=self.area_def, fill_value=self.fill_value,
               save_options=self.save_options)
    logging.info("Updated image with static filename: %s", fname)

def create_dest_url(self, msg, client, conf):
    """Create the destination URL and the connection parameters."""
    config = self.config[client].copy()
    _verify_filepattern(config, msg)
    config.update(conf)
    connection_parameters = config.get('connection_parameters')
    host = config['host']
    metadata = _get_metadata_with_aliases(msg, config)
    path = compose(os.path.join(config['directory'], config['filepattern']),
                   metadata)
    parts = urlsplit(host)
    host_path = urlunsplit((parts.scheme, parts.netloc, path,
                            parts.query, parts.fragment))
    return host_path, connection_parameters, client

def terminator(metadata, publish_topic=None):
    """Dummy terminator function."""
    sorted_mda = sorted(metadata, key=lambda x: x["start_time"])

    mda = metadata[0].copy()

    if publish_topic is not None:
        LOGGER.info("Composing topic.")
        subject = compose(publish_topic, mda)
    else:
        LOGGER.info("Using default topic.")
        subject = "/".join(("", mda["format"], mda["data_processing_level"], ''))

    mda['end_time'] = sorted_mda[-1]['end_time']
    mda['collection_area_id'] = sorted_mda[-1]['collection_area_id']
    mda['collection'] = []

    is_correct = False
    for meta in sorted_mda:
        new_mda = {}
        if "uri" in meta or 'dataset' in meta:
            is_correct = True
        for key in ['dataset', 'uri', 'uid']:
            if key in meta:
                new_mda[key] = meta[key]
            new_mda['start_time'] = meta['start_time']
            new_mda['end_time'] = meta['end_time']
        mda['collection'].append(new_mda)

    for key in ['dataset', 'uri', 'uid']:
        if key in mda:
            del mda[key]

    if is_correct:
        msg = message.Message(subject, "collection", mda)
        LOGGER.info("sending %s", str(msg))
        PUB.send(str(msg))
    else:
        LOGGER.warning("Malformed metadata, no key: %s", "uri")

def terminator(metadata, publish_topic=None):
    """Dummy terminator function."""
    sorted_mda = sorted(metadata, key=lambda x: x["start_time"])

    mda = metadata[0].copy()

    if publish_topic is not None:
        LOGGER.info("Composing topic.")
        subject = compose(publish_topic, mda)
    else:
        LOGGER.info("Using default topic.")
        subject = "/".join(("", mda["format"], mda["data_processing_level"], ''))

    mda['start_time'] = sorted_mda[0]['start_time']
    mda['end_time'] = sorted_mda[-1]['end_time']
    mda['collection_area_id'] = sorted_mda[-1]['collection_area_id']
    mda['collection'] = []

    is_correct = False
    for meta in sorted_mda:
        new_mda = {}
        if "uri" in meta or 'dataset' in meta:
            is_correct = True
        for key in ['dataset', 'uri', 'uid']:
            if key in meta:
                new_mda[key] = meta[key]
            new_mda['start_time'] = meta['start_time']
            new_mda['end_time'] = meta['end_time']
        mda['collection'].append(new_mda)

    for key in ['dataset', 'uri', 'uid']:
        if key in mda:
            del mda[key]

    if is_correct:
        msg = message.Message(subject, "collection", mda)
        LOGGER.info("sending %s", str(msg))
        PUB.send(str(msg))
    else:
        LOGGER.warning("Malformed metadata, no key: %s", "uri")

def _publish(self, msg, destinations, success):
    """Publish a message.

    The URI is replaced with the URI on the target server.
    """
    for url, params, client in destinations:
        if not success[client]:
            continue
        del params
        info = msg.data.copy()
        info["uri"] = urlsplit(url).path
        topic = self.config[client].get("publish_topic")
        if topic is None:
            logger.error("Publish topic not configured for '%s'", client)
            continue
        topic = compose(topic, info)
        msg = Message(topic, 'file', info)
        logger.debug('Publishing %s', str(msg))
        self.publisher.send(str(msg))

def create_aligned_datetime_var(var_pattern, info_dict):
    """Create an aligned datetime variable.

    Uses a *var_pattern* like "{time:%Y%m%d%H%M|align(15)}" to create a new
    datetime, with support for temporal alignment (ceil/round a datetime
    object to a multiple of a timedelta). Useful to equalize small time
    differences in the names of files belonging to the same timeslot.
    """
    mtch = re.match('{(.*?)(!(.*?))?(\\:(.*?))?(\\|(.*?))?}', var_pattern)
    if mtch is None:
        return None

    # Parse the date format pattern
    key = mtch.groups()[0]
    # format_spec = mtch.groups()[4]
    transform = mtch.groups()[6]
    date_val = info_dict[key]

    if not isinstance(date_val, dt.datetime):
        return None  # only for datetime types

    res = date_val
    if transform:
        align_params = _parse_align_time_transform(transform)
        if align_params:
            res = align_time(date_val,
                             dt.timedelta(minutes=align_params[0]),
                             dt.timedelta(minutes=align_params[1]),
                             align_params[2])
    # Note: the previous fallback `res = compose(var_val, self.info)` was dead
    # code and referenced an undefined `var_val`; the caller already falls
    # back to a plain compose() when this function returns None.
    return res

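# A standalone sketch of the alignment idea behind "{time:%Y%m%d%H%M|align(15)}":
# floor a datetime to a 15-minute boundary before composing, so files from the
# same timeslot end up with the same name. floor_to_minutes is a hypothetical
# helper, not the project's align_time function.
import datetime as dt
from trollsift import compose

def floor_to_minutes(when, minutes):
    """Round *when* down to the previous multiple of *minutes*."""
    discard = dt.timedelta(minutes=when.minute % minutes,
                           seconds=when.second,
                           microseconds=when.microsecond)
    return when - discard

info = {"time": floor_to_minutes(dt.datetime(2024, 1, 1, 12, 7, 42), 15)}
print(compose("msg_{time:%Y%m%d_%H%M}.nc", info))  # msg_20240101_1200.nc
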
def get_exposed_layer_directories(config):
    """Get full directory paths to each configured layer."""
    exposed_base_dir = config.get("exposed_base_dir")
    create_subdirectories = config.get("create_subdirectories", True)
    if exposed_base_dir is None:
        logger.warning("No 'exposed_base_dir' given in config, using "
                       "current directory")
        exposed_base_dir = os.path.curdir
    dirs = {}
    common_items = config.get("common_items", dict())
    for layer_config in config["layers"]:
        meta = common_items.copy()
        meta.update(layer_config)
        layer_name = trollsift.compose(
            meta.get("name", meta.get("layer_pattern")), meta)
        if create_subdirectories:
            path = os.path.join(exposed_base_dir, layer_name)
        else:
            path = exposed_base_dir
        dirs[layer_name] = path
    return dirs

def add_layer_metadata(cat, workspace, layer_name, time_dim, meta):
    """Add metadata for the given layer."""
    coverage = cat.get_resource(workspace=workspace, store=layer_name)
    if coverage is None:
        logger.error("Could not get coverage for workspace '%s' and store '%s'",
                     workspace, layer_name)
        return False
    for attribute in LAYER_ATTRIBUTES:
        if attribute in meta:
            attr = _get_and_clean_attribute(meta[attribute])
            if isinstance(attr, str):
                attr = trollsift.compose(attr, meta)
            setattr(coverage, attribute, attr)
    coverage = _add_time_dimension(coverage, time_dim)
    coverage = _add_cache_age_max(coverage, meta.get("cache_age_max", None))
    # Save the added metadata
    cat.save(coverage)
    logger.info("Metadata written for layer '%s' on workspace '%s'",
                layer_name, workspace)
    return True

def __call__(self, job):
    """Call the publisher."""
    mda = job['input_mda'].copy()
    mda.pop('dataset', None)
    mda.pop('collection', None)
    for fmat, _fmat_config in plist_iter(job['product_list']['product_list']):
        prod_path = "/product_list/areas/%s/products/%s" % (fmat['area'],
                                                            fmat['product'])
        topic_pattern = get_config_value(job['product_list'], prod_path,
                                         "publish_topic")
        file_mda = mda.copy()
        try:
            file_mda['uri'] = fmat['filename']
        except KeyError:
            continue
        file_mda['uid'] = os.path.basename(fmat['filename'])
        topic = compose(topic_pattern, fmat)
        msg = Message(topic, 'file', file_mda)
        LOG.debug('Publishing %s', str(msg))
        self.pub.send(str(msg))
    self.pub.stop()

def parse_file_info(self, event):
    """Parse satellite and orbit information from the filename.

    A message is sent if a matching file pattern is found.
    """
    try:
        LOGGER.debug("filter: %s\t event: %s", self.file_parser.fmt,
                     event.pathname)
        pathname_join = os.path.basename(event.pathname)
        if 'origin_inotify_base_dir_skip_levels' in self.custom_vars:
            pathname_list = event.pathname.split('/')
            pathname_join = "/".join(pathname_list[int(
                self.custom_vars['origin_inotify_base_dir_skip_levels']):])
        else:
            LOGGER.debug(
                "No origin_inotify_base_dir_skip_levels in self.custom_vars")

        self.info = OrderedDict()
        self.info.update(self.file_parser.parse(pathname_join))
        LOGGER.debug("Extracted: %s", str(self.info))
    except ValueError:
        # The filename didn't match the pattern, so empty the info dict
        LOGGER.info("Couldn't extract any useful information")
        self.info = OrderedDict()
    else:
        self.info['uri'] = event.pathname
        self.info['uid'] = os.path.basename(event.pathname)
        self.info['sensor'] = self.instrument.split(',')
        LOGGER.debug("self.info['sensor']: " + str(self.info['sensor']))

        if self.tbus_orbit and "orbit_number" in self.info:
            LOGGER.info("Changing orbit number by -1!")
            self.info["orbit_number"] -= 1

        # Replace values with the corresponding aliases, if any are given
        if self.aliases:
            info = self.info.copy()
            for key in info:
                if key in self.aliases:
                    self.info['orig_' + key] = self.info[key]
                    self.info[key] = self.aliases[key][str(self.info[key])]

        # Add start_time and end_time if not present
        try:
            base_time = self.info["time"]
        except KeyError:
            try:
                base_time = self.info["nominal_time"]
            except KeyError:
                base_time = self.info["start_time"]
        if "start_time" not in self.info:
            self.info["start_time"] = base_time
        if "start_date" in self.info:
            self.info["start_time"] = \
                dt.datetime.combine(self.info["start_date"].date(),
                                    self.info["start_time"].time())
            if "end_date" not in self.info:
                self.info["end_date"] = self.info["start_date"]
            del self.info["start_date"]
        if "end_date" in self.info:
            self.info["end_time"] = \
                dt.datetime.combine(self.info["end_date"].date(),
                                    self.info["end_time"].time())
            del self.info["end_date"]
        if "end_time" not in self.info and self.granule_length > 0:
            self.info["end_time"] = \
                base_time + dt.timedelta(seconds=self.granule_length)

        if "end_time" in self.info:
            while self.info["start_time"] > self.info["end_time"]:
                self.info["end_time"] += dt.timedelta(days=1)

        if self.custom_vars is not None:
            for var_name in self.custom_vars:
                var_pattern = self.custom_vars[var_name]
                var_val = None
                if '%' in var_pattern:
                    var_val = helper_functions.create_aligned_datetime_var(
                        var_pattern, self.info)
                if var_val is None:
                    var_val = compose(var_pattern, self.info)
                self.info[var_name] = var_val

def run(self):
    """Run the thread."""
    self._loop = True
    # Parse settings for saving
    compression = self._save_settings.get('compression', 6)
    tags = self._save_settings.get('tags', None)
    fformat = self._save_settings.get('fformat', None)
    gdal_options = self._save_settings.get('gdal_options', None)
    blocksize = self._save_settings.get('blocksize', None)

    kwargs = {'compression': compression,
              'tags': tags,
              'fformat': fformat,
              'gdal_options': gdal_options,
              'blocksize': blocksize}

    # Initialize publisher context
    with Publish("l2producer", port=self._port,
                 nameservers=self._nameservers) as pub:
        while self._loop:
            if self.queue is not None:
                try:
                    lcl = self.queue.get(True, 1)
                    if self.prev_lock is not None:
                        self.logger.debug("Writer acquires lock of "
                                          "previous worker: %s",
                                          str(self.prev_lock))
                        utils.acquire_lock(self.prev_lock)
                    self.queue.task_done()
                except Queue.Empty:
                    # After all the items have been processed, release the
                    # lock for the previous worker
                    continue

                try:
                    info = lcl.attrs.copy()
                    product_config = lcl.attrs["product_config"]
                    products = lcl.attrs["products"]
                except AttributeError:
                    info = lcl.info.copy()
                    product_config = lcl.info["product_config"]
                    products = lcl.info["products"]

                # Available composite names
                composite_names = [dset.name for dset in lcl.keys()]

                for i, prod in enumerate(products):
                    # Skip the removed composites
                    if prod not in composite_names:
                        continue

                    fnames, _ = utils.create_fnames(info, product_config, prod)

                    # Some of the files might have specific writers,
                    # use them if configured
                    writers = utils.get_writer_names(product_config, prod,
                                                     info["area_id"])

                    for j, fname in enumerate(fnames):
                        if writers[j]:
                            self.logger.info("Saving %s with writer %s",
                                             fname, writers[j])
                        else:
                            self.logger.info("Saving %s with default writer",
                                             fname)

                        lcl.save_dataset(prod, filename=fname,
                                         writer=writers[j], **kwargs)

                        self.logger.info("Saved %s", fname)

                        try:
                            area = lcl[prod].attrs.get("area")
                        except AttributeError:
                            area = lcl[prod].info.get("area")

                        try:
                            area_data = {"name": area.name,
                                         "area_id": area.area_id,
                                         "proj_id": area.proj_id,
                                         "proj4": area.proj4_string,
                                         "shape": (area.x_size, area.y_size)}
                        except AttributeError:
                            area_data = None

                        to_send = dict(info) if '*' in self._publish_vars else {}
                        for dest_key in self._publish_vars:
                            if dest_key != '*':
                                to_send[dest_key] = info.get(
                                    self._publish_vars[dest_key]
                                    if isinstance(self._publish_vars, dict)
                                    else dest_key)
                        to_send_fix = {"nominal_time": info["start_time"],
                                       "uid": os.path.basename(fname),
                                       "uri": os.path.abspath(fname),
                                       "area": area_data,
                                       "productname": info["productname"]}
                        to_send.update(to_send_fix)

                        if self._topic is not None:
                            topic = self._topic
                            if area_data is not None:
                                topic = compose(topic, area_data)
                            else:
                                topic = compose(topic, {'area_id': 'satproj'})

                            msg = Message(topic, "file", to_send)
                            pub.send(str(msg))
                            self.logger.debug("Sent message: %s", str(msg))

                del lcl
                lcl = None
                # After all the items have been processed, release the
                # lock for the previous worker
                if self.prev_lock is not None:
                    utils.release_locks([self.prev_lock],
                                        self.logger.debug,
                                        "Writer releases lock of "
                                        "previous worker: %s" %
                                        str(self.prev_lock))
            else:
                time.sleep(1)

def update_nwp(params):
    LOG.info("METNO update nwp")

    tempfile.tempdir = params['options']['nwp_outdir']

    ecmwf_path = params['options']['ecmwf_path']
    if not os.path.exists(ecmwf_path):
        ecmwf_path = ecmwf_path.replace("storeB", "storeA")
        LOG.warning("Need to replace storeB with storeA for ecmwf_path: "
                    "{}".format(str(ecmwf_path)))

    filelist = glob(os.path.join(ecmwf_path,
                                 params['options']['ecmwf_prefix'] + "*"))
    if len(filelist) == 0:
        LOG.info("Found no input files! dir = " + str(
            os.path.join(ecmwf_path, params['options']['ecmwf_prefix'] + "*")))
        return

    from trollsift import Parser, compose
    filelist.sort()
    for filename in filelist:
        if params['options']['ecmwf_file_name_sift'] is not None:
            try:
                parser = Parser(params['options']['ecmwf_file_name_sift'])
            except NoOptionError as noe:
                LOG.error("NoOptionError {}".format(noe))
                continue
            if not parser.validate(os.path.basename(filename)):
                LOG.error("Parser validate on filename: {} failed.".format(filename))
                continue
            res = parser.parse("{}".format(os.path.basename(filename)))

            time_now = datetime.utcnow()
            if 'analysis_time' in res:
                if res['analysis_time'].year == 1900:
                    # This is tricky. The year is missing from the filename,
                    # so it has to be guessed from a combination of the current
                    # year and month and the month of the analysis time taken
                    # from the filename. If the current month is 1 (January)
                    # and the analysis month is 12, the time has passed New
                    # Year, but the NWP analysis time is in the previous year.
                    if time_now.month == 1 and res['analysis_time'].month == 12:
                        analysis_year = time_now.year - 1
                    else:
                        analysis_year = time_now.year

                    res['analysis_time'] = res['analysis_time'].replace(
                        year=analysis_year)
            else:
                LOG.error("Can not parse analysis_time in file name. "
                          "Check config and filename timestamp")

            if 'forecast_time' in res:
                if res['forecast_time'].year == 1900:
                    # See above for an explanation
                    if (res['analysis_time'].month == 12 and
                            res['forecast_time'].month == 1):
                        forecast_year = res['analysis_time'].year + 1
                    else:
                        forecast_year = res['analysis_time'].year

                    res['forecast_time'] = res['forecast_time'].replace(
                        year=forecast_year)
            else:
                LOG.error("Can not parse forecast_time in file name. "
                          "Check config and filename timestamp")

            forecast_time = res['forecast_time']
            analysis_time = res['analysis_time']
            step_delta = forecast_time - analysis_time
            step = "{:03d}H{:02d}M".format(
                int(step_delta.days * 24 + step_delta.seconds / 3600), 0)
        else:
            LOG.error("No sift pattern given. Can not parse input NWP files")

        if analysis_time < params['starttime']:
            # LOG.debug("skip analysis time {} older than search time {}".format(analysis_time, params['starttime']))
            continue

        if int(step[:3]) not in params['nlengths']:
            # LOG.debug("Skip step {}, not in {}".format(int(step[:3]), params['nlengths']))
            continue

        output_parameters = {}
        output_parameters['analysis_time'] = analysis_time
        output_parameters['step_hour'] = int(step_delta.days * 24 +
                                             step_delta.seconds / 3600)
        output_parameters['step_min'] = 0
        try:
            if not os.path.exists(params['options']['nwp_outdir']):
                os.makedirs(params['options']['nwp_outdir'])
        except OSError as e:
            LOG.error("Failed to create directory: %s", e)
        result_file = ""
        try:
            result_file = os.path.join(
                params['options']['nwp_outdir'],
                compose(params['options']['nwp_output'], output_parameters))
            _result_file = os.path.join(
                params['options']['nwp_outdir'],
                compose("." + params['options']['nwp_output'],
                        output_parameters))
            _result_file_lock = os.path.join(
                params['options']['nwp_outdir'],
                compose("." + params['options']['nwp_output'] + ".lock",
                        output_parameters))
        except Exception as e:
            LOG.error("Joining outdir with output for nwp failed with: {}".format(e))

        LOG.info("Result file: {}".format(result_file))
        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            continue

        import fcntl
        import errno
        import time
        rfl = open(_result_file_lock, 'w+')
        # Do some locking
        while True:
            try:
                fcntl.flock(rfl, fcntl.LOCK_EX | fcntl.LOCK_NB)
                LOG.debug("1Got lock for NWP outfile: {}".format(result_file))
                break
            except IOError as e:
                if e.errno != errno.EAGAIN:
                    raise
                else:
                    LOG.debug("Waiting for lock ... {}".format(result_file))
                    time.sleep(1)

        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            # Need to release the lock
            fcntl.flock(rfl, fcntl.LOCK_UN)
            rfl.close()
            continue

        fout = open(_result_file, 'wb')
        try:
            # Do the static fields
            # Note: the field is not in the filename variable, but a
            # configured filename for static fields
            static_filename = params['options']['ecmwf_static_surface']
            if not os.path.exists(static_filename):
                static_filename = static_filename.replace("storeB", "storeA")
                LOG.warning("Need to replace storeB with storeA")

            index_vals = []
            index_keys = ['paramId', 'level']
            LOG.debug("Start building index")
            LOG.debug("Handling file: %s", filename)
            iid = ecc.codes_index_new_from_file(filename, index_keys)
            filename_n1s = filename.replace('N2D', 'N1S')
            LOG.debug("Add to index %s", filename_n1s)
            ecc.codes_index_add_file(iid, filename_n1s)
            LOG.debug("Add to index %s", static_filename)
            ecc.codes_index_add_file(iid, static_filename)
            LOG.debug("Done index")
            for key in index_keys:
                key_vals = ecc.codes_index_get(iid, key)
                key_vals = tuple(x for x in key_vals if x != 'undef')
                index_vals.append(key_vals)

            for prod in product(*index_vals):
                for i in range(len(index_keys)):
                    ecc.codes_index_select(iid, index_keys[i], prod[i])

                while 1:
                    gid = ecc.codes_new_from_index(iid)
                    if gid is None:
                        break

                    param = ecc.codes_get(gid, index_keys[0])
                    parameters = [172, 129, 235, 167, 168, 137, 130,
                                  131, 132, 133, 134, 157]
                    if param in parameters:
                        LOG.debug("Doing param: %d", param)
                        copy_needed_field(gid, fout)

                    ecc.codes_release(gid)
            ecc.codes_index_release(iid)

            fout.close()
            os.rename(_result_file, result_file)
        except WrongLengthError as wle:
            LOG.error("Something wrong with the data: %s", wle)
            raise

        # In the end release the lock
        fcntl.flock(rfl, fcntl.LOCK_UN)
        rfl.close()

        os.remove(_result_file_lock)
    return

def create_dispatch_uri(ditem, fmat):
    """Create a URI from dispatch info."""
    path = compose(ditem['path'], fmat)
    netloc = ditem.get('hostname', '')

    return urlunsplit((ditem.get('scheme', ''), netloc, path, '', ''))

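# A small usage sketch of the pattern above: compose the destination path from
# message metadata, then assemble the URI. The scheme, hostname, path pattern
# and metadata values are illustrative assumptions.
from urllib.parse import urlunsplit
from trollsift import compose

ditem = {"scheme": "ftp", "hostname": "ftp.example.com",
         "path": "/incoming/{platform_name}/{product}.tif"}
fmat = {"platform_name": "NOAA-20", "product": "overview"}
path = compose(ditem["path"], fmat)  # '/incoming/NOAA-20/overview.tif'
uri = urlunsplit((ditem["scheme"], ditem.get("hostname", ""), path, "", ""))
print(uri)  # ftp://ftp.example.com/incoming/NOAA-20/overview.tif
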
def run(self):
    """Run the thread."""
    with Publish("l2producer") as pub:
        while self._loop:
            try:
                obj, file_items, params = self.prod_queue.get(True, 1)
            except Queue.Empty:
                continue
            try:
                # Sort the file items into categories, to allow copying
                # similar ones.
                sorted_items = {}
                for item in file_items:
                    attrib = item.attrib.copy()
                    for key in ["output_dir", "thumbnail_name",
                                "thumbnail_size"]:
                        if key in attrib:
                            del attrib[key]
                    if 'format' not in attrib:
                        attrib.setdefault('format',
                                          os.path.splitext(item.text)[1][1:])
                    key = tuple(sorted(attrib.items()))
                    sorted_items.setdefault(key, []).append(item)

                local_params = params.copy()
                local_aliases = local_params['aliases']
                for key, aliases in local_aliases.items():
                    if key in local_params:
                        local_params[key] = aliases.get(params[key],
                                                        params[key])

                for item, copies in sorted_items.items():
                    attrib = dict(item)
                    if attrib.get("overlay", "").startswith("#"):
                        obj.add_overlay(hash_color(attrib.get("overlay")))
                    elif len(attrib.get("overlay", "")) > 0:
                        LOGGER.debug("Adding overlay from config file")
                        obj.add_overlay_config(attrib["overlay"])
                    fformat = attrib.get("format")

                    # Actually save the data to disk.
                    saved = False
                    for copy in copies:
                        output_dir = copy.attrib.get("output_dir",
                                                     params["output_dir"])
                        fname = compose(os.path.join(output_dir, copy.text),
                                        local_params)
                        if not saved:
                            obj.save(fname, fformat=fformat,
                                     compression=copy.attrib.get("compression", 6))
                            LOGGER.info("Saved %s to %s", str(obj), fname)
                            saved = fname
                        else:
                            link_or_copy(saved, fname)
                            saved = fname
                        if ("thumbnail_name" in copy.attrib and
                                "thumbnail_size" in copy.attrib):
                            thsize = [int(val) for val in
                                      copy.attrib["thumbnail_size"].split("x")]
                            copy.attrib["thumbnail_size"] = thsize
                            thname = compose(
                                os.path.join(output_dir,
                                             copy.attrib["thumbnail_name"]),
                                local_params)
                            copy.attrib["thumbnail_name"] = thname
                            thumbnail(fname, thname, thsize, fformat)

                        msg = _create_message(obj, os.path.basename(fname),
                                              fname, params)
                        pub.send(str(msg))
                        LOGGER.debug("Sent message %s", str(msg))
            except:
                for item in file_items:
                    if "thumbnail_size" in item.attrib:
                        item.attrib["thumbnail_size"] = str(
                            item.attrib["thumbnail_size"])
                LOGGER.exception("Something wrong happened saving %s to %s",
                                 str(obj),
                                 str([tostring(item) for item in file_items]))
            finally:
                self.prod_queue.task_done()

mda["number"] + "/" + mda["level"]) if section not in config.sections(): logger.debug("Skipping %s", section) continue logger.debug("Starting catting for %s", section) cat = config.get(section, "cat") pattern = config.get(section, "pattern") mda["proc_time"] = datetime.utcnow() mda["end_time"] = msg.data[-1]["end_time"] try: min_length = config.getint(section, 'min_length') except NoOptionError: min_length = 0 if mda["end_time"] - mda["start_time"] < timedelta(minutes=min_length): logger.info('Pass too short, skipping: %s to %s', str(mda["start_time"]), str(mda["end_time"])) continue fname = compose(pattern, mda) mda["uri"] = fname mda["filename"] = os.path.basename(fname) if cat == "bz2": with open(fname, "wb") as out: for cmda in msg.data: infile = bz2.BZ2File(cmda["uri"], "r") out.write(infile.read()) infile.close() new_msg = Message(msg.subject, "file", mda) logger.info("Done") logger.debug("Sending %s", str(new_msg)) pub.send(str(new_msg))
logger.debug("Skipping %s", section) continue logger.debug("Starting catting for %s", section) cat = config.get(section, "cat") pattern = config.get(section, "pattern") mda["proc_time"] = datetime.utcnow() mda["end_time"] = msg.data[-1]["end_time"] try: min_length = config.getint(section, 'min_length') except NoOptionError: min_length = 0 if mda["end_time"] - mda["start_time"] < timedelta( minutes=min_length): logger.info('Pass too short, skipping: %s to %s', str(mda["start_time"]), str(mda["end_time"])) continue fname = compose(pattern, mda) mda["uri"] = fname mda["filename"] = os.path.basename(fname) if cat == "bz2": with open(fname, "wb") as out: for cmda in msg.data: infile = bz2.BZ2File(cmda["uri"], "r") out.write(infile.read()) infile.close() new_msg = Message(msg.subject, "file", mda) logger.info("Done") logger.debug("Sending %s", str(new_msg)) pub.send(str(new_msg))
def get_all(self, satellites):
    """Retrieve all the available scanlines from the stream, and save them."""
    sat_last_seen = {}
    sat_lines = {}
    first_time = None
    for sat in satellites:
        sat_lines[sat] = {}
    queue = Queue()
    self.add_queue(queue)
    try:
        while self.loop:
            try:
                sat, utctime, senders = queue.get(True, 2)
                if sat not in satellites:
                    continue
                if sat not in sat_last_seen:
                    logger.info("Start receiving data for " + sat)
                logger.debug("Picking line " +
                             " ".join([str(utctime), str(senders)]))
                # Choose the highest quality, lowest ping time,
                # highest elevation.
                sender_elevation_quality = sorted(
                    senders, key=(lambda x: (x[2], -x[3], x[1])))
                best_req = None
                for sender, elevation, quality, ping_time in reversed(
                        sender_elevation_quality):
                    best_req = self._requesters[sender.split(":")[0]]
                    if best_req.jammed:
                        continue
                    sat_last_seen[sat] = datetime.utcnow(), elevation
                    logger.debug("requesting " +
                                 " ".join([str(sat), str(utctime),
                                           str(sender), str(elevation)]))
                    # TODO: this should be parallelized, and timed.
                    # TODO: Choking ?
                    line = best_req.get_line(sat, utctime)
                    if line is None:
                        logger.warning("Could not retrieve line %s",
                                       str(utctime))
                    else:
                        sat_lines[sat][utctime] = line
                        if first_time is None and quality == 100:
                            first_time = utctime
                        break
                if best_req is None:
                    logger.debug("No working connection, could not retrieve"
                                 " line %s", str(utctime))
                    continue
            except Empty:
                pass

            for sat, (utctime, elevation) in sat_last_seen.items():
                if (utctime + CLIENT_TIMEOUT < datetime.utcnow()
                        or (utctime + timedelta(seconds=3) < datetime.utcnow()
                            and elevation < 0.5
                            and len(sat_lines[sat]) > 100)):
                    # Write the lines to file
                    try:
                        first_time = (first_time or
                                      min(sat_lines[sat].keys()))
                        last_time = max(sat_lines[sat].keys())
                        logger.info(sat + " seems to be inactive now,"
                                    " writing file.")
                        fdict = {}
                        fdict["platform"], fdict["number"] = sat.split()
                        fdict["utctime"] = first_time
                        filename = compose(self._out, fdict)
                        with open(filename, "wb") as fp_:
                            for linetime in sorted(sat_lines[sat].keys()):
                                fp_.write(sat_lines[sat][linetime])

                        if self._publisher:
                            to_send = {}
                            to_send["platform_name"] = true_names[sat]
                            to_send["format"] = "HRPT"
                            to_send["start_time"] = first_time
                            to_send["end_time"] = last_time
                            to_send["data_processing_level"] = "0"
                            to_send["variant"] = 'DR'
                            to_send["uid"] = os.path.basename(filename)
                            fullname = os.path.realpath(filename)
                            to_send["uri"] = urlunparse(("ssh",
                                                         self._hostname,
                                                         fullname,
                                                         "", "", ""))
                            if sat == "NOAA 15":
                                to_send["sensor"] = ("avhrr/3", "amsu-a",
                                                     "amsu-b", "hirs/3")
                            elif sat in ["NOAA 19", "NOAA 18"]:
                                to_send["sensor"] = ("avhrr/3", "mhs",
                                                     "amsu-a", "hirs/4")
                            to_send["type"] = "binary"
                            msg = Message(
                                "/".join(("",
                                          to_send["format"],
                                          to_send["data_processing_level"],
                                          self._station)),
                                "file", to_send)
                            logger.debug("publishing %s", str(msg))
                            self._publisher.send(str(msg))
                    except ValueError:
                        logger.info("Got no lines for " + sat)
                        continue
                    finally:
                        sat_lines[sat] = {}
                        del sat_last_seen[sat]
                        first_time = None
    except KeyboardInterrupt:
        for sat, (utctime, elevation) in sat_last_seen.items():
            logger.info(sat + ": writing file.")
            first_time = first_time or min(sat_lines[sat].keys())
            filename = first_time.isoformat() + sat + ".hmf"
            with open(filename, "wb") as fp_:
                for linetime in sorted(sat_lines[sat].keys()):
                    fp_.write(sat_lines[sat][linetime])
            sat_lines[sat] = {}
            del sat_last_seen[sat]
        raise
