def message_to_jobs(msg, product_list):
    """Convert a posttroll message *msg* to a list of jobs given a *product_list*."""
    plist = product_list['product_list']
    default_formats = plist.get('formats', None)
    # Propagate the top-level format definitions to every product that does
    # not define its own; deep-copied so products can be tweaked independently.
    for _prod, pconfig in plist_iter(plist, level='product'):
        if default_formats is not None and 'formats' not in pconfig:
            pconfig['formats'] = copy.deepcopy(default_formats)
    # TODO: check the uri is accessible from the current host.
    input_filenames = [urlparse(uri).path
                       for uri in gen_dict_extract(msg.data, 'uri')]
    jobs = OrderedDict()
    for prio, areas in get_area_priorities(product_list).items():
        job = OrderedDict()
        job['input_filenames'] = input_filenames.copy()
        job['input_mda'] = msg.data.copy()
        job['product_list'] = {}
        for section in product_list:
            if section != 'product_list':
                # Non-product sections are shared by reference across jobs.
                job['product_list'][section] = product_list[section]
                continue
            if section not in job['product_list']:
                # Copy the section, then replace 'areas' in place (delete and
                # re-add) so only the areas of this priority are included.
                pruned = OrderedDict(product_list[section].copy())
                del pruned['areas']
                pruned['areas'] = OrderedDict()
                job['product_list'][section] = pruned
            for area in areas:
                job['product_list'][section]['areas'][area] = \
                    product_list[section]['areas'][area]
        jobs[prio] = job
    return jobs
def add_overviews(job):
    """Add overviews to images already written to disk.

    For every format entry in the job's product list that has both an
    ``overviews`` setting and a ``filename``, the file is reopened in place
    with rasterio and the requested overview levels are built using average
    resampling.  Files rasterio cannot open are skipped with a warning
    (best effort — a single unreadable file should not fail the whole job).
    """
    # Get the formats, including filenames and overview settings
    for _flat_fmat, fmt in plist_iter(job['product_list']['product_list']):
        if "overviews" not in fmt or 'filename' not in fmt:
            continue
        fname = fmt['filename']
        overviews = fmt['overviews']
        try:
            with rasterio.open(fname, 'r+') as dst:
                dst.build_overviews(overviews, Resampling.average)
                dst.update_tags(ns='rio_overview', resampling='average')
            LOG.info("Added overviews to %s", fname)
        except rasterio.RasterioIOError:
            # Previously a silent `pass`; keep the best-effort behaviour but
            # make the skipped file visible in the logs.
            LOG.warning("Could not open %s to add overviews", fname)
def save_datasets(job):
    """Save the datasets (and trigger the computation).

    If the `use_tmp_file` option is provided in the product list and is set to
    True, the file will be first saved to a temporary name before being
    renamed. This is useful when other processes are waiting for the file to
    be present to start their work, but would crash on incomplete files.
    """
    scns = job['resampled_scenes']
    objs = []
    # Base metadata for filename composition; 'dataset' would collide with
    # per-product values, so it is dropped.
    base_config = job['input_mda'].copy()
    base_config.pop('dataset', None)
    # Maps temporary filename -> final filename, renamed after computation.
    renames = {}
    for fmat, fmat_config in plist_iter(job['product_list']['product_list'], base_config):
        fname_pattern = fmat['fname_pattern']
        filename = compose(os.path.join(fmat['output_dir'], fname_pattern), fmat)
        directory = fmat['output_dir']
        if not os.path.exists(directory):
            os.makedirs(directory)
        if fmat.get('use_tmp_file', False):
            # Create the temporary file eagerly to reserve the name, then
            # close it — the writer will reopen it by name.
            file_object = NamedTemporaryFile(delete=False, dir=directory)
            tmp_filename = file_object.name
            file_object.close()
            # NamedTemporaryFile creates 0o600 files; make it world-readable.
            os.chmod(tmp_filename, 0o644)
            renames[tmp_filename] = filename
            filename = tmp_filename
        # These keys would otherwise be forwarded to save_dataset as kwargs.
        fmat.pop('format', None)
        fmat.pop('filename', None)
        try:
            # TODO: make these datasetIDs to take resolution into account
            res = fmat.get('resolution', None)
            dsid = DatasetID(name=fmat['product'], resolution=res, modifiers=None)
            # compute=False: collect delayed writers, computed in one go below.
            objs.append(scns[fmat['area']].save_dataset(dsid, filename=filename, compute=False, **fmat_config))
        except KeyError as err:
            LOG.info('Skipping %s: %s', fmat['productname'], str(err))
        else:
            # Record the *final* name so downstream consumers see it.
            fmat_config['filename'] = renames.get(filename, filename)
    compute_writer_results(objs)
    # Only after all writes completed: move tmp files to their final names.
    for tmp_name, actual_name in renames.items():
        os.rename(tmp_name, actual_name)
def __call__(self, job):
    """Call the publisher."""
    metadata = job['input_mda'].copy()
    # Drop keys that must not leak into the published file metadata.
    for key in ('dataset', 'collection'):
        metadata.pop(key, None)
    plist = job['product_list']['product_list']
    for fmat, fmat_config in plist_iter(plist, metadata):
        try:
            topic, file_mda = self.create_message(fmat, metadata)
        except KeyError:
            LOG.debug('Could not create a message for %s.', str(fmat))
            continue
        message = Message(topic, 'file', file_mda)
        LOG.debug('Publishing %s', str(message))
        self.pub.send(str(message))
        self.send_dispatch_messages(fmat, fmat_config, topic, file_mda)
def load_composites(job):
    """Load composites given in the job's product_list."""
    plist = job['product_list']['product_list']
    # Group the requested products by the resolution they should be loaded at.
    by_resolution = {}
    for flat_cfg, _cfg in plist_iter(plist, level='product'):
        resolution = flat_cfg.get('resolution', None)
        by_resolution.setdefault(resolution, set()).add(flat_cfg['product'])
    scene = job['scene']
    # Composite generation is deferred unless 'delay_composites' is disabled.
    generate = plist.get('delay_composites', True) is False
    for resolution, composites in by_resolution.items():
        LOG.info('Loading %s at resolution %s', str(composites), str(resolution))
        scene.load(composites, resolution=resolution, generate=generate)
    job['scene'] = scene
def __call__(self, job):
    """Call the publisher."""
    metadata = job['input_mda'].copy()
    # Drop keys that must not leak into the published file metadata.
    for key in ('dataset', 'collection'):
        metadata.pop(key, None)
    plist = job['product_list']['product_list']
    for fmat, fmat_config in plist_iter(plist, metadata):
        scene = job['resampled_scenes'].get(fmat['area'], [])
        # Skip products that never made it into the resampled scene.
        if product_missing_from_scene(fmat['product'], scene):
            LOG.debug('Not publishing missing product %s.', str(fmat))
            continue
        try:
            topic, file_mda = self.create_message(fmat, metadata)
        except KeyError:
            LOG.debug('Could not create a message for %s.', str(fmat))
            continue
        message = Message(topic, 'file', file_mda)
        LOG.info('Publishing %s', str(message))
        self.pub.send(str(message))
        self.send_dispatch_messages(fmat, fmat_config, topic, file_mda)
def save_datasets(job):
    """Save the datasets (and trigger the computation).

    If the `use_tmp_file` option is provided in the product list and is set to
    True, the file will be first saved to a temporary name before being
    renamed. This is useful when other processes are waiting for the file to
    be present to start their work, but would crash on incomplete files.
    """
    scenes = job['resampled_scenes']
    base_config = job['input_mda'].copy()
    base_config.pop('dataset', None)
    plist = job['product_list']['product_list']
    # The renamed_files context renames temporaries to final names on exit,
    # so the computation must happen inside the `with` block.
    with renamed_files() as renames:
        delayed_writes = []
        for fmat, fmat_config in plist_iter(plist, base_config):
            delayed = save_dataset(job, scenes, fmat, fmat_config, renames)
            if delayed is not None:
                delayed_writes.append(delayed)
        compute_writer_results(delayed_writes)
def save_datasets(job):
    """Save the datasets (and trigger the computation).

    If the ``use_tmp_file`` option is provided in the product list and is set
    to True, the file will be first saved to a temporary name before being
    renamed.  This is useful when other processes are waiting for the file to
    be present to start their work, but would crash on incomplete files.

    If the ``staging_zone`` option is provided in the product list, then the
    file will be created in this directory first, using either a temporary
    filename (if ``use_tmp_file`` is true) or the final filename (if
    ``use_tmp_file`` is false).  This is useful for writers which write the
    filename to the headers, such as the Satpy ninjotiff and ninjogeotiff
    writers.  The ``staging_zone`` directory must be on the same filesystem
    as ``output_dir``.  When using those writers, it is recommended to set
    ``use_tmp_file`` to `False` when using a ``staging_zone`` directory, such
    that the filename written to the headers remains meaningful.
    """
    scenes = job['resampled_scenes']
    base_config = job['input_mda'].copy()
    base_config.pop('dataset', None)
    plist = job['product_list']['product_list']
    eager_writing = plist.get("eager_writing", False)
    # The renamed_files context renames temporaries to final names on exit,
    # so the (possibly deferred) computation must happen inside it.
    with renamed_files() as renames:
        delayed_writes = []
        for fmat, fmat_config in plist_iter(plist, base_config):
            delayed = save_dataset(scenes, fmat, fmat_config, renames,
                                   compute=eager_writing)
            if delayed is not None:
                delayed_writes.append(delayed)
            # Advertise every produced filename to downstream consumers.
            job['produced_files'].put(fmat_config['filename'])
        if not eager_writing:
            compute_writer_results(delayed_writes)
def __call__(self, job):
    """Call the publisher."""
    metadata = job['input_mda'].copy()
    # Drop keys that must not leak into the published file metadata.
    for key in ('dataset', 'collection'):
        metadata.pop(key, None)
    for fmat, _fmat_config in plist_iter(
            job['product_list']['product_list']):
        prod_path = "/product_list/areas/%s/products/%s" % (
            fmat['area'], fmat['product'])
        topic_pattern = get_config_value(job['product_list'],
                                         prod_path,
                                         "publish_topic")
        file_mda = metadata.copy()
        try:
            file_mda['uri'] = fmat['filename']
        except KeyError:
            # No file was produced for this product; nothing to publish.
            continue
        file_mda['uid'] = os.path.basename(fmat['filename'])
        message = Message(compose(topic_pattern, fmat), 'file', file_mda)
        LOG.debug('Publishing %s', str(message))
        self.pub.send(str(message))
    self.pub.stop()
def test_filepublisher_without_compose(self):
    """Test publishing with a static (non-composed) topic pattern."""
    from trollflow2.plugins import FilePublisher
    from trollflow2.dict_tools import plist_iter
    from trollsift import compose
    import os.path
    # Patch out Message and NoisyPublisher so nothing is actually sent.
    with mock.patch('trollflow2.plugins.Message') as message, mock.patch('trollflow2.plugins.NoisyPublisher'):
        pub = FilePublisher()
        pub.pub.start.assert_called_once()
        product_list = self.product_list.copy()
        # A static topic: no substitution fields, shared by all products.
        product_list['product_list']['publish_topic'] = '/static_topic'
        job = {'product_list': product_list, 'input_mda': self.input_mda}
        topic_pattern = job['product_list']['product_list']['publish_topic']
        topics = []
        # Create filenames and topics
        for fmat, fmat_config in plist_iter(job['product_list']['product_list'], job['input_mda'].copy()):
            fname_pattern = fmat['fname_pattern']
            filename = compose(os.path.join(fmat['output_dir'], fname_pattern), fmat)
            fmat.pop('format', None)
            fmat_config['filename'] = filename
            topics.append(compose(topic_pattern, fmat))
        pub(job)
        message.assert_called()
        pub.pub.send.assert_called()
        pub.__del__()
        pub.pub.stop.assert_called()
        # Check each published topic against the expected one, in order of
        # area/product iteration; mock_calls interleaves str() conversions.
        i = 0
        for area in job['product_list']['product_list']['areas']:
            for prod in job['product_list']['product_list']['areas'][area]:
                # Skip calls to __str__
                if 'call().__str__()' != str(message.mock_calls[i]):
                    self.assertTrue(topics[i] in str(message.mock_calls[i]))
                i += 1
def test_iter(self):
    """Test plist_iter."""
    from trollflow2.dict_tools import plist_iter
    prodlist = read_config(raw_string=yaml_test1)['product_list']
    # Expected flattened per-format configs, in iteration order: keys from
    # the common, area, product and format levels merged into one dict.
    expected = [
        {
            'areaname': 'euron1_in_fname',
            'area': 'euron1',
            'productname': 'cloud_top_height_in_fname',
            'product': 'cloud_top_height',  # noqa
            'min_coverage': 20.0,
            'something': 'foo',
            'output_dir': '/tmp/satdmz/pps/www/latest_2018/',
            'format': 'png',
            'writer': 'simple_image',
            'fname_pattern': '{platform_name:s}_{start_time:%Y%m%d_%H%M}_{areaname:s}_ctth_static.{format}'
        },
        {
            'areaname': 'euron1_in_fname',
            'area': 'euron1',
            'productname': 'cloud_top_height_in_fname',
            'product': 'cloud_top_height',
            'fill_value': 0,  # noqa
            'min_coverage': 20.0,
            'something': 'foo',
            'output_dir': '/tmp/satdmz/pps/www/latest_2018/',
            'format': 'jpg',
            'writer': 'simple_image',
            'fname_pattern': '{platform_name:s}_{start_time:%Y%m%d_%H%M}_{areaname:s}_ctth_static.{format}'
        },
        {
            'areaname': 'germ_in_fname',
            'area': 'germ',
            'productname': 'cloudtype_in_fname',
            'product': 'cloudtype',  # noqa
            'output_dir': '/tmp/satdmz/pps/www/latest_2018/',
            'min_coverage': 5.0,
            'something': 'foo',
            'fname_pattern': '{start_time:%Y%m%d_%H%M}_{areaname:s}_{productname}.{format}',
            'format': 'png',
            'writer': 'simple_image'
        },
        {
            'areaname': 'omerc_bb',
            'area': 'omerc_bb',
            'productname': 'ct',
            'product': 'ct',
            'min_coverage': 5.0,
            'something': 'foo',  # noqa
            'output_dir': '/tmp',
            'format': 'nc',
            'writer': 'cf'
        },
        {
            'areaname': 'omerc_bb',
            'area': 'omerc_bb',
            'productname': 'cloud_top_height',
            'product': 'cloud_top_height',  # noqa
            'output_dir': '/tmp',
            'format': 'tif',
            'min_coverage': 5.0,
            'something': 'foo',
            'writer': 'geotiff'
        }
    ]
    # plist_iter yields (flattened_config, original_config); compare only
    # the flattened dict against the expectation.
    for i, exp in zip(plist_iter(prodlist), expected):
        self.assertDictEqual(i[0], exp)
    # The second config variant must flatten to exactly the same results.
    prodlist = read_config(raw_string=yaml_test2)['product_list']
    for i, exp in zip(plist_iter(prodlist), expected):
        self.assertDictEqual(i[0], exp)