def _compute(self):
    """Compute the data."""
    if self.data:
        self.logger.info("Processing and saving all data")
        compute_writer_results(self.data)
        if 'overviews' in self._save_settings:
            self._add_overviews()
        self._send_messages()
def test_geotiff(self):
    """Test writing to GeoTIFF file."""
    from satpy.writers import compute_writer_results
    fname = os.path.join(self.base_dir, 'geotiff.tif')
    res = self.scn.save_datasets(filename=fname,
                                 datasets=['test'],
                                 writer='geotiff',
                                 compute=False)
    compute_writer_results([res])
    self.assertTrue(os.path.isfile(fname))
def test_simple_image(self):
    """Test writing to PNG file."""
    from satpy.writers import compute_writer_results
    fname = os.path.join(self.base_dir, 'simple_image.png')
    res = self.scn.save_datasets(filename=fname,
                                 datasets=['test'],
                                 writer='simple_image',
                                 compute=False)
    compute_writer_results([res])
    self.assertTrue(os.path.isfile(fname))
def test_simple_delayed_write(self):
    """Test writing dask Delayed objects returned by a writer."""
    from dask.delayed import Delayed
    from satpy.writers.simple_image import PillowWriter
    from satpy.writers import compute_writer_results

    datasets = self._get_test_datasets()
    w = PillowWriter(base_dir=self.base_dir)
    res = w.save_datasets(datasets, compute=False)
    for r__ in res:
        self.assertIsInstance(r__, Delayed)
        r__.compute()
    compute_writer_results(res)
def test_multiple_simple(self):
    """Test writing multiple PNG files."""
    from satpy.writers import compute_writer_results
    fname1 = os.path.join(self.base_dir, 'simple_image1.png')
    res1 = self.scn.save_datasets(filename=fname1,
                                  datasets=['test'],
                                  writer='simple_image',
                                  compute=False)
    fname2 = os.path.join(self.base_dir, 'simple_image2.png')
    res2 = self.scn.save_datasets(filename=fname2,
                                  datasets=['test'],
                                  writer='simple_image',
                                  compute=False)
    compute_writer_results([res1, res2])
    self.assertTrue(os.path.isfile(fname1))
    self.assertTrue(os.path.isfile(fname2))
def test_mixed(self):
    """Test writing to multiple mixed-type files."""
    from satpy.writers import compute_writer_results
    fname1 = os.path.join(self.base_dir, 'simple_image3.png')
    res1 = self.scn.save_datasets(filename=fname1,
                                  datasets=['test'],
                                  writer='simple_image',
                                  compute=False)
    fname2 = os.path.join(self.base_dir, 'geotiff3.tif')
    res2 = self.scn.save_datasets(filename=fname2,
                                  datasets=['test'],
                                  writer='geotiff',
                                  compute=False)
    res3 = []
    compute_writer_results([res1, res2, res3])
    self.assertTrue(os.path.isfile(fname1))
    self.assertTrue(os.path.isfile(fname2))
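# Taken together, the tests above exercise one pattern: queue several
# save_datasets() calls with compute=False, then trigger a single dask
# computation for all of them. A minimal sketch of that pattern outside
# the test harness; the reader name, input file, and loaded product are
# hypothetical, for illustration only.
from satpy import Scene
from satpy.writers import compute_writer_results

scn = Scene(reader='abi_l1b', filenames=['input_file.nc'])  # hypothetical input
scn.load(['C01'])

# Queue two writers without computing anything yet.
res1 = scn.save_datasets(writer='geotiff', base_dir='/tmp', compute=False)
res2 = scn.save_datasets(writer='simple_image', base_dir='/tmp', compute=False)

# One dask computation writes both outputs, sharing intermediate results.
compute_writer_results([res1, res2])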
def save_datasets(job):
    """Save the datasets (and trigger the computation).

    If the `use_tmp_file` option is provided in the product list and
    is set to True, the file will be first saved to a temporary name
    before being renamed. This is useful when other processes are
    waiting for the file to be present to start their work, but would
    crash on incomplete files.
    """
    scns = job['resampled_scenes']
    objs = []
    base_config = job['input_mda'].copy()
    base_config.pop('dataset', None)
    renames = {}
    for fmat, fmat_config in plist_iter(job['product_list']['product_list'],
                                        base_config):
        fname_pattern = fmat['fname_pattern']
        filename = compose(os.path.join(fmat['output_dir'], fname_pattern),
                           fmat)
        directory = fmat['output_dir']
        if not os.path.exists(directory):
            os.makedirs(directory)
        if fmat.get('use_tmp_file', False):
            file_object = NamedTemporaryFile(delete=False, dir=directory)
            tmp_filename = file_object.name
            file_object.close()
            os.chmod(tmp_filename, 0o644)
            renames[tmp_filename] = filename
            filename = tmp_filename
        fmat.pop('format', None)
        fmat.pop('filename', None)
        try:
            # TODO: make these datasetIDs to take resolution into account
            res = fmat.get('resolution', None)
            dsid = DatasetID(name=fmat['product'], resolution=res,
                             modifiers=None)
            objs.append(scns[fmat['area']].save_dataset(dsid,
                                                        filename=filename,
                                                        compute=False,
                                                        **fmat_config))
        except KeyError as err:
            LOG.info('Skipping %s: %s', fmat['productname'], str(err))
        else:
            fmat_config['filename'] = renames.get(filename, filename)
    compute_writer_results(objs)
    for tmp_name, actual_name in renames.items():
        os.rename(tmp_name, actual_name)
def save_datasets(job):
    """Save the datasets and trigger the computation."""
    scns = job['resampled_scenes']
    objs = []
    base_config = job['input_mda'].copy()
    base_config.update(job['product_list']['common'])
    base_config.pop('dataset', None)
    for fmat, fmat_config in plist_iter(job['product_list']['product_list'],
                                        base_config):
        fname_pattern = fmat['fname_pattern']
        outdir = fmat['output_dir']
        filename = compose(os.path.join(outdir, fname_pattern), fmat)
        fmat.pop('format', None)
        objs.append(scns[fmat['areaname']].save_dataset(fmat['productname'],
                                                        filename=filename,
                                                        compute=False,
                                                        **fmat))
        fmat_config['filename'] = filename
    compute_writer_results(objs)
def save_datasets(job):
    """Save the datasets (and trigger the computation).

    If the `use_tmp_file` option is provided in the product list and
    is set to True, the file will be first saved to a temporary name
    before being renamed. This is useful when other processes are
    waiting for the file to be present to start their work, but would
    crash on incomplete files.
    """
    scns = job['resampled_scenes']
    objs = []
    base_config = job['input_mda'].copy()
    base_config.pop('dataset', None)
    with renamed_files() as renames:
        for fmat, fmat_config in plist_iter(job['product_list']['product_list'],
                                            base_config):
            obj = save_dataset(job, scns, fmat, fmat_config, renames)
            if obj is not None:
                objs.append(obj)
        compute_writer_results(objs)
def save_datasets(job):
    """Save the datasets (and trigger the computation).

    If the ``use_tmp_file`` option is provided in the product list and
    is set to True, the file will be first saved to a temporary name
    before being renamed. This is useful when other processes are
    waiting for the file to be present to start their work, but would
    crash on incomplete files.

    If the ``staging_zone`` option is provided in the product list,
    then the file will be created in this directory first, using either
    a temporary filename (if ``use_tmp_file`` is true) or the final
    filename (if ``use_tmp_file`` is false). This is useful for writers
    which write the filename to the headers, such as the Satpy ninjotiff
    and ninjogeotiff writers. The ``staging_zone`` directory must be on
    the same filesystem as ``output_dir``. When using those writers, it
    is recommended to set ``use_tmp_file`` to `False` when using a
    ``staging_zone`` directory, such that the filename written to the
    headers remains meaningful.
    """
    scns = job['resampled_scenes']
    objs = []
    base_config = job['input_mda'].copy()
    base_config.pop('dataset', None)
    eager_writing = job['product_list']['product_list'].get("eager_writing", False)
    with renamed_files() as renames:
        for fmat, fmat_config in plist_iter(job['product_list']['product_list'],
                                            base_config):
            obj = save_dataset(scns, fmat, fmat_config, renames,
                               compute=eager_writing)
            if obj is not None:
                objs.append(obj)
                job['produced_files'].put(fmat_config['filename'])
        if not eager_writing:
            compute_writer_results(objs)
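# The ``renamed_files`` context manager is referenced above but not shown.
# A minimal sketch of its assumed semantics, matching the tmp-file logic
# spelled out inline in the first trollflow2 variant: collect tmp-to-final
# name mappings and apply the renames only after the block exits cleanly.
import os
from contextlib import contextmanager

@contextmanager
def renamed_files():
    """Collect tmp-to-final renames and apply them on clean exit (sketch)."""
    renames = {}
    yield renames
    # Only reached if saving (and any triggered computation) raised no error,
    # so waiting processes never see incomplete files under their final names.
    for tmp_name, actual_name in renames.items():
        os.rename(tmp_name, actual_name)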
def test_empty(self):
    """Test an empty result list."""
    from satpy.writers import compute_writer_results
    compute_writer_results([])
def main(argv=sys.argv[1:]):
    global LOG
    from satpy import Scene
    from satpy.writers import compute_writer_results
    from dask.diagnostics import ProgressBar
    from polar2grid.core.script_utils import (
        setup_logging, rename_log_file, create_exc_handler,
    )
    import argparse

    add_polar2grid_config_paths()
    USE_POLAR2GRID_DEFAULTS = bool(int(os.environ.setdefault("USE_POLAR2GRID_DEFAULTS", "1")))
    BINARY_NAME = "polar2grid" if USE_POLAR2GRID_DEFAULTS else "geo2grid"

    prog = os.getenv("PROG_NAME", sys.argv[0])
    # "usage: " will be printed at the top of this:
    usage = """%(prog)s -h
see available products:
    %(prog)s -r <reader> -w <writer> --list-products -f file1 [file2 ...]
basic processing:
    %(prog)s -r <reader> -w <writer> [options] -f file1 [file2 ...]
basic processing with limited products:
    %(prog)s -r <reader> -w <writer> [options] -p prod1 prod2 -f file1 [file2 ...]
"""
    parser = argparse.ArgumentParser(
        prog=prog,
        usage=usage,
        fromfile_prefix_chars="@",
        description="Load, composite, resample, and save datasets.",
    )
    parser.add_argument(
        "-v", "--verbose", dest="verbosity", action="count", default=0,
        help="each occurrence increases verbosity 1 level through "
             "ERROR-WARNING-INFO-DEBUG (default INFO)",
    )
    parser.add_argument("-l", "--log", dest="log_fn", default=None,
                        help="specify the log filename")
    parser.add_argument(
        "--progress", action="store_true",
        help="show processing progress bar (not recommended for logged output)",
    )
    parser.add_argument(
        "--num-workers", type=int, default=os.getenv("DASK_NUM_WORKERS", 4),
        help="specify number of worker threads to use (default: 4)",
    )
    parser.add_argument(
        "--match-resolution", dest="preserve_resolution", action="store_false",
        help="When using the 'native' resampler for composites, don't save data "
             "at its native resolution, use the resolution used to create the "
             "composite.",
    )
    parser.add_argument(
        "--list-products", dest="list_products", action="store_true",
        help="List available {} products and exit".format(BINARY_NAME),
    )
    parser.add_argument(
        "--list-products-all", dest="list_products_all", action="store_true",
        help="List available {} products and custom/Satpy products and exit".format(BINARY_NAME),
    )
    reader_group = add_scene_argument_groups(parser, is_polar2grid=USE_POLAR2GRID_DEFAULTS)[0]
    resampling_group = add_resample_argument_groups(parser, is_polar2grid=USE_POLAR2GRID_DEFAULTS)[0]
    writer_group = add_writer_argument_groups(parser)[0]
    argv_without_help = [x for x in argv if x not in ["-h", "--help"]]

    _retitle_optional_arguments(parser)
    args, remaining_args = parser.parse_known_args(argv_without_help)
    os.environ["DASK_NUM_WORKERS"] = str(args.num_workers)

    # get the logger if we know the readers and writers that will be used
    if args.readers is not None and args.writers is not None:
        glue_name = args.readers[0] + "_" + "-".join(args.writers or [])
        LOG = logging.getLogger(glue_name)
    reader_subgroups = _add_component_parser_args(parser, "readers", args.readers or [])
    writer_subgroups = _add_component_parser_args(parser, "writers", args.writers or [])
    args = parser.parse_args(argv)

    if args.readers is None:
        parser.print_usage()
        parser.exit(
            1,
            "\nERROR: Reader must be provided (-r flag).\n"
            "Supported readers:\n\t{}\n".format("\n\t".join(["abi_l1b", "ahi_hsd", "hrit_ahi"])),
        )
    elif len(args.readers) > 1:
        parser.print_usage()
        parser.exit(
            1,
            "\nMultiple readers is not currently supported. Got:\n\t"
            "{}\n".format("\n\t".join(args.readers)),
        )
        return -1
    if args.writers is None:
        parser.print_usage()
        parser.exit(
            1,
            "\nERROR: Writer must be provided (-w flag) with one or more writer.\n"
            "Supported writers:\n\t{}\n".format("\n\t".join(["geotiff"])),
        )

    reader_args = _args_to_dict(args, reader_group._group_actions)
    reader_names = reader_args.pop("readers")
    scene_creation, load_args = _get_scene_init_load_args(args, reader_args, reader_names, reader_subgroups)
    resample_args = _args_to_dict(args, resampling_group._group_actions)
    writer_args = _args_to_dict(args, writer_group._group_actions)
    writer_specific_args = _parse_writer_args(writer_args["writers"], writer_subgroups, reader_names, args)
    writer_args.update(writer_specific_args)

    if not args.filenames:
        parser.print_usage()
        parser.exit(1, "\nERROR: No data files provided (-f flag)\n")

    # Prepare logging
    rename_log = False
    if args.log_fn is None:
        rename_log = True
        args.log_fn = glue_name + "_fail.log"
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    setup_logging(console_level=levels[min(3, args.verbosity)], log_filename=args.log_fn)
    logging.getLogger("rasterio").setLevel(levels[min(2, args.verbosity)])
    sys.excepthook = create_exc_handler(LOG.name)
    if levels[min(3, args.verbosity)] > logging.DEBUG:
        import warnings
        warnings.filterwarnings("ignore")
    LOG.debug("Starting script with arguments: %s", " ".join(sys.argv))

    # Set up dask and the number of workers
    if args.num_workers:
        dask.config.set(num_workers=args.num_workers)

    # Create a Scene, analyze the provided files
    LOG.info("Sorting and reading input files...")
    try:
        scn = Scene(**scene_creation)
    except ValueError as e:
        LOG.error("{} | Enable debug message (-vvv) or see log file for details.".format(str(e)))
        LOG.debug("Further error information: ", exc_info=True)
        return -1
    except OSError:
        LOG.error("Could not open files. Enable debug message (-vvv) or see log file for details.")
        LOG.debug("Further error information: ", exc_info=True)
        return -1

    # Rename the log file
    if rename_log:
        rename_log_file(glue_name + scn.attrs["start_time"].strftime("_%Y%m%d_%H%M%S.log"))

    # Load the actual data arrays and metadata (lazy loaded as dask arrays)
    LOG.info("Loading product metadata from files...")
    reader_info = ReaderProxyBase.from_reader_name(scene_creation["reader"], scn, load_args["products"])
    if args.list_products or args.list_products_all:
        _print_list_products(reader_info, p2g_only=not args.list_products_all)
        return 0

    load_args["products"] = reader_info.get_satpy_products_to_load()
    if not load_args["products"]:
        return -1
    scn.load(load_args["products"])

    ll_bbox = resample_args.pop("ll_bbox")
    if ll_bbox:
        scn = scn.crop(ll_bbox=ll_bbox)

    scn = filter_scene(
        scn,
        reader_names,
        sza_threshold=reader_args["sza_threshold"],
        day_fraction=reader_args["filter_day_products"],
        night_fraction=reader_args["filter_night_products"],
    )
    if scn is None:
        LOG.info("No remaining products after filtering.")
        return 0

    to_save = []
    areas_to_resample = resample_args.pop("grids")
    if "ewa_persist" in resample_args:
        resample_args["persist"] = resample_args.pop("ewa_persist")
    scenes_to_save = resample_scene(
        scn,
        areas_to_resample,
        preserve_resolution=args.preserve_resolution,
        is_polar2grid=USE_POLAR2GRID_DEFAULTS,
        **resample_args,
    )
    for scene_to_save, products_to_save in scenes_to_save:
        overwrite_platform_name_with_aliases(scene_to_save)
        reader_info.apply_p2g_name_to_scene(scene_to_save)
        to_save = write_scene(
            scene_to_save,
            writer_args["writers"],
            writer_args,
            products_to_save,
            to_save=to_save,
        )

    if args.progress:
        pbar = ProgressBar()
        pbar.register()

    LOG.info("Computing products and saving data to writers...")
    compute_writer_results(to_save)
    LOG.info("SUCCESS")
    return 0
def save_datasets(
    self,
    dataset: list[xr.DataArray],
    filename=None,
    dtype=None,
    append=True,
    compute=True,
    chunks=None,
    **kwargs,
):
    """Save HDF5 datasets."""
    compression = kwargs.pop("compression", None)
    if compression == "none":
        compression = None
    add_geolocation = kwargs.pop("add_geolocation", False)

    # will this be written to one or multiple files?
    output_names = []
    for dataset_id in dataset:
        file_attrs = self._output_file_kwargs(dataset_id, dtype)
        out_filename = filename or self.get_filename(**file_attrs)
        output_names.append(out_filename)
    filename = output_names[0]
    if not all_equal(output_names):
        LOG.warning("More than one output filename possible. "
                    "Writing to only '{}'.".format(filename))

    HDF5_fh = self.open_HDF5_filehandle(filename, append=append)
    datasets_by_area = self.iter_by_area(dataset)

    # Initialize source/targets at start of each new AREA grouping.
    dsets = []
    targets = []
    for area, data_arrs in datasets_by_area:
        # open HDF5 file handle, check if group already exists.
        parent_group = self.create_proj_group(filename, HDF5_fh, area)

        if add_geolocation:
            chunks = data_arrs[0].chunks
            geo_sets, fnames = self.write_geolocation(
                HDF5_fh, filename, parent_group, area, dtype, append, compression, chunks
            )
            dsets.append(geo_sets)
            targets.append(fnames)

        for data_arr in data_arrs:
            hdf_subgroup = "{}/{}".format(parent_group, data_arr.attrs["p2g_name"])
            file_var = FakeHDF5(filename, hdf_subgroup)
            self.create_variable(filename, HDF5_fh, hdf_subgroup, data_arr, dtype, compression)
            dsets.append(data_arr.data)
            targets.append(file_var)

    results = (dsets, targets)
    if compute:
        LOG.info("Computing and writing results...")
        return compute_writer_results([results])

    targets, sources, delayeds = split_results([results])
    if delayeds:
        # This writer had only delayed writes
        return delayeds
    else:
        return targets, sources
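# For context, a minimal sketch of what compute_writer_results() does with
# the (sources, targets) pairs returned above, assuming recent satpy
# behavior. split_results and the da.store call are satpy internals; the
# exact return order and signatures may differ between versions, so treat
# this as an illustration of the mechanism rather than the actual code.
import dask
import dask.array as da
from satpy.writers import split_results

def _compute_results_sketch(results):
    # Separate plain dask Delayed objects from (source, target) pairs.
    sources, targets, delayeds = split_results(results)
    if targets:
        # Queue the array writes without computing them yet.
        delayeds.append(da.store(sources, targets, compute=False))
    if delayeds:
        # A single pass over the shared dask graph writes everything.
        dask.compute(delayeds)
    for target in targets:
        # File-like targets (e.g. FakeHDF5 above) may need explicit closing.
        if hasattr(target, "close"):
            target.close()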
def main(argv=sys.argv[1:]):
    global LOG
    from satpy import Scene
    from satpy.resample import get_area_def
    from satpy.writers import compute_writer_results
    from dask.diagnostics import ProgressBar
    from polar2grid.core.script_utils import (
        setup_logging, rename_log_file, create_exc_handler)
    import argparse

    prog = os.getenv('PROG_NAME', sys.argv[0])
    # "usage: " will be printed at the top of this:
    usage = """%(prog)s -h
see available products:
    %(prog)s -r <reader> -w <writer> --list-products -f file1 [file2 ...]
basic processing:
    %(prog)s -r <reader> -w <writer> [options] -f file1 [file2 ...]
basic processing with limited products:
    %(prog)s -r <reader> -w <writer> [options] -p prod1 prod2 -f file1 [file2 ...]
"""
    parser = argparse.ArgumentParser(prog=prog, usage=usage,
                                     description="Load, composite, resample, and save datasets.")
    parser.add_argument('-v', '--verbose', dest='verbosity', action="count", default=0,
                        help='each occurrence increases verbosity 1 level through '
                             'ERROR-WARNING-INFO-DEBUG (default INFO)')
    parser.add_argument('-l', '--log', dest="log_fn", default=None,
                        help="specify the log filename")
    parser.add_argument('--progress', action='store_true',
                        help="show processing progress bar (not recommended for logged output)")
    parser.add_argument('--num-workers', type=int, default=4,
                        help="specify number of worker threads to use (default: 4)")
    parser.add_argument('--match-resolution', dest='preserve_resolution', action='store_false',
                        help="When using the 'native' resampler for composites, don't save data "
                             "at its native resolution, use the resolution used to create the "
                             "composite.")
    parser.add_argument('-w', '--writers', nargs='+',
                        help='writers to save datasets with')
    parser.add_argument("--list-products", dest="list_products", action="store_true",
                        help="List available reader products and exit")
    subgroups = add_scene_argument_groups(parser)
    subgroups += add_resample_argument_groups(parser)

    argv_without_help = [x for x in argv if x not in ["-h", "--help"]]
    args, remaining_args = parser.parse_known_args(argv_without_help)

    # get the logger if we know the readers and writers that will be used
    if args.reader is not None and args.writers is not None:
        glue_name = args.reader + "_" + "-".join(args.writers or [])
        LOG = logging.getLogger(glue_name)

    # add writer arguments
    if args.writers is not None:
        for writer in (args.writers or []):
            parser_func = WRITER_PARSER_FUNCTIONS.get(writer)
            if parser_func is None:
                continue
            subgroups += parser_func(parser)
    args = parser.parse_args(argv)

    if args.reader is None:
        parser.print_usage()
        parser.exit(1, "\nERROR: Reader must be provided (-r flag).\n"
                       "Supported readers:\n\t{}\n".format('\n\t'.join(['abi_l1b', 'ahi_hsd', 'hrit_ahi'])))
    if args.writers is None:
        parser.print_usage()
        parser.exit(1, "\nERROR: Writer must be provided (-w flag) with one or more writer.\n"
                       "Supported writers:\n\t{}\n".format('\n\t'.join(['geotiff'])))

    def _args_to_dict(group_actions):
        return {ga.dest: getattr(args, ga.dest)
                for ga in group_actions if hasattr(args, ga.dest)}

    scene_args = _args_to_dict(subgroups[0]._group_actions)
    load_args = _args_to_dict(subgroups[1]._group_actions)
    resample_args = _args_to_dict(subgroups[2]._group_actions)
    writer_args = {}
    for idx, writer in enumerate(args.writers):
        sgrp1, sgrp2 = subgroups[3 + idx * 2: 5 + idx * 2]
        wargs = _args_to_dict(sgrp1._group_actions)
        if sgrp2 is not None:
            wargs.update(_args_to_dict(sgrp2._group_actions))
        writer_args[writer] = wargs
        # get default output filename
        if 'filename' in wargs and wargs['filename'] is None:
            wargs['filename'] = get_default_output_filename(args.reader, writer)

    if not args.filenames:
        parser.print_usage()
        parser.exit(1, "\nERROR: No data files provided (-f flag)\n")

    # Prepare logging
    rename_log = False
    if args.log_fn is None:
        rename_log = True
        args.log_fn = glue_name + "_fail.log"
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    setup_logging(console_level=levels[min(3, args.verbosity)], log_filename=args.log_fn)
    logging.getLogger('rasterio').setLevel(levels[min(2, args.verbosity)])
    sys.excepthook = create_exc_handler(LOG.name)
    if levels[min(3, args.verbosity)] > logging.DEBUG:
        import warnings
        warnings.filterwarnings("ignore")
    LOG.debug("Starting script with arguments: %s", " ".join(sys.argv))

    # Set up dask and the number of workers
    if args.num_workers:
        from multiprocessing.pool import ThreadPool
        dask.config.set(pool=ThreadPool(args.num_workers))

    # Parse provided files and search for files if provided directories
    scene_args['filenames'] = get_input_files(scene_args['filenames'])

    # Create a Scene, analyze the provided files
    LOG.info("Sorting and reading input files...")
    try:
        scn = Scene(**scene_args)
    except ValueError as e:
        LOG.error("{} | Enable debug message (-vvv) or see log file for details.".format(str(e)))
        LOG.debug("Further error information: ", exc_info=True)
        return -1
    except OSError:
        LOG.error("Could not open files. Enable debug message (-vvv) or see log file for details.")
        LOG.debug("Further error information: ", exc_info=True)
        return -1

    if args.list_products:
        print("\n".join(sorted(scn.available_dataset_names(composites=True))))
        return 0

    # Rename the log file
    if rename_log:
        rename_log_file(glue_name + scn.attrs['start_time'].strftime("_%Y%m%d_%H%M%S.log"))

    # Load the actual data arrays and metadata (lazy loaded as dask arrays)
    if load_args['products'] is None:
        try:
            reader_mod = importlib.import_module('polar2grid.readers.' + scene_args['reader'])
            load_args['products'] = reader_mod.DEFAULT_PRODUCTS
            LOG.info("Using default product list: {}".format(load_args['products']))
        except (ImportError, AttributeError):
            LOG.error("No default products list set, please specify with `--products`.")
            return -1
    LOG.info("Loading product metadata from files...")
    scn.load(load_args['products'])

    resample_kwargs = resample_args.copy()
    areas_to_resample = resample_kwargs.pop('grids')
    grid_configs = resample_kwargs.pop('grid_configs')
    resampler = resample_kwargs.pop('resampler')

    if areas_to_resample is None and resampler in [None, 'native']:
        # no areas specified
        areas_to_resample = ['MAX']
    elif areas_to_resample is None:
        raise ValueError("Resampling method specified (--method) without any destination grid/area (-g flag).")
    elif not areas_to_resample:
        # they don't want any resampling (they used '-g' with no args)
        areas_to_resample = [None]

    has_custom_grid = any(g not in ['MIN', 'MAX', None] for g in areas_to_resample)
    if has_custom_grid and resampler == 'native':
        LOG.error("Resampling method 'native' can only be used with 'MIN' or 'MAX' grids "
                  "(use 'nearest' method instead).")
        return -1

    p2g_grid_configs = [x for x in grid_configs if x.endswith('.conf')]
    pyresample_area_configs = [x for x in grid_configs if not x.endswith('.conf')]
    if not grid_configs or p2g_grid_configs:
        # if we were given p2g grid configs or we weren't given any to choose from
        from polar2grid.grids import GridManager
        grid_manager = GridManager(*p2g_grid_configs)
    else:
        grid_manager = {}
    if pyresample_area_configs:
        from pyresample.utils import parse_area_file
        custom_areas = parse_area_file(pyresample_area_configs)
        custom_areas = {x.area_id: x for x in custom_areas}
    else:
        custom_areas = {}

    ll_bbox = resample_kwargs.pop('ll_bbox')
    if ll_bbox:
        scn = scn.crop(ll_bbox=ll_bbox)

    wishlist = scn.wishlist.copy()
    preserve_resolution = get_preserve_resolution(args, resampler, areas_to_resample)
    if preserve_resolution:
        preserved_products = set(wishlist) & set(scn.datasets.keys())
        resampled_products = set(wishlist) - preserved_products
        # original native scene
        to_save = write_scene(scn, args.writers, writer_args, preserved_products)
    else:
        preserved_products = set()
        resampled_products = set(wishlist)
        to_save = []
    LOG.debug("Products to preserve resolution for: {}".format(preserved_products))
    LOG.debug("Products to use new resolution for: {}".format(resampled_products))

    for area_name in areas_to_resample:
        if area_name is None:
            # no resampling
            area_def = None
        elif area_name == 'MAX':
            area_def = scn.max_area()
        elif area_name == 'MIN':
            area_def = scn.min_area()
        elif area_name in custom_areas:
            area_def = custom_areas[area_name]
        elif area_name in grid_manager:
            from pyresample.geometry import DynamicAreaDefinition
            p2g_def = grid_manager[area_name]
            area_def = p2g_def.to_satpy_area()
            if isinstance(area_def, DynamicAreaDefinition) and p2g_def['cell_width'] is not None:
                area_def = area_def.freeze(scn.max_area(),
                                           resolution=(abs(p2g_def['cell_width']),
                                                       abs(p2g_def['cell_height'])))
        else:
            area_def = get_area_def(area_name)

        if resampler is None and area_def is not None:
            rs = 'native' if area_name in ['MIN', 'MAX'] else 'nearest'
            LOG.debug("Setting default resampling to '{}' for grid '{}'".format(rs, area_name))
        else:
            rs = resampler

        if area_def is not None:
            LOG.info("Resampling data to '%s'", area_name)
            new_scn = scn.resample(area_def, resampler=rs, **resample_kwargs)
        elif not preserve_resolution:
            # the user didn't want to resample to any areas
            # the user also requested that we don't preserve resolution
            # which means we have to save this Scene's datasets
            # because they won't be saved
            new_scn = scn

        to_save = write_scene(new_scn, args.writers, writer_args,
                              resampled_products, to_save=to_save)

    if args.progress:
        pbar = ProgressBar()
        pbar.register()

    LOG.info("Computing products and saving data to writers...")
    compute_writer_results(to_save)
    LOG.info("SUCCESS")
    return 0
def main(argv=sys.argv[1:]):
    global LOG
    import satpy
    from satpy import Scene
    from satpy.writers import compute_writer_results
    from dask.diagnostics import ProgressBar
    from polar2grid.core.script_utils import (
        setup_logging, rename_log_file, create_exc_handler)
    import argparse

    dist = pkg_resources.get_distribution('polar2grid')
    if dist_is_editable(dist):
        p2g_etc = os.path.join(dist.module_path, 'etc')
    else:
        p2g_etc = os.path.join(sys.prefix, 'etc', 'polar2grid')
    config_path = satpy.config.get('config_path')
    if p2g_etc not in config_path:
        satpy.config.set(config_path=config_path + [p2g_etc])

    USE_POLAR2GRID_DEFAULTS = bool(int(os.environ.setdefault("USE_POLAR2GRID_DEFAULTS", "1")))

    prog = os.getenv('PROG_NAME', sys.argv[0])
    # "usage: " will be printed at the top of this:
    usage = """%(prog)s -h
see available products:
    %(prog)s -r <reader> -w <writer> --list-products -f file1 [file2 ...]
basic processing:
    %(prog)s -r <reader> -w <writer> [options] -f file1 [file2 ...]
basic processing with limited products:
    %(prog)s -r <reader> -w <writer> [options] -p prod1 prod2 -f file1 [file2 ...]
"""
    parser = argparse.ArgumentParser(
        prog=prog, usage=usage,
        fromfile_prefix_chars="@",
        description="Load, composite, resample, and save datasets.")
    parser.add_argument(
        '-v', '--verbose', dest='verbosity', action="count", default=0,
        help='each occurrence increases verbosity 1 level through '
             'ERROR-WARNING-INFO-DEBUG (default INFO)')
    parser.add_argument('-l', '--log', dest="log_fn", default=None,
                        help="specify the log filename")
    parser.add_argument(
        '--progress', action='store_true',
        help="show processing progress bar (not recommended for logged output)")
    parser.add_argument(
        '--num-workers', type=int, default=os.getenv('DASK_NUM_WORKERS', 4),
        help="specify number of worker threads to use (default: 4)")
    parser.add_argument(
        '--match-resolution', dest='preserve_resolution', action='store_false',
        help="When using the 'native' resampler for composites, don't save data "
             "at its native resolution, use the resolution used to create the "
             "composite.")
    parser.add_argument("--list-products", dest="list_products", action="store_true",
                        help="List available reader products and exit")
    reader_group = add_scene_argument_groups(parser, is_polar2grid=USE_POLAR2GRID_DEFAULTS)[0]
    resampling_group = add_resample_argument_groups(parser, is_polar2grid=USE_POLAR2GRID_DEFAULTS)[0]
    writer_group = add_writer_argument_groups(parser)[0]
    subgroups = [reader_group, resampling_group, writer_group]

    argv_without_help = [x for x in argv if x not in ["-h", "--help"]]

    _retitle_optional_arguments(parser)
    args, remaining_args = parser.parse_known_args(argv_without_help)
    os.environ['DASK_NUM_WORKERS'] = str(args.num_workers)

    # get the logger if we know the readers and writers that will be used
    if args.readers is not None and args.writers is not None:
        glue_name = args.readers[0] + "_" + "-".join(args.writers or [])
        LOG = logging.getLogger(glue_name)

    # add writer arguments
    for writer in (args.writers or []):
        parser_func = WRITER_PARSER_FUNCTIONS.get(writer)
        if parser_func is None:
            continue
        subgroups += parser_func(parser)
    args = parser.parse_args(argv)

    if args.readers is None:
        parser.print_usage()
        parser.exit(
            1,
            "\nERROR: Reader must be provided (-r flag).\n"
            "Supported readers:\n\t{}\n".format('\n\t'.join(['abi_l1b', 'ahi_hsd', 'hrit_ahi'])))
    elif len(args.readers) > 1:
        parser.print_usage()
        parser.exit(
            1,
            "\nMultiple readers is not currently supported. Got:\n\t"
            "{}\n".format('\n\t'.join(args.readers)))
        return -1
    if args.writers is None:
        parser.print_usage()
        parser.exit(
            1,
            "\nERROR: Writer must be provided (-w flag) with one or more writer.\n"
            "Supported writers:\n\t{}\n".format('\n\t'.join(['geotiff'])))

    def _args_to_dict(group_actions, exclude=None):
        if exclude is None:
            exclude = []
        return {ga.dest: getattr(args, ga.dest)
                for ga in group_actions
                if hasattr(args, ga.dest) and ga.dest not in exclude}

    reader_args = _args_to_dict(reader_group._group_actions)
    reader_names = reader_args.pop('readers')
    scene_creation = {
        'filenames': reader_args.pop('filenames'),
        'reader': reader_names[0],
    }
    load_args = {
        'products': reader_args.pop('products'),
    }
    # anything left in 'reader_args' is a reader-specific kwarg
    resample_args = _args_to_dict(resampling_group._group_actions)
    writer_args = _args_to_dict(writer_group._group_actions)
    subgroup_idx = 3
    for idx, writer in enumerate(writer_args['writers']):
        sgrp1, sgrp2 = subgroups[subgroup_idx + idx * 2:subgroup_idx + 2 + idx * 2]
        wargs = _args_to_dict(sgrp1._group_actions)
        if sgrp2 is not None:
            wargs.update(_args_to_dict(sgrp2._group_actions))
        writer_args[writer] = wargs
        # get default output filename
        if 'filename' in wargs and wargs['filename'] is None:
            wargs['filename'] = get_default_output_filename(args.readers[0], writer)

    if not args.filenames:
        parser.print_usage()
        parser.exit(1, "\nERROR: No data files provided (-f flag)\n")

    # Prepare logging
    rename_log = False
    if args.log_fn is None:
        rename_log = True
        args.log_fn = glue_name + "_fail.log"
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    setup_logging(console_level=levels[min(3, args.verbosity)], log_filename=args.log_fn)
    logging.getLogger('rasterio').setLevel(levels[min(2, args.verbosity)])
    sys.excepthook = create_exc_handler(LOG.name)
    if levels[min(3, args.verbosity)] > logging.DEBUG:
        import warnings
        warnings.filterwarnings("ignore")
    LOG.debug("Starting script with arguments: %s", " ".join(sys.argv))

    # Set up dask and the number of workers
    if args.num_workers:
        dask.config.set(num_workers=args.num_workers)

    # Parse provided files and search for files if provided directories
    scene_creation['filenames'] = get_input_files(scene_creation['filenames'])

    # Create a Scene, analyze the provided files
    LOG.info("Sorting and reading input files...")
    try:
        scn = Scene(**scene_creation)
    except ValueError as e:
        LOG.error("{} | Enable debug message (-vvv) or see log file for details.".format(str(e)))
        LOG.debug("Further error information: ", exc_info=True)
        return -1
    except OSError:
        LOG.error("Could not open files. Enable debug message (-vvv) or see log file for details.")
        LOG.debug("Further error information: ", exc_info=True)
        return -1

    if args.list_products:
        print("\n".join(sorted(scn.available_dataset_names(composites=True))))
        return 0

    # Rename the log file
    if rename_log:
        rename_log_file(glue_name + scn.attrs['start_time'].strftime("_%Y%m%d_%H%M%S.log"))

    # Load the actual data arrays and metadata (lazy loaded as dask arrays)
    LOG.info("Loading product metadata from files...")
    load_args['products'] = _apply_default_products_and_aliases(
        scn, scene_creation['reader'], load_args['products'])
    if not load_args['products']:
        return -1
    scn.load(load_args['products'])

    ll_bbox = resample_args.pop('ll_bbox')
    if ll_bbox:
        scn = scn.crop(ll_bbox=ll_bbox)

    scn = filter_scene(
        scn,
        reader_names,
        sza_threshold=reader_args['sza_threshold'],
        day_fraction=reader_args['filter_day_products'],
        night_fraction=reader_args['filter_night_products'],
    )
    if scn is None:
        LOG.info("No remaining products after filtering.")
        return 0

    to_save = []
    areas_to_resample = resample_args.pop("grids")
    if 'ewa_persist' in resample_args:
        resample_args['persist'] = resample_args.pop('ewa_persist')
    scenes_to_save = resample_scene(
        scn,
        areas_to_resample,
        preserve_resolution=args.preserve_resolution,
        is_polar2grid=USE_POLAR2GRID_DEFAULTS,
        **resample_args)
    for scene_to_save, products_to_save in scenes_to_save:
        overwrite_platform_name_with_aliases(scene_to_save)
        to_save = write_scene(scene_to_save, writer_args['writers'],
                              writer_args, products_to_save, to_save=to_save)

    if args.progress:
        pbar = ProgressBar()
        pbar.register()

    LOG.info("Computing products and saving data to writers...")
    compute_writer_results(to_save)
    LOG.info("SUCCESS")
    return 0