def _process_composite_config(self, composite_name, conf,
                              composite_type, sensor_id, composite_config, **kwargs):

    compositors = self.compositors[sensor_id]
    modifiers = self.modifiers[sensor_id]
    try:
        options = conf[composite_type][composite_name]
        loader = options.pop('compositor')
    except KeyError:
        if composite_name in compositors or composite_name in modifiers:
            return conf
        raise ValueError("'compositor' missing or empty in %s" %
                         composite_config)

    options['name'] = composite_name
    for prereq_type in ['prerequisites', 'optional_prerequisites']:
        prereqs = []
        for item in options.get(prereq_type, []):
            if isinstance(item, dict):
                key = DatasetID.from_dict(item)
                prereqs.append(key)
            else:
                prereqs.append(item)
        options[prereq_type] = prereqs

    if composite_type == 'composites':
        options.update(**kwargs)
        key = DatasetID.from_dict(options)
        comp = loader(**options)
        compositors[key] = comp
    elif composite_type == 'modifiers':
        modifiers[composite_name] = loader, options
def _slice_datasets(self, dataset_ids, slice_key, new_area, area_only=True):
    """Slice scene in-place for the datasets specified."""
    new_datasets = {}
    datasets = (self[ds_id] for ds_id in dataset_ids)
    for ds, parent_ds in dataset_walker(datasets):
        ds_id = DatasetID.from_dict(ds.attrs)
        # handle ancillary variables
        pres = None
        if parent_ds is not None:
            pres = new_datasets[DatasetID.from_dict(parent_ds.attrs)]
        if ds_id in new_datasets:
            replace_anc(ds, pres)
            continue
        if area_only and ds.attrs.get('area') is None:
            new_datasets[ds_id] = ds
            replace_anc(ds, pres)
            continue

        if not isinstance(slice_key, dict):
            # match dimension name to slice object
            key = dict(zip(ds.dims, slice_key))
        else:
            key = slice_key
        new_ds = ds.isel(**key)
        if new_area is not None:
            new_ds.attrs['area'] = new_area

        new_datasets[ds_id] = new_ds
        if parent_ds is None:
            # don't use `__setitem__` because we don't want this to
            # affect the existing wishlist/dep tree
            self.datasets[ds_id] = new_ds
        else:
            replace_anc(new_ds, pres)
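# A minimal usage sketch (not part of the original module): `_slice_datasets`
# backs the public `Scene.slice` API. The band name and slice bounds below
# are assumptions for illustration; all sliced datasets must share the same
# area for the row/column slices to make sense.
scn.load(['VIS006'])  # hypothetical band name
cropped_scn = scn.slice((slice(100, 200), slice(50, 150)))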
def _resampled_scene(self, new_scn, destination_area, **resample_kwargs):
    """Resample `datasets` to the `destination` area."""
    new_datasets = {}
    datasets = list(new_scn.datasets.values())
    max_area = None
    if isinstance(destination_area, (str, six.text_type)):
        destination_area = get_area_def(destination_area)
    if hasattr(destination_area, 'freeze'):
        try:
            max_area = new_scn.max_area()
            destination_area = destination_area.freeze(max_area)
        except ValueError:
            raise ValueError("No dataset areas available to freeze "
                             "DynamicAreaDefinition.")

    resamplers = {}
    for dataset, parent_dataset in dataset_walker(datasets):
        ds_id = DatasetID.from_dict(dataset.attrs)
        pres = None
        if parent_dataset is not None:
            pres = new_datasets[DatasetID.from_dict(parent_dataset.attrs)]
        if ds_id in new_datasets:
            replace_anc(dataset, pres)
            continue
        if dataset.attrs.get('area') is None:
            if parent_dataset is None:
                new_scn.datasets[ds_id] = dataset
            else:
                replace_anc(dataset, pres)
            continue
        LOG.debug("Resampling %s", ds_id)
        source_area = dataset.attrs['area']
        try:
            slice_x, slice_y = source_area.get_area_slices(
                destination_area)
            source_area = source_area[slice_y, slice_x]
            dataset = dataset.isel(x=slice_x, y=slice_y)
            assert ('x', source_area.x_size) in dataset.sizes.items()
            assert ('y', source_area.y_size) in dataset.sizes.items()
            dataset.attrs['area'] = source_area
        except NotImplementedError:
            LOG.info("Not reducing data before resampling.")
        if source_area not in resamplers:
            key, resampler = prepare_resampler(
                source_area, destination_area, **resample_kwargs)
            resamplers[source_area] = resampler
            self.resamplers[key] = resampler
        kwargs = resample_kwargs.copy()
        kwargs['resampler'] = resamplers[source_area]
        res = resample_dataset(dataset, destination_area, **kwargs)
        new_datasets[ds_id] = res
        if parent_dataset is None:
            new_scn.datasets[ds_id] = res
        else:
            replace_anc(res, pres)
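# Usage sketch for the resampling path above (a sketch, not part of the
# original module): `_resampled_scene` is internal and is normally reached
# through `Scene.resample`. The reader name, file list, and 'euro4' area
# name are assumptions for illustration.
from satpy import Scene

scn = Scene(filenames=my_files, reader='some_reader')  # hypothetical inputs
scn.load(['overview'])
local_scn = scn.resample('euro4', resampler='nearest')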
def update_ds_ids_from_file_handlers(self):
    """Update DatasetIDs with information from loaded files.

    This is useful, for example, if dataset resolution may change
    depending on what files were loaded.

    """
    for file_handlers in self.file_handlers.values():
        fh = file_handlers[0]
        # update the resolution in the dataset IDs to this file handler's
        # resolution
        res = getattr(fh, 'resolution', None)
        if res is None:
            continue

        for ds_id, ds_info in list(self.ids.items()):
            file_types = ds_info['file_type']
            if not isinstance(file_types, list):
                file_types = [file_types]
            if fh.filetype_info['file_type'] not in file_types:
                continue
            if ds_id.resolution is not None:
                continue
            ds_info['resolution'] = res
            new_id = DatasetID.from_dict(ds_info)
            self.ids[new_id] = ds_info
            del self.ids[ds_id]
def update_ds_ids_from_file_handlers(self):
    """Add or modify available dataset information.

    Each file handler is consulted on whether or not it can load the
    dataset with the provided information dictionary. See
    :meth:`satpy.readers.file_handlers.BaseFileHandler.available_datasets`
    for more information.

    """
    avail_datasets = self._file_handlers_available_datasets()
    new_ids = {}
    for is_avail, ds_info in avail_datasets:
        # especially from the yaml config
        coordinates = ds_info.get('coordinates')
        if isinstance(coordinates, list):
            # xarray doesn't like concatenating attributes that are
            # lists: https://github.com/pydata/xarray/issues/2060
            ds_info['coordinates'] = tuple(ds_info['coordinates'])

        ds_info.setdefault('modifiers', tuple())  # default to no mods
        ds_id = DatasetID.from_dict(ds_info)
        # all datasets
        new_ids[ds_id] = ds_info
        # available datasets
        # False == we have the file type but it doesn't have this dataset
        # None == we don't have the file type object to ask
        if is_avail:
            self.available_ids[ds_id] = ds_info
    self.all_ids = new_ids
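# Sketch of the file-handler side of this contract (an illustration under
# assumed names, not the original implementation): `available_datasets`
# yields (is_available, ds_info) pairs, where is_available is True/False
# when this handler can decide and None when it cannot. `self.nc` is a
# hypothetical handle to the variables in the open file.
def available_datasets(self, configured_datasets=None):
    for is_avail, ds_info in (configured_datasets or []):
        if is_avail is not None:
            # another file handler already made a decision
            yield is_avail, ds_info
        elif self.file_type_matches(ds_info['file_type']):
            # decide based on what is actually in this file (hypothetical)
            yield ds_info['name'] in self.nc, ds_info
        else:
            # not our file type; leave the decision to another handler
            yield is_avail, ds_info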
def _generate_composite(self, comp_node, keepables):
    """Collect all composite prereqs and create the specified composite.

    Args:
        comp_node (Node): Composite Node to generate a Dataset for
        keepables (set): `set` to update if any datasets are needed
                         when generation is continued later. This can
                         happen if generation is delayed to incompatible
                         areas which would require resampling first.

    """
    if comp_node.name in self.datasets:
        # already loaded
        return
    compositor, prereqs, optional_prereqs = comp_node.data

    try:
        prereq_datasets = self._get_prereq_datasets(
            comp_node.name,
            prereqs,
            keepables,
        )
    except KeyError:
        return

    optional_datasets = self._get_prereq_datasets(
        comp_node.name,
        optional_prereqs,
        keepables,
        skip=True
    )

    try:
        composite = compositor(prereq_datasets,
                               optional_datasets=optional_datasets,
                               **self.attrs)

        cid = DatasetID.from_dict(composite.attrs)
        self.datasets[cid] = composite

        # update the node with the computed DatasetID
        if comp_node.name in self.wishlist:
            self.wishlist.remove(comp_node.name)
            self.wishlist.add(cid)
        comp_node.name = cid
    except IncompatibleAreas:
        LOG.warning("Delaying generation of %s "
                    "because of incompatible areas", str(compositor.id))
        preservable_datasets = set(self.datasets.keys())
        prereq_ids = set(p.name for p in prereqs)
        opt_prereq_ids = set(p.name for p in optional_prereqs)
        keepables |= preservable_datasets & (prereq_ids | opt_prereq_ids)
        # even though it wasn't generated keep a list of what
        # might be needed in other compositors
        keepables.add(comp_node.name)
        return
def iter_by_area(self):
    """Generate datasets grouped by Area.

    :return: iterable of (area_obj, list of DatasetID objects) pairs
    """
    datasets_by_area = {}
    for ds in self:
        a = ds.attrs.get('area')
        datasets_by_area.setdefault(a, []).append(
            DatasetID.from_dict(ds.attrs))

    return datasets_by_area.items()
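# Usage sketch (assuming `scn` is a loaded Scene): group datasets by their
# area so each area only needs to be handled once, e.g. before resampling.
for area, ds_ids in scn.iter_by_area():
    print(area, [ds_id.name for ds_id in ds_ids])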
def _process_composite_config(self, composite_name, conf,
                              composite_type, sensor_id, composite_config, **kwargs):
    compositors = self.compositors[sensor_id]
    modifiers = self.modifiers[sensor_id]

    try:
        options = conf[composite_type][composite_name]
        loader = options.pop('compositor')
    except KeyError:
        if composite_name in compositors or composite_name in modifiers:
            return conf
        # look the options up again with `.get` so the message still works
        # when the composite entry itself was missing (in which case
        # `options` was never bound)
        raise ValueError(
            "'compositor' missing or empty in {0}. Option keys = {1}".format(
                composite_config,
                str(conf.get(composite_type, {}).get(composite_name, {}).keys())))

    options['name'] = composite_name
    for prereq_type in ['prerequisites', 'optional_prerequisites']:
        prereqs = []
        for item in options.get(prereq_type, []):
            if isinstance(item, dict):
                # we want this prerequisite to act as a query with
                # 'modifiers' being None otherwise it will be an empty
                # tuple
                item.setdefault('modifiers', None)
                key = DatasetID.from_dict(item)
                prereqs.append(key)
            else:
                prereqs.append(item)
        options[prereq_type] = prereqs

    if composite_type == 'composites':
        options.update(**kwargs)
        key = DatasetID.from_dict(options)
        comp = loader(**options)
        compositors[key] = comp
    elif composite_type == 'modifiers':
        modifiers[composite_name] = loader, options
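# Shape of the `conf` mapping consumed above (an illustration with assumed
# names): in practice it comes from the composite YAML files, where the
# `compositor` key is a class resolved by the YAML loader.
from satpy.composites import GenericCompositor

conf = {
    'composites': {
        'overview': {
            'compositor': GenericCompositor,
            'prerequisites': [0.6, 0.8, 10.8],
            'standard_name': 'overview',
        },
    },
}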
def copy(self, datasets=None):
    """Create a copy of the Scene including dependency information.

    Args:
        datasets (list, tuple): `DatasetID` objects for the datasets
                                to include in the new Scene object.

    """
    new_scn = self.__class__()
    new_scn.attrs = self.attrs.copy()
    new_scn.dep_tree = self.dep_tree.copy()

    for ds_id in (datasets or self.keys()):
        # NOTE: Must use `.datasets` or side effects of `__setitem__`
        # could hurt us with regards to the wishlist
        new_scn.datasets[ds_id] = self[ds_id]

    if not datasets:
        new_scn.wishlist = self.wishlist.copy()
    else:
        new_scn.wishlist = set(
            [DatasetID.from_dict(ds.attrs) for ds in new_scn])
    return new_scn
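# Usage sketch: `copy` keeps `dep_tree`, so composites can still be
# regenerated in the copy; restricting `datasets` rebuilds the wishlist
# from what actually ends up in the new Scene. The dataset name below is
# an assumption for illustration.
from satpy import DatasetID

subset_scn = scn.copy(datasets=[DatasetID(name='overview')])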
def add_group_aliases(scenes, groups):
    """Add aliases for the groups datasets belong to."""
    for scene in scenes:
        scene = scene.copy()
        for group_id, member_names in groups.items():
            # Find out whether one of the datasets in this scene belongs
            # to this group
            member_ids = [DatasetID.from_dict(scene[name].attrs)
                          for name in member_names if name in scene]

            # Add an alias for the group it belongs to
            if len(member_ids) == 1:
                member_id = member_ids[0]
                new_ds = scene[member_id].copy()
                new_ds.attrs.update(group_id.to_dict())
                scene[group_id] = new_ds
            elif len(member_ids) > 1:
                raise ValueError('Cannot add multiple datasets from the same '
                                 'scene to a group')
            else:
                # Datasets in this scene don't belong to any group
                pass
        yield scene
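# Usage sketch (group and band names are assumptions): expose one shared
# alias for per-sensor bands so downstream code can address both scenes
# uniformly.
groups = {DatasetID(name='IR_group'): ['ir_108', 'B13']}
aliased_scenes = list(add_group_aliases([scene_a, scene_b], groups))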
def update_ds_ids_from_file_handlers(self):
    """Update DatasetIDs with information from loaded files.

    This is useful, for example, if dataset resolution may change
    depending on what files were loaded.

    """
    for file_handlers in self.file_handlers.values():
        fh = file_handlers[0]
        # update the resolution in the dataset IDs to this file handler's
        # resolution
        res = getattr(fh, 'resolution', None)
        if res is None:
            continue

        for ds_id, ds_info in list(self.ids.items()):
            if fh.filetype_info['file_type'] != ds_info['file_type']:
                continue
            if ds_id.resolution is not None:
                continue
            ds_info['resolution'] = res
            new_id = DatasetID.from_dict(ds_info)
            self.ids[new_id] = ds_info
            del self.ids[ds_id]
def save_animation(self, filename, datasets=None, fps=10, fill_value=None,
                   batch_size=1, ignore_missing=False, client=True,
                   **kwargs):
    """Helper method for saving to movie (MP4) or GIF formats.

    Supported formats are dependent on the `imageio` library and are
    determined by filename extension by default.

    .. note::

        Starting with ``imageio`` 2.5.0, the use of FFMPEG depends on
        a separate ``imageio-ffmpeg`` package.

    By default all datasets available will be saved to individual files
    using the first Scene's datasets metadata to format the filename
    provided. If a dataset is not available from a Scene then a black
    array is used instead (np.zeros(shape)).

    This function can use the ``dask.distributed`` library for improved
    performance by computing multiple frames at a time (see `batch_size`
    option below). If the distributed library is not available then
    frames will be generated one at a time, one product at a time.

    Args:
        filename (str): Filename to save to. Can include python string
                        formatting keys from dataset ``.attrs``
                        (ex. "{name}_{start_time:%Y%m%d_%H%M%S}.gif")
        datasets (list): DatasetIDs to save (default: all datasets)
        fps (int): Frames per second for produced animation
        fill_value (int): Value to use instead of creating an alpha band.
        batch_size (int): Number of frames to compute at the same time.
            This only has effect if the `dask.distributed` package is
            installed. This will default to 1. Setting this to 0 or less
            will attempt to process all frames at once. This option
            should be used with care to avoid memory issues when trying
            to improve performance. Note that this is the total number
            of frames for all datasets, so when saving 2 datasets this
            will compute ``(batch_size / 2)`` frames for the first
            dataset and ``(batch_size / 2)`` frames for the second
            dataset.
        ignore_missing (bool): Don't include a black frame when a
                               dataset is missing from a child scene.
        client (bool or dask.distributed.Client): Dask distributed
            client to use for computation. If this is ``True`` (default)
            then any existing clients will be used. If this is ``False``
            or ``None`` then a client will not be created and
            ``dask.distributed`` will not be used. If this is a dask
            ``Client`` object then it will be used for distributed
            computation.
        kwargs: Additional keyword arguments to pass to
                `imageio.get_writer`.

    """
    if imageio is None:
        raise ImportError("Missing required 'imageio' library")

    scene_gen = self._scene_gen
    first_scene = self.first_scene
    scenes = iter(self._scene_gen)
    info_scenes = [first_scene]
    if 'end_time' in filename:
        # if we need the last scene to generate the filename
        # then compute all the scenes so we can figure it out
        log.debug("Generating scenes to compute end_time for filename")
        scenes = list(scenes)
        info_scenes.append(scenes[-1])

    available_ds = [first_scene.datasets.get(ds)
                    for ds in first_scene.wishlist]
    available_ds = [DatasetID.from_dict(ds.attrs)
                    for ds in available_ds if ds is not None]
    dataset_ids = datasets or available_ds

    if not dataset_ids:
        raise RuntimeError("No datasets found for saving (resampling may "
                           "be needed to generate composites)")

    writers = {}
    frames = {}
    for dataset_id in dataset_ids:
        if not self.is_generator and not self._all_same_area([dataset_id]):
            raise ValueError("Sub-scene datasets must all be on the same "
                             "area (see the 'resample' method).")

        all_datasets = scene_gen[dataset_id]
        info_datasets = [scn.get(dataset_id) for scn in info_scenes]
        this_fn, shape, this_fill = self._get_animation_info(
            info_datasets, filename, fill_value=fill_value)
        data_to_write = self._get_animation_frames(
            all_datasets, shape, this_fill, ignore_missing)

        writer = imageio.get_writer(this_fn, fps=fps, **kwargs)
        frames[dataset_id] = data_to_write
        writers[dataset_id] = writer

    client = self._get_client(client=client)
    # get an ordered list of frames
    frame_keys, frames_to_write = list(zip(*frames.items()))
    frames_to_write = zip(*frames_to_write)
    if client is not None:
        self._distribute_frame_compute(
            writers, frame_keys, frames_to_write, client,
            batch_size=batch_size)
    else:
        self._simple_frame_compute(writers, frame_keys, frames_to_write)

    for writer in writers.values():
        writer.close()