Example #1
    def _process_composite_config(self, composite_name, conf, composite_type,
                                  sensor_id, composite_config, **kwargs):

        compositors = self.compositors[sensor_id]
        modifiers = self.modifiers[sensor_id]

        try:
            options = conf[composite_type][composite_name]
            loader = options.pop('compositor')
        except KeyError:
            if composite_name in compositors or composite_name in modifiers:
                return conf
            raise ValueError("'compositor' missing or empty in %s" %
                             composite_config)

        options['name'] = composite_name
        for prereq_type in ['prerequisites', 'optional_prerequisites']:
            prereqs = []
            for item in options.get(prereq_type, []):
                if isinstance(item, dict):
                    key = DatasetID.from_dict(item)
                    prereqs.append(key)
                else:
                    prereqs.append(item)
            options[prereq_type] = prereqs

        if composite_type == 'composites':
            options.update(**kwargs)
            key = DatasetID.from_dict(options)
            comp = loader(**options)
            compositors[key] = comp
        elif composite_type == 'modifiers':
            modifiers[composite_name] = loader, options
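
The prerequisite handling above leans on DatasetID.from_dict turning a plain metadata dict into a hashable key. The stand-in below only sketches that pattern under stated assumptions; the _DatasetKey type, its field list, and the key_from_dict helper are hypothetical and not the real satpy API, which lives in satpy.dataset in older releases.

# Illustrative stand-in for the DatasetID.from_dict pattern used above.
# The _DatasetKey type and its field list are assumptions for this sketch.
from collections import namedtuple

_FIELDS = ('name', 'wavelength', 'resolution', 'modifiers')
_DatasetKey = namedtuple('_DatasetKey', _FIELDS)


def key_from_dict(d):
    """Build a hashable key from a metadata dict, ignoring unknown keys."""
    return _DatasetKey(*(d.get(f) for f in _FIELDS))


prereq = {'name': 'C02', 'resolution': 500, 'modifiers': None}
key = key_from_dict(prereq)
compositors = {key: 'some_compositor_object'}  # hashable, so it can index a dict
print(key)  # _DatasetKey(name='C02', wavelength=None, resolution=500, modifiers=None)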
Example #2
    def _slice_datasets(self, dataset_ids, slice_key, new_area, area_only=True):
        """Slice scene in-place for the datasets specified."""
        new_datasets = {}
        datasets = (self[ds_id] for ds_id in dataset_ids)
        for ds, parent_ds in dataset_walker(datasets):
            ds_id = DatasetID.from_dict(ds.attrs)
            # handle ancillary variables
            pres = None
            if parent_ds is not None:
                pres = new_datasets[DatasetID.from_dict(parent_ds.attrs)]
            if ds_id in new_datasets:
                replace_anc(ds, pres)
                continue
            if area_only and ds.attrs.get('area') is None:
                new_datasets[ds_id] = ds
                replace_anc(ds, pres)
                continue

            if not isinstance(slice_key, dict):
                # match dimension name to slice object
                key = dict(zip(ds.dims, slice_key))
            else:
                key = slice_key
            new_ds = ds.isel(**key)
            if new_area is not None:
                new_ds.attrs['area'] = new_area

            new_datasets[ds_id] = new_ds
            if parent_ds is None:
                # don't use `__setitem__` because we don't want this to
                # affect the existing wishlist/dep tree
                self.datasets[ds_id] = new_ds
            else:
                replace_anc(new_ds, pres)
Example #3
    def _resampled_scene(self, new_scn, destination_area, **resample_kwargs):
        """Resample `datasets` to the `destination` area."""
        new_datasets = {}
        datasets = list(new_scn.datasets.values())
        max_area = None
        if isinstance(destination_area, (str, six.text_type)):
            destination_area = get_area_def(destination_area)
        if hasattr(destination_area, 'freeze'):
            try:
                max_area = new_scn.max_area()
                destination_area = destination_area.freeze(max_area)
            except ValueError:
                raise ValueError("No dataset areas available to freeze "
                                 "DynamicAreaDefinition.")

        resamplers = {}
        for dataset, parent_dataset in dataset_walker(datasets):
            ds_id = DatasetID.from_dict(dataset.attrs)
            pres = None
            if parent_dataset is not None:
                pres = new_datasets[DatasetID.from_dict(parent_dataset.attrs)]
            if ds_id in new_datasets:
                replace_anc(dataset, pres)
                continue
            if dataset.attrs.get('area') is None:
                if parent_dataset is None:
                    new_scn.datasets[ds_id] = dataset
                else:
                    replace_anc(dataset, pres)
                continue
            LOG.debug("Resampling %s", ds_id)
            source_area = dataset.attrs['area']
            try:
                slice_x, slice_y = source_area.get_area_slices(
                    destination_area)
                source_area = source_area[slice_y, slice_x]
                dataset = dataset.isel(x=slice_x, y=slice_y)
                assert ('x', source_area.x_size) in dataset.sizes.items()
                assert ('y', source_area.y_size) in dataset.sizes.items()
                dataset.attrs['area'] = source_area
            except NotImplementedError:
                LOG.info("Not reducing data before resampling.")
            if source_area not in resamplers:
                key, resampler = prepare_resampler(source_area,
                                                   destination_area,
                                                   **resample_kwargs)
                resamplers[source_area] = resampler
                self.resamplers[key] = resampler
            kwargs = resample_kwargs.copy()
            kwargs['resampler'] = resamplers[source_area]
            res = resample_dataset(dataset, destination_area, **kwargs)
            new_datasets[ds_id] = res
            if parent_dataset is None:
                new_scn.datasets[ds_id] = res
            else:
                replace_anc(res, pres)
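
For context, this private method is what runs behind the public Scene.resample call. A hedged usage sketch, assuming `scn` is a Scene with datasets already loaded and that 'eurol' is an area available in the configured areas.yaml:

# Hedged usage sketch: resample the loaded scene to a named area definition.
new_scn = scn.resample('eurol', resampler='nearest')
new_scn.save_datasets()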
Example #4
    def update_ds_ids_from_file_handlers(self):
        """Update DatasetIDs with information from loaded files.

        This is useful, for example, if dataset resolution may change
        depending on what files were loaded.

        """
        for file_handlers in self.file_handlers.values():
            fh = file_handlers[0]
            # update resolution in the dataset IDs for this file's resolution
            res = getattr(fh, 'resolution', None)
            if res is None:
                continue

            for ds_id, ds_info in list(self.ids.items()):
                file_types = ds_info['file_type']
                if not isinstance(file_types, list):
                    file_types = [file_types]
                if fh.filetype_info['file_type'] not in file_types:
                    continue
                if ds_id.resolution is not None:
                    continue
                ds_info['resolution'] = res
                new_id = DatasetID.from_dict(ds_info)
                self.ids[new_id] = ds_info
                del self.ids[ds_id]
Example #5
    def update_ds_ids_from_file_handlers(self):
        """Add or modify available dataset information.

        Each file handler is consulted on whether or not it can load the
        dataset with the provided information dictionary.
        See
        :meth:`satpy.readers.file_handlers.BaseFileHandler.available_datasets`
        for more information.

        """
        avail_datasets = self._file_handlers_available_datasets()
        new_ids = {}
        for is_avail, ds_info in avail_datasets:
            # 'coordinates' may arrive as a list, especially from the yaml config
            coordinates = ds_info.get('coordinates')
            if isinstance(coordinates, list):
                # xarray doesn't like concatenating attributes that are
                # lists: https://github.com/pydata/xarray/issues/2060
                ds_info['coordinates'] = tuple(ds_info['coordinates'])

            ds_info.setdefault('modifiers', tuple())  # default to no mods
            ds_id = DatasetID.from_dict(ds_info)
            # all datasets
            new_ids[ds_id] = ds_info
            # available datasets
            # False == we have the file type but it doesn't have this dataset
            # None == we don't have the file type object to ask
            if is_avail:
                self.available_ids[ds_id] = ds_info
        self.all_ids = new_ids
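
The list-to-tuple conversion above is worth calling out: xarray has had trouble concatenating DataArrays whose attrs contain lists (pydata/xarray#2060), so storing 'coordinates' as a tuple sidesteps that. A minimal, self-contained illustration of the same normalization (the ds_info values are made up):

# Self-contained illustration of the 'coordinates' normalization above.
ds_info = {'name': 'M05', 'coordinates': ['longitude', 'latitude']}
coordinates = ds_info.get('coordinates')
if isinstance(coordinates, list):
    # tuples are immutable, which keeps xarray attribute merging happy
    ds_info['coordinates'] = tuple(ds_info['coordinates'])
print(ds_info['coordinates'])  # ('longitude', 'latitude')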
Example #6
    def _generate_composite(self, comp_node, keepables):
        """Collect all composite prereqs and create the specified composite.

        Args:
            comp_node (Node): Composite Node to generate a Dataset for
            keepables (set): `set` to update if any datasets are needed
                             when generation is continued later. This can
                             happen if generation is delayed due to incompatible
                             areas, which would require resampling first.

        """
        if comp_node.name in self.datasets:
            # already loaded
            return
        compositor, prereqs, optional_prereqs = comp_node.data

        try:
            prereq_datasets = self._get_prereq_datasets(
                comp_node.name,
                prereqs,
                keepables,
            )
        except KeyError:
            return

        optional_datasets = self._get_prereq_datasets(
            comp_node.name,
            optional_prereqs,
            keepables,
            skip=True
        )

        try:
            composite = compositor(prereq_datasets,
                                   optional_datasets=optional_datasets,
                                   **self.attrs)

            cid = DatasetID.from_dict(composite.attrs)

            self.datasets[cid] = composite
            # update the node with the computed DatasetID
            if comp_node.name in self.wishlist:
                self.wishlist.remove(comp_node.name)
                self.wishlist.add(cid)
            comp_node.name = cid
        except IncompatibleAreas:
            LOG.warning("Delaying generation of %s "
                        "because of incompatible areas",
                        str(compositor.id))
            preservable_datasets = set(self.datasets.keys())
            prereq_ids = set(p.name for p in prereqs)
            opt_prereq_ids = set(p.name for p in optional_prereqs)
            keepables |= preservable_datasets & (prereq_ids | opt_prereq_ids)
            # even though it wasn't generated keep a list of what
            # might be needed in other compositors
            keepables.add(comp_node.name)
            return
Example #7
    def iter_by_area(self):
        """Generate datasets grouped by Area.

        :return: iterable of (area_obj, list of DatasetID objects) pairs
        """
        datasets_by_area = {}
        for ds in self:
            a = ds.attrs.get('area')
            datasets_by_area.setdefault(a, []).append(
                DatasetID.from_dict(ds.attrs))

        return datasets_by_area.items()
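
A hedged usage sketch for iter_by_area, assuming `scn` is a Scene with datasets already loaded:

# Hedged usage sketch: group the loaded DatasetIDs by the area they are defined on.
for area, ds_ids in scn.iter_by_area():
    print(area, [ds_id.name for ds_id in ds_ids])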
Example #8
    def _process_composite_config(self, composite_name, conf, composite_type,
                                  sensor_id, composite_config, **kwargs):

        compositors = self.compositors[sensor_id]
        modifiers = self.modifiers[sensor_id]

        try:
            options = conf[composite_type][composite_name]
            loader = options.pop('compositor')
        except KeyError:
            if composite_name in compositors or composite_name in modifiers:
                return conf
            raise ValueError(
                "'compositor' missing or empty in {0}. Option keys = {1}".
                format(composite_config, str(options.keys())))

        options['name'] = composite_name
        for prereq_type in ['prerequisites', 'optional_prerequisites']:
            prereqs = []
            for item in options.get(prereq_type, []):
                if isinstance(item, dict):
                    # we want this prerequisite to act as a query with
                    # 'modifiers' being None otherwise it will be an empty
                    # tuple
                    item.setdefault('modifiers', None)
                    key = DatasetID.from_dict(item)
                    prereqs.append(key)
                else:
                    prereqs.append(item)
            options[prereq_type] = prereqs

        if composite_type == 'composites':
            options.update(**kwargs)
            key = DatasetID.from_dict(options)
            comp = loader(**options)
            compositors[key] = comp
        elif composite_type == 'modifiers':
            modifiers[composite_name] = loader, options
Example #9
    def _process_composite_config(self, composite_name, conf,
                                  composite_type, sensor_id, composite_config, **kwargs):

        compositors = self.compositors[sensor_id]
        modifiers = self.modifiers[sensor_id]

        try:
            options = conf[composite_type][composite_name]
            loader = options.pop('compositor')
        except KeyError:
            if composite_name in compositors or composite_name in modifiers:
                return conf
            raise ValueError("'compositor' missing or empty in {0}. Option keys = {1}".format(
                composite_config, str(options.keys())))

        options['name'] = composite_name
        for prereq_type in ['prerequisites', 'optional_prerequisites']:
            prereqs = []
            for item in options.get(prereq_type, []):
                if isinstance(item, dict):
                    # we want this prerequisite to act as a query with
                    # 'modifiers' being None otherwise it will be an empty
                    # tuple
                    item.setdefault('modifiers', None)
                    key = DatasetID.from_dict(item)
                    prereqs.append(key)
                else:
                    prereqs.append(item)
            options[prereq_type] = prereqs

        if composite_type == 'composites':
            options.update(**kwargs)
            key = DatasetID.from_dict(options)
            comp = loader(**options)
            compositors[key] = comp
        elif composite_type == 'modifiers':
            modifiers[composite_name] = loader, options
Example #10
    def copy(self, datasets=None):
        """Create a copy of the Scene including dependency information.

        Args:
            datasets (list, tuple): `DatasetID` objects for the datasets
                                    to include in the new Scene object.

        """
        new_scn = self.__class__()
        new_scn.attrs = self.attrs.copy()
        new_scn.dep_tree = self.dep_tree.copy()

        for ds_id in (datasets or self.keys()):
            # NOTE: Must use `.datasets` or side effects of `__setitem__`
            #       could hurt us with regards to the wishlist
            new_scn.datasets[ds_id] = self[ds_id]

        if not datasets:
            new_scn.wishlist = self.wishlist.copy()
        else:
            new_scn.wishlist = set(
                [DatasetID.from_dict(ds.attrs) for ds in new_scn])
        return new_scn
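
A hedged usage sketch for copy, assuming `scn` already holds several loaded datasets:

# Hedged usage sketch: copy only a subset of the datasets into a new Scene.
subset_ids = list(scn.keys())[:2]  # DatasetID objects of the first two datasets
small_scn = scn.copy(datasets=subset_ids)
print(list(small_scn.keys()))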
Example #11
def add_group_aliases(scenes, groups):
    """Add aliases for the groups datasets belong to."""
    for scene in scenes:
        scene = scene.copy()
        for group_id, member_names in groups.items():
            # Find out whether one of the datasets in this scene belongs
            # to this group
            member_ids = [DatasetID.from_dict(scene[name].attrs)
                          for name in member_names if name in scene]

            # Add an alias for the group it belongs to
            if len(member_ids) == 1:
                member_id = member_ids[0]
                new_ds = scene[member_id].copy()
                new_ds.attrs.update(group_id.to_dict())
                scene[group_id] = new_ds
            elif len(member_ids) > 1:
                raise ValueError('Cannot add multiple datasets from the same '
                                 'scene to a group')
            else:
                # Datasets in this scene don't belong to any group
                pass
        yield scene
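
A hedged usage sketch for add_group_aliases. It assumes `scenes` is a list of loaded Scenes, that DatasetID is importable from satpy.dataset (older releases), and the 'vis06' group name and member channel names are made up for illustration:

# Hedged usage sketch: alias the ~0.6 um channel of each sensor to a common
# group id so the scenes can be blended later. Names here are illustrative.
from satpy.dataset import DatasetID

groups = {DatasetID(name='vis06'): ['C02', 'VIS006']}
aliased_scenes = list(add_group_aliases(scenes, groups))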
Example #12
    def update_ds_ids_from_file_handlers(self):
        """Update DatasetIDs with information from loaded files.

        This is useful, for example, if dataset resolution may change
        depending on what files were loaded.

        """
        for file_handlers in self.file_handlers.values():
            fh = file_handlers[0]
            # update resolution in the dataset IDs for this file's resolution
            res = getattr(fh, 'resolution', None)
            if res is None:
                continue

            for ds_id, ds_info in list(self.ids.items()):
                if fh.filetype_info['file_type'] != ds_info['file_type']:
                    continue
                if ds_id.resolution is not None:
                    continue
                ds_info['resolution'] = res
                new_id = DatasetID.from_dict(ds_info)
                self.ids[new_id] = ds_info
                del self.ids[ds_id]
Example #13
    def save_animation(self,
                       filename,
                       datasets=None,
                       fps=10,
                       fill_value=None,
                       batch_size=1,
                       ignore_missing=False,
                       client=True,
                       **kwargs):
        """Helper method for saving to movie (MP4) or GIF formats.

        Supported formats are dependent on the `imageio` library and are
        determined by filename extension by default.

        .. note::

            Starting with ``imageio`` 2.5.0, the use of FFMPEG depends on
            a separate ``imageio-ffmpeg`` package.

        By default, all available datasets are saved to individual files,
        using the first Scene's dataset metadata to format the provided
        filename. If a dataset is not available from a Scene, a black array
        (np.zeros(shape)) is used instead.

        This function can use the ``dask.distributed`` library for improved
        performance by computing multiple frames at a time (see `batch_size`
        option below). If the distributed library is not available then frames
        will be generated one at a time, one product at a time.

        Args:
            filename (str): Filename to save to. Can include python string
                            formatting keys from dataset ``.attrs``
                            (ex. "{name}_{start_time:%Y%m%d_%H%M%S.gif")
            datasets (list): DatasetIDs to save (default: all datasets)
            fps (int): Frames per second for produced animation
            fill_value (int): Value to use instead of creating an alpha band.
            batch_size (int): Number of frames to compute at the same time.
                This only has effect if the `dask.distributed` package is
                installed. This will default to 1. Setting this to 0 or less
                will attempt to process all frames at once. This option should
                be used with care to avoid memory issues when trying to
                improve performance. Note that this is the total number of
                frames for all datasets, so when saving 2 datasets this will
                compute ``(batch_size / 2)`` frames for the first dataset and
                ``(batch_size / 2)`` frames for the second dataset.
            ignore_missing (bool): Don't include a black frame when a dataset
                                   is missing from a child scene.
            client (bool or dask.distributed.Client): Dask distributed client
                to use for computation. If this is ``True`` (default) then
                any existing clients will be used.
                If this is ``False`` or ``None`` then a client will not be
                created and ``dask.distributed`` will not be used. If this
                is a dask ``Client`` object then it will be used for
                distributed computation.
            kwargs: Additional keyword arguments to pass to
                   `imageio.get_writer`.

        """
        if imageio is None:
            raise ImportError("Missing required 'imageio' library")

        scene_gen = self._scene_gen
        first_scene = self.first_scene
        scenes = iter(self._scene_gen)
        info_scenes = [first_scene]
        if 'end_time' in filename:
            # if we need the last scene to generate the filename
            # then compute all the scenes so we can figure it out
            log.debug("Generating scenes to compute end_time for filename")
            scenes = list(scenes)
            info_scenes.append(scenes[-1])

        available_ds = [
            first_scene.datasets.get(ds) for ds in first_scene.wishlist
        ]
        available_ds = [
            DatasetID.from_dict(ds.attrs) for ds in available_ds
            if ds is not None
        ]
        dataset_ids = datasets or available_ds

        if not dataset_ids:
            raise RuntimeError(
                "No datasets found for saving (resampling may be needed to generate composites)"
            )

        writers = {}
        frames = {}
        for dataset_id in dataset_ids:
            if not self.is_generator and not self._all_same_area([dataset_id]):
                raise ValueError("Sub-scene datasets must all be on the same "
                                 "area (see the 'resample' method).")

            all_datasets = scene_gen[dataset_id]
            info_datasets = [scn.get(dataset_id) for scn in info_scenes]
            this_fn, shape, this_fill = self._get_animation_info(
                info_datasets, filename, fill_value=fill_value)
            data_to_write = self._get_animation_frames(all_datasets, shape,
                                                       this_fill,
                                                       ignore_missing)

            writer = imageio.get_writer(this_fn, fps=fps, **kwargs)
            frames[dataset_id] = data_to_write
            writers[dataset_id] = writer

        client = self._get_client(client=client)
        # get an ordered list of frames
        frame_keys, frames_to_write = list(zip(*frames.items()))
        frames_to_write = zip(*frames_to_write)
        if client is not None:
            self._distribute_frame_compute(writers,
                                           frame_keys,
                                           frames_to_write,
                                           client,
                                           batch_size=batch_size)
        else:
            self._simple_frame_compute(writers, frame_keys, frames_to_write)

        for writer in writers.values():
            writer.close()
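
A hedged usage sketch for save_animation, assuming `mscn` is a MultiScene built from several time steps and that imageio (plus imageio-ffmpeg for MP4 output) is installed:

# Hedged usage sketch: one MP4 per dataset, two frames per second.
mscn.save_animation('{name}_{start_time:%Y%m%d_%H%M%S}.mp4', fps=2)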