Example #1
def _dict_equal(d1, d2):
    """Check that two dictionaries are equal.

    Nested dictionaries are flattened to facilitate comparison.
    """
    d1_flat = flatten_dict(d1)
    d2_flat = flatten_dict(d2)
    if not _dict_keys_equal(d1_flat, d2_flat):
        return False
    for key in d1_flat.keys():
        value_pair = [d1_flat[key], d2_flat[key]]
        if not _all_non_dicts_equal(value_pair):
            return False
    return True
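
The helpers _dict_keys_equal and _all_non_dicts_equal are not shown in this example. The sketches below are assumptions inferred purely from how _dict_equal calls them, not the library's actual implementations:

def _dict_keys_equal(d1, d2):
    """Hypothetical helper: do both flattened dicts have the same keys?"""
    return set(d1) == set(d2)


def _all_non_dicts_equal(values):
    """Hypothetical helper: are all non-dict values in the pair equal?"""
    non_dicts = [v for v in values if not isinstance(v, dict)]
    return all(v == non_dicts[0] for v in non_dicts)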
Example #2
    def test_flatten_dict(self):
        d = {'a': 1, 'b': {'c': 1, 'd': {'e': 1, 'f': {'g': [1, 2]}}}}
        expected = {'a': 1,
                    'b_c': 1,
                    'b_d_e': 1,
                    'b_d_f_g': [1, 2]}
        self.assertDictEqual(wutils.flatten_dict(d), expected)
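
For reference, a minimal sketch of a flatten_dict that would satisfy the test above, joining nested keys with an underscore (the exact signature and separator argument are assumptions; the library's helper may differ):

def flatten_dict(d, parent_key='', sep='_'):
    """Flatten nested dictionaries, joining keys with `sep`.

    Non-dict values (including lists) are kept as-is, matching the
    expected output in the test above.
    """
    items = {}
    for key, value in d.items():
        new_key = parent_key + sep + key if parent_key else key
        if isinstance(value, dict):
            items.update(flatten_dict(value, parent_key=new_key, sep=sep))
        else:
            items[new_key] = value
    return items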
Example #3
    def save_datasets(self,
                      datasets,
                      filename=None,
                      groups=None,
                      header_attrs=None,
                      engine=None,
                      epoch=EPOCH,
                      flatten_attrs=False,
                      exclude_attrs=None,
                      include_lonlats=True,
                      pretty=False,
                      compression=None,
                      **to_netcdf_kwargs):
        """Save the given datasets in one netCDF file.

        Note that all datasets (or, if grouping is used, all datasets within a group) must have the same projection coordinates.

        Args:
            datasets (list):
                Datasets to be saved
            filename (str):
                Output file
            groups (dict):
                Group datasets according to the given assignment: `{'group_name': ['dataset1', 'dataset2', ...]}`.
                Group name `None` corresponds to the root of the file, i.e. no group will be created. Warning: The
                results will not be fully CF compliant!
            header_attrs:
                Global attributes to be included
            engine (str):
                Module to be used for writing netCDF files. Follows xarray's
                :meth:`~xarray.Dataset.to_netcdf` engine choices with a
                preference for 'netcdf4'.
            epoch (str):
                Reference time for encoding of time coordinates
            flatten_attrs (bool):
                If True, flatten dict-type attributes
            exclude_attrs (list):
                List of dataset attributes to be excluded
            include_lonlats (bool):
                Always include latitude and longitude coordinates, even for datasets with an area definition
            pretty (bool):
                Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent.
            compression (dict):
                Compression to use on the datasets before saving, for example {'zlib': True, 'complevel': 9}.
                This is in turn passed to xarray's `to_netcdf` method; see
                http://xarray.pydata.org/en/stable/generated/xarray.Dataset.to_netcdf.html for more possibilities.

        """
        logger.info('Saving datasets to NetCDF4/CF.')

        if groups is None:
            # Write all datasets to the file root without creating a group
            groups_ = {None: datasets}
        else:
            # User specified a group assignment using dataset names. Collect the corresponding datasets.
            groups_ = defaultdict(list)
            for dataset in datasets:
                for group_name, group_members in groups.items():
                    if dataset.attrs['name'] in group_members:
                        groups_[group_name].append(dataset)
                        break

        if compression is None:
            compression = {'zlib': True}

        # Write global attributes to file root (creates the file)
        filename = filename or self.get_filename(**datasets[0].attrs)

        root = xr.Dataset({}, attrs={})
        if header_attrs is not None:
            if flatten_attrs:
                header_attrs = flatten_dict(header_attrs)
            root.attrs = encode_attrs_nc(header_attrs)
        _history_create = 'Created by pytroll/satpy on {}'.format(
            datetime.utcnow())
        if 'history' in root.attrs:
            if isinstance(root.attrs['history'], list):
                root.attrs['history'] = ''.join(root.attrs['history'])
            root.attrs['history'] += '\n' + _history_create
        else:
            root.attrs['history'] = _history_create

        if groups is None:
            # Groups are not CF-1.7 compliant
            if 'Conventions' not in root.attrs:
                root.attrs['Conventions'] = CF_VERSION

        # Remove satpy-specific kwargs
        to_netcdf_kwargs = copy.deepcopy(
            to_netcdf_kwargs)  # may contain dictionaries (encoding)
        satpy_kwargs = ['overlay', 'decorate', 'config_files']
        for kwarg in satpy_kwargs:
            to_netcdf_kwargs.pop(kwarg, None)

        init_nc_kwargs = to_netcdf_kwargs.copy()
        init_nc_kwargs.pop('encoding',
                           None)  # No variables to be encoded at this point
        init_nc_kwargs.pop('unlimited_dims', None)
        written = [
            root.to_netcdf(filename, engine=engine, mode='w', **init_nc_kwargs)
        ]

        # Write datasets to groups (appending to the file; group=None means no group)
        for group_name, group_datasets in groups_.items():
            # XXX: Should we combine the info of all datasets?
            datas, start_times, end_times = self._collect_datasets(
                group_datasets,
                epoch=epoch,
                flatten_attrs=flatten_attrs,
                exclude_attrs=exclude_attrs,
                include_lonlats=include_lonlats,
                pretty=pretty,
                compression=compression)
            dataset = xr.Dataset(datas)
            if 'time' in dataset:
                dataset['time_bnds'] = make_time_bounds(start_times, end_times)
                dataset['time'].attrs['bounds'] = "time_bnds"
                dataset['time'].attrs['standard_name'] = "time"
            else:
                grp_str = ' of group {}'.format(
                    group_name) if group_name is not None else ''
                logger.warning(
                    'No time dimension in datasets{}, skipping time bounds creation.'
                    .format(grp_str))

            encoding, other_to_netcdf_kwargs = update_encoding(
                dataset, to_netcdf_kwargs)
            res = dataset.to_netcdf(filename,
                                    engine=engine,
                                    group=group_name,
                                    mode='a',
                                    encoding=encoding,
                                    **other_to_netcdf_kwargs)
            written.append(res)
        return written
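
A hedged usage sketch for this writer, invoked through a satpy Scene; the reader name, input files, and dataset names are hypothetical placeholders:

from satpy import Scene

# Hypothetical reader and input files; adjust for your data.
scn = Scene(reader='seviri_l1b_hrit', filenames=['/path/to/segments'])
scn.load(['IR_108', 'VIS006'])

# Write both channels into one CF-style netCDF file, flattening any
# dict-type attributes and grouping the channels under 'visir'.
scn.save_datasets(writer='cf',
                  filename='out.nc',
                  groups={'visir': ['IR_108', 'VIS006']},
                  flatten_attrs=True)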
Example #4
    def da2cf(dataarray,
              epoch=EPOCH,
              flatten_attrs=False,
              exclude_attrs=None,
              compression=None):
        """Convert the dataarray to something cf-compatible.

        Args:
            dataarray (xr.DataArray):
                The data array to be converted
            epoch (str):
                Reference time for encoding of time coordinates
            flatten_attrs (bool):
                If True, flatten dict-type attributes
            exclude_attrs (list):
                List of dataset attributes to be excluded
            compression (dict):
                Encoding settings (e.g. {'zlib': True}) merged into the variable's netCDF encoding

        """
        if exclude_attrs is None:
            exclude_attrs = []

        new_data = dataarray.copy()
        if 'name' in new_data.attrs:
            name = new_data.attrs.pop('name')
            new_data = new_data.rename(name)

        # Remove _satpy* attributes
        satpy_attrs = [
            key for key in new_data.attrs if key.startswith('_satpy')
        ]
        for satpy_attr in satpy_attrs:
            new_data.attrs.pop(satpy_attr)

        # Remove area as well as user-defined attributes
        for key in ['area'] + exclude_attrs:
            new_data.attrs.pop(key, None)

        anc = [
            ds.attrs['name']
            for ds in new_data.attrs.get('ancillary_variables', [])
        ]
        if anc:
            new_data.attrs['ancillary_variables'] = ' '.join(anc)
        # TODO: make this a grid mapping or lon/lats
        # new_data.attrs['area'] = str(new_data.attrs.get('area'))
        for key, val in new_data.attrs.copy().items():
            if val is None:
                new_data.attrs.pop(key)
            if key == 'ancillary_variables' and val == []:
                new_data.attrs.pop(key)
        new_data.attrs.pop('_last_resampler', None)
        if compression is not None:
            new_data.encoding.update(compression)

        if 'time' in new_data.coords:
            new_data['time'].encoding['units'] = epoch
            new_data['time'].attrs['standard_name'] = 'time'
            new_data['time'].attrs.pop('bounds', None)
            if 'time' not in new_data.dims:
                new_data = new_data.expand_dims('time')

        if 'x' in new_data.coords:
            new_data['x'].attrs['standard_name'] = 'projection_x_coordinate'
            new_data['x'].attrs['units'] = 'm'

        if 'y' in new_data.coords:
            new_data['y'].attrs['standard_name'] = 'projection_y_coordinate'
            new_data['y'].attrs['units'] = 'm'

        if 'crs' in new_data.coords:
            new_data = new_data.drop_vars('crs')

        if 'long_name' not in new_data.attrs and 'standard_name' not in new_data.attrs:
            new_data.attrs['long_name'] = new_data.name
        if 'prerequisites' in new_data.attrs:
            new_data.attrs['prerequisites'] = [
                np.string_(str(prereq))
                for prereq in new_data.attrs['prerequisites']
            ]

        # Flatten dict-type attributes, if desired
        if flatten_attrs:
            new_data.attrs = flatten_dict(new_data.attrs)

        # Encode attributes to netcdf-compatible datatype
        new_data.attrs = encode_attrs_nc(new_data.attrs)

        return new_data
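
A minimal usage sketch of da2cf with flatten_attrs=True, assuming the method is reachable as a static method on satpy's CFWriter (the attribute names are made up for illustration):

import numpy as np
import xarray as xr
from satpy.writers.cf_writer import CFWriter

arr = xr.DataArray(np.zeros((2, 2)),
                   dims=('y', 'x'),
                   attrs={'name': 'my_band',
                          'calibration': {'gain': 1.0, 'offset': 0.0}})

# The nested 'calibration' dict is flattened into 'calibration_gain' /
# 'calibration_offset' attributes, which netCDF can store directly.
cf_arr = CFWriter.da2cf(arr, flatten_attrs=True)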
Example #5
    def save_datasets(self,
                      datasets,
                      filename=None,
                      groups=None,
                      header_attrs=None,
                      engine=None,
                      epoch=EPOCH,
                      flatten_attrs=False,
                      exclude_attrs=None,
                      include_lonlats=True,
                      pretty=False,
                      compression=None,
                      include_orig_name=True,
                      numeric_name_prefix='CHANNEL_',
                      **to_netcdf_kwargs):
        """Save the given datasets in one netCDF file.

        Note that all datasets (or, if grouping is used, all datasets within a group) must have the same projection coordinates.

        Args:
            datasets (list):
                Datasets to be saved
            filename (str):
                Output file
            groups (dict):
                Group datasets according to the given assignment: `{'group_name': ['dataset1', 'dataset2', ...]}`.
                Group name `None` corresponds to the root of the file, i.e. no group will be created. Warning: The
                results will not be fully CF compliant!
            header_attrs:
                Global attributes to be included
            engine (str):
                Module to be used for writing netCDF files. Follows xarray's
                :meth:`~xarray.Dataset.to_netcdf` engine choices with a
                preference for 'netcdf4'.
            epoch (str):
                Reference time for encoding of time coordinates
            flatten_attrs (bool):
                If True, flatten dict-type attributes
            exclude_attrs (list):
                List of dataset attributes to be excluded
            include_lonlats (bool):
                Always include latitude and longitude coordinates, even for datasets with an area definition
            pretty (bool):
                Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent.
            compression (dict):
                Compression to use on the datasets before saving, for example {'zlib': True, 'complevel': 9}.
                This is in turn passed to xarray's `to_netcdf` method; see
                http://xarray.pydata.org/en/stable/generated/xarray.Dataset.to_netcdf.html for more possibilities.
                (This parameter is deprecated; please use the DataArray's `encoding` instead.)
            include_orig_name (bool):
                Include the original dataset name as a variable attribute in the final netCDF file
            numeric_name_prefix (str):
                Prefix to add to each variable whose name starts with a digit. Use '' or None to leave this out.

        """
        logger.info('Saving datasets to NetCDF4/CF.')
        compression = _get_compression(compression)

        # Write global attributes to file root (creates the file)
        filename = filename or self.get_filename(**datasets[0].attrs)

        root = xr.Dataset({}, attrs={})
        if header_attrs is not None:
            if flatten_attrs:
                header_attrs = flatten_dict(header_attrs)
            root.attrs = encode_attrs_nc(header_attrs)

        _set_history(root)

        # Remove satpy-specific kwargs
        to_netcdf_kwargs = copy.deepcopy(
            to_netcdf_kwargs)  # may contain dictionaries (encoding)
        satpy_kwargs = ['overlay', 'decorate', 'config_files']
        for kwarg in satpy_kwargs:
            to_netcdf_kwargs.pop(kwarg, None)

        init_nc_kwargs = to_netcdf_kwargs.copy()
        init_nc_kwargs.pop('encoding',
                           None)  # No variables to be encoded at this point
        init_nc_kwargs.pop('unlimited_dims', None)

        groups_ = _get_groups(groups, datasets, root)

        written = [
            root.to_netcdf(filename, engine=engine, mode='w', **init_nc_kwargs)
        ]

        # Write datasets to groups (appending to the file; group=None means no group)
        for group_name, group_datasets in groups_.items():
            # XXX: Should we combine the info of all datasets?
            datas, start_times, end_times = self._collect_datasets(
                group_datasets,
                epoch=epoch,
                flatten_attrs=flatten_attrs,
                exclude_attrs=exclude_attrs,
                include_lonlats=include_lonlats,
                pretty=pretty,
                compression=compression,
                include_orig_name=include_orig_name,
                numeric_name_prefix=numeric_name_prefix)
            dataset = xr.Dataset(datas)
            if 'time' in dataset:
                dataset['time_bnds'] = make_time_bounds(start_times, end_times)
                dataset['time'].attrs['bounds'] = "time_bnds"
                dataset['time'].attrs['standard_name'] = "time"
            else:
                grp_str = ' of group {}'.format(
                    group_name) if group_name is not None else ''
                logger.warning(
                    'No time dimension in datasets{}, skipping time bounds creation.'
                    .format(grp_str))

            encoding, other_to_netcdf_kwargs = update_encoding(
                dataset, to_netcdf_kwargs, numeric_name_prefix)
            res = dataset.to_netcdf(filename,
                                    engine=engine,
                                    group=group_name,
                                    mode='a',
                                    encoding=encoding,
                                    **other_to_netcdf_kwargs)
            written.append(res)

        return written
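
Since the docstring flags compression as deprecated in favor of the DataArray's encoding, here is a minimal sketch of the suggested alternative using plain xarray ('zlib' and 'complevel' are standard netCDF4 encoding keys):

import numpy as np
import xarray as xr

da = xr.DataArray(np.zeros((2, 2)), dims=('y', 'x'), name='my_band')

# Attach the compression settings to the variable's encoding up front;
# to_netcdf (and hence this writer) picks them up per variable.
da.encoding.update({'zlib': True, 'complevel': 9})

da.to_dataset().to_netcdf('out.nc', engine='netcdf4')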
Example #6
    def da2cf(dataarray,
              epoch=EPOCH,
              flatten_attrs=False,
              exclude_attrs=None,
              compression=None,
              include_orig_name=True,
              numeric_name_prefix='CHANNEL_'):
        """Convert the dataarray to something cf-compatible.

        Args:
            dataarray (xr.DataArray):
                The data array to be converted
            epoch (str):
                Reference time for encoding of time coordinates
            flatten_attrs (bool):
                If True, flatten dict-type attributes
            exclude_attrs (list):
                List of dataset attributes to be excluded
            compression (dict):
                Encoding settings (e.g. {'zlib': True}) merged into the variable's netCDF encoding
            include_orig_name (bool):
                Include the original dataset name in the netcdf variable attributes
            numeric_name_prefix (str):
                Prefix prepended to dataset names that start with a digit
        """
        if exclude_attrs is None:
            exclude_attrs = []

        original_name = None
        new_data = dataarray.copy()
        if 'name' in new_data.attrs:
            name = new_data.attrs.pop('name')
            original_name, name = _handle_dataarray_name(
                name, numeric_name_prefix)
            new_data = new_data.rename(name)

        CFWriter._remove_satpy_attributes(new_data)

        # Remove area as well as user-defined attributes
        for key in ['area'] + exclude_attrs:
            new_data.attrs.pop(key, None)

        anc = [
            ds.attrs['name']
            for ds in new_data.attrs.get('ancillary_variables', [])
        ]
        if anc:
            new_data.attrs['ancillary_variables'] = ' '.join(anc)
        # TODO: make this a grid mapping or lon/lats
        # new_data.attrs['area'] = str(new_data.attrs.get('area'))
        CFWriter._cleanup_attrs(new_data)

        if compression is not None:
            new_data.encoding.update(compression)

        new_data = CFWriter._encode_time(new_data, epoch)
        new_data = CFWriter._encode_coords(new_data)

        if 'long_name' not in new_data.attrs and 'standard_name' not in new_data.attrs:
            new_data.attrs['long_name'] = new_data.name
        if 'prerequisites' in new_data.attrs:
            new_data.attrs['prerequisites'] = [
                np.string_(str(prereq))
                for prereq in new_data.attrs['prerequisites']
            ]

        if include_orig_name and numeric_name_prefix and original_name and original_name != name:
            new_data.attrs['original_name'] = original_name

        # Flatten dict-type attributes, if desired
        if flatten_attrs:
            new_data.attrs = flatten_dict(new_data.attrs)

        # Encode attributes to netcdf-compatible datatype
        new_data.attrs = encode_attrs_nc(new_data.attrs)

        return new_data
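
The helper _handle_dataarray_name is not shown above; the sketch below is an assumption based solely on the numeric_name_prefix docstring (prepend the prefix to dataset names that start with a digit):

def _handle_dataarray_name(name, numeric_name_prefix):
    """Hypothetical sketch: prefix variable names that start with a digit.

    Returns (original_name, new_name); original_name is None when the
    name was left unchanged.
    """
    original_name = None
    if name and name[0].isdigit():
        original_name = name
        if numeric_name_prefix:
            name = numeric_name_prefix + name
    return original_name, name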