def _dict_equal(d1, d2):
    """Return True if the two dictionaries are equal.

    Both inputs are flattened first so that nested dictionaries can be
    compared key by key.
    """
    flat_a = flatten_dict(d1)
    flat_b = flatten_dict(d2)
    if not _dict_keys_equal(flat_a, flat_b):
        return False
    # Keys match; compare the value stored under each flattened key.
    return all(
        _all_non_dicts_equal([flat_a[key], flat_b[key]])
        for key in flat_a
    )
def test_flatten_dict(self):
    """Test flattening a nested dict into '_'-joined keys."""
    nested = {
        'a': 1,
        'b': {'c': 1, 'd': {'e': 1, 'f': {'g': [1, 2]}}},
    }
    flat = wutils.flatten_dict(nested)
    self.assertDictEqual(
        flat,
        {'a': 1, 'b_c': 1, 'b_d_e': 1, 'b_d_f_g': [1, 2]},
    )
def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, engine=None, epoch=EPOCH,
                  flatten_attrs=False, exclude_attrs=None, include_lonlats=True, pretty=False,
                  compression=None, **to_netcdf_kwargs):
    """Save the given datasets in one netCDF file.

    Note that all datasets (if grouping: in one group) must have the same projection coordinates.

    Args:
        datasets (list): Datasets to be saved
        filename (str): Output file
        groups (dict): Group datasets according to the given assignment:
            `{'group_name': ['dataset1', 'dataset2', ...]}`. Group name `None`
            corresponds to the root of the file, i.e. no group will be created.
            Warning: The results will not be fully CF compliant!
        header_attrs: Global attributes to be included
        engine (str): Module to be used for writing netCDF files. Follows xarray's
            :meth:`~xarray.Dataset.to_netcdf` engine choices with a preference for 'netcdf4'.
        epoch (str): Reference time for encoding of time coordinates
        flatten_attrs (bool): If True, flatten dict-type attributes
        exclude_attrs (list): List of dataset attributes to be excluded
        include_lonlats (bool): Always include latitude and longitude coordinates,
            even for datasets with area definition
        pretty (bool): Don't modify coordinate names, if possible. Makes the file
            prettier, but possibly less consistent.
        compression (dict): Compression to use on the datasets before saving, for
            example {'zlib': True, 'complevel': 9}. This is in turn passed to
            xarray's `to_netcdf` method:
            http://xarray.pydata.org/en/stable/generated/xarray.Dataset.to_netcdf.html
            for more possibilities.

    Returns:
        List of the values returned by each ``to_netcdf`` call (one for the file
        root plus one per group written).
    """
    logger.info('Saving datasets to NetCDF4/CF.')

    if groups is None:
        # Write all datasets to the file root without creating a group
        groups_ = {None: datasets}
    else:
        # User specified a group assignment using dataset names. Collect the corresponding datasets.
        groups_ = defaultdict(list)
        for dataset in datasets:
            for group_name, group_members in groups.items():
                if dataset.attrs['name'] in group_members:
                    groups_[group_name].append(dataset)
                    # A dataset joins only the first matching group.
                    break

    if compression is None:
        # Default to zlib compression when the caller gives none.
        compression = {'zlib': True}

    # Write global attributes to file root (creates the file)
    filename = filename or self.get_filename(**datasets[0].attrs)

    root = xr.Dataset({}, attrs={})
    if header_attrs is not None:
        if flatten_attrs:
            header_attrs = flatten_dict(header_attrs)
        root.attrs = encode_attrs_nc(header_attrs)

    # Append a creation record to any existing 'history' attribute.
    _history_create = 'Created by pytroll/satpy on {}'.format(datetime.utcnow())
    if 'history' in root.attrs:
        if isinstance(root.attrs['history'], list):
            root.attrs['history'] = ''.join(root.attrs['history'])
        root.attrs['history'] += '\n' + _history_create
    else:
        root.attrs['history'] = _history_create

    if groups is None:
        # Groups are not CF-1.7 compliant, so only advertise the Conventions
        # attribute when everything is written to the file root.
        if 'Conventions' not in root.attrs:
            root.attrs['Conventions'] = CF_VERSION

    # Remove satpy-specific kwargs
    to_netcdf_kwargs = copy.deepcopy(to_netcdf_kwargs)  # may contain dictionaries (encoding)
    satpy_kwargs = ['overlay', 'decorate', 'config_files']
    for kwarg in satpy_kwargs:
        to_netcdf_kwargs.pop(kwarg, None)

    init_nc_kwargs = to_netcdf_kwargs.copy()
    init_nc_kwargs.pop('encoding', None)  # No variables to be encoded at this point
    init_nc_kwargs.pop('unlimited_dims', None)

    # mode='w' creates/overwrites the file with just the global attributes.
    written = [root.to_netcdf(filename, engine=engine, mode='w', **init_nc_kwargs)]

    # Write datasets to groups (appending to the file; group=None means no group)
    for group_name, group_datasets in groups_.items():
        # XXX: Should we combine the info of all datasets?
        datas, start_times, end_times = self._collect_datasets(
            group_datasets, epoch=epoch, flatten_attrs=flatten_attrs, exclude_attrs=exclude_attrs,
            include_lonlats=include_lonlats, pretty=pretty, compression=compression)
        dataset = xr.Dataset(datas)
        if 'time' in dataset:
            dataset['time_bnds'] = make_time_bounds(start_times, end_times)
            dataset['time'].attrs['bounds'] = "time_bnds"
            dataset['time'].attrs['standard_name'] = "time"
        else:
            grp_str = ' of group {}'.format(group_name) if group_name is not None else ''
            logger.warning('No time dimension in datasets{}, skipping time bounds creation.'.format(grp_str))

        encoding, other_to_netcdf_kwargs = update_encoding(dataset, to_netcdf_kwargs)
        # mode='a' appends each group to the file created above.
        res = dataset.to_netcdf(filename, engine=engine, group=group_name, mode='a', encoding=encoding,
                                **other_to_netcdf_kwargs)
        written.append(res)

    return written
def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, compression=None):
    """Convert the dataarray to something cf-compatible.

    Args:
        dataarray (xr.DataArray): The data array to be converted
        epoch (str): Reference time for encoding of time coordinates
        flatten_attrs (bool): If True, flatten dict-type attributes
        exclude_attrs (list): List of dataset attributes to be excluded
        compression (dict): If given, merged into the array's encoding
            (e.g. ``{'zlib': True}``)

    Returns:
        A copy of the input array with CF-style attributes and coordinates.
    """
    if exclude_attrs is None:
        exclude_attrs = []

    # Work on a copy so the caller's array is left untouched.
    new_data = dataarray.copy()

    # Rename the array after its satpy 'name' attribute, if present.
    if 'name' in new_data.attrs:
        name = new_data.attrs.pop('name')
        new_data = new_data.rename(name)

    # Remove _satpy* attributes
    satpy_attrs = [key for key in new_data.attrs if key.startswith('_satpy')]
    for satpy_attr in satpy_attrs:
        new_data.attrs.pop(satpy_attr)

    # Remove area as well as user-defined attributes
    for key in ['area'] + exclude_attrs:
        new_data.attrs.pop(key, None)

    # CF expects ancillary_variables as a space-separated string of names,
    # not a list of DataArrays.
    anc = [ds.attrs['name'] for ds in new_data.attrs.get('ancillary_variables', [])]
    if anc:
        new_data.attrs['ancillary_variables'] = ' '.join(anc)
    # TODO: make this a grid mapping or lon/lats
    # new_data.attrs['area'] = str(new_data.attrs.get('area'))

    # Drop None-valued attributes (not representable in netCDF) and an
    # empty ancillary_variables list left over from the step above.
    for key, val in new_data.attrs.copy().items():
        if val is None:
            new_data.attrs.pop(key)
        if key == 'ancillary_variables' and val == []:
            new_data.attrs.pop(key)
    new_data.attrs.pop('_last_resampler', None)

    if compression is not None:
        new_data.encoding.update(compression)

    if 'time' in new_data.coords:
        new_data['time'].encoding['units'] = epoch
        new_data['time'].attrs['standard_name'] = 'time'
        # Bounds are recreated at dataset level; drop any stale reference.
        new_data['time'].attrs.pop('bounds', None)
        if 'time' not in new_data.dims:
            new_data = new_data.expand_dims('time')

    # NOTE(review): units of 'm' assume projection coordinates in metres —
    # confirm for the area definitions in use.
    if 'x' in new_data.coords:
        new_data['x'].attrs['standard_name'] = 'projection_x_coordinate'
        new_data['x'].attrs['units'] = 'm'

    if 'y' in new_data.coords:
        new_data['y'].attrs['standard_name'] = 'projection_y_coordinate'
        new_data['y'].attrs['units'] = 'm'

    if 'crs' in new_data.coords:
        new_data = new_data.drop_vars('crs')

    # CF wants at least one of long_name/standard_name on every variable.
    if 'long_name' not in new_data.attrs and 'standard_name' not in new_data.attrs:
        new_data.attrs['long_name'] = new_data.name
    if 'prerequisites' in new_data.attrs:
        new_data.attrs['prerequisites'] = [np.string_(str(prereq)) for prereq in new_data.attrs['prerequisites']]

    # Flatten dict-type attributes, if desired
    if flatten_attrs:
        new_data.attrs = flatten_dict(new_data.attrs)

    # Encode attributes to netcdf-compatible datatype
    new_data.attrs = encode_attrs_nc(new_data.attrs)

    return new_data
def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, engine=None, epoch=EPOCH,
                  flatten_attrs=False, exclude_attrs=None, include_lonlats=True, pretty=False,
                  compression=None, include_orig_name=True, numeric_name_prefix='CHANNEL_', **to_netcdf_kwargs):
    """Save the given datasets in one netCDF file.

    Note that all datasets (if grouping: in one group) must have the same projection coordinates.

    Args:
        datasets (list): Datasets to be saved
        filename (str): Output file
        groups (dict): Group datasets according to the given assignment:
            `{'group_name': ['dataset1', 'dataset2', ...]}`. Group name `None`
            corresponds to the root of the file, i.e. no group will be created.
            Warning: The results will not be fully CF compliant!
        header_attrs: Global attributes to be included
        engine (str): Module to be used for writing netCDF files. Follows xarray's
            :meth:`~xarray.Dataset.to_netcdf` engine choices with a preference for 'netcdf4'.
        epoch (str): Reference time for encoding of time coordinates
        flatten_attrs (bool): If True, flatten dict-type attributes
        exclude_attrs (list): List of dataset attributes to be excluded
        include_lonlats (bool): Always include latitude and longitude coordinates,
            even for datasets with area definition
        pretty (bool): Don't modify coordinate names, if possible. Makes the file
            prettier, but possibly less consistent.
        compression (dict): Compression to use on the datasets before saving, for
            example {'zlib': True, 'complevel': 9}. This is in turn passed to
            xarray's `to_netcdf` method:
            http://xarray.pydata.org/en/stable/generated/xarray.Dataset.to_netcdf.html
            for more possibilities. (This parameter is now being deprecated,
            please use the DataArray's `encoding` from now on.)
        include_orig_name (bool): Include the original dataset name as a variable
            attribute in the final netCDF file
        numeric_name_prefix (str): Prefix to add to each variable with a name
            starting with a digit. Use '' or None to leave this out.

    Returns:
        List of the values returned by each ``to_netcdf`` call (one for the file
        root plus one per group written).
    """
    logger.info('Saving datasets to NetCDF4/CF.')
    # Handles the deprecated `compression` kwarg (may warn and/or default).
    compression = _get_compression(compression)

    # Write global attributes to file root (creates the file)
    filename = filename or self.get_filename(**datasets[0].attrs)

    root = xr.Dataset({}, attrs={})
    if header_attrs is not None:
        if flatten_attrs:
            header_attrs = flatten_dict(header_attrs)
        root.attrs = encode_attrs_nc(header_attrs)

    # Add/extend the 'history' global attribute with a creation record.
    _set_history(root)

    # Remove satpy-specific kwargs
    to_netcdf_kwargs = copy.deepcopy(to_netcdf_kwargs)  # may contain dictionaries (encoding)
    satpy_kwargs = ['overlay', 'decorate', 'config_files']
    for kwarg in satpy_kwargs:
        to_netcdf_kwargs.pop(kwarg, None)

    init_nc_kwargs = to_netcdf_kwargs.copy()
    init_nc_kwargs.pop('encoding', None)  # No variables to be encoded at this point
    init_nc_kwargs.pop('unlimited_dims', None)

    # Assign datasets to groups (group None = file root); `root` is passed so
    # the helper can adjust root attributes (e.g. Conventions) as needed.
    groups_ = _get_groups(groups, datasets, root)

    # mode='w' creates/overwrites the file with just the global attributes.
    written = [root.to_netcdf(filename, engine=engine, mode='w', **init_nc_kwargs)]

    # Write datasets to groups (appending to the file; group=None means no group)
    for group_name, group_datasets in groups_.items():
        # XXX: Should we combine the info of all datasets?
        datas, start_times, end_times = self._collect_datasets(
            group_datasets, epoch=epoch, flatten_attrs=flatten_attrs, exclude_attrs=exclude_attrs,
            include_lonlats=include_lonlats, pretty=pretty, compression=compression,
            include_orig_name=include_orig_name, numeric_name_prefix=numeric_name_prefix)
        dataset = xr.Dataset(datas)
        if 'time' in dataset:
            dataset['time_bnds'] = make_time_bounds(start_times, end_times)
            dataset['time'].attrs['bounds'] = "time_bnds"
            dataset['time'].attrs['standard_name'] = "time"
        else:
            grp_str = ' of group {}'.format(group_name) if group_name is not None else ''
            logger.warning('No time dimension in datasets{}, skipping time bounds creation.'.format(grp_str))

        encoding, other_to_netcdf_kwargs = update_encoding(dataset, to_netcdf_kwargs, numeric_name_prefix)
        # mode='a' appends each group to the file created above.
        res = dataset.to_netcdf(filename, engine=engine, group=group_name, mode='a', encoding=encoding,
                                **other_to_netcdf_kwargs)
        written.append(res)

    return written
def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, compression=None,
          include_orig_name=True, numeric_name_prefix='CHANNEL_'):
    """Convert the dataarray to something cf-compatible.

    Args:
        dataarray (xr.DataArray): The data array to be converted
        epoch (str): Reference time for encoding of time coordinates
        flatten_attrs (bool): If True, flatten dict-type attributes
        exclude_attrs (list): List of dataset attributes to be excluded
        compression (dict): If given, merged into the array's encoding
            (e.g. ``{'zlib': True}``)
        include_orig_name (bool): Include the original dataset name in the
            netcdf variable attributes
        numeric_name_prefix (str): Prepend dataset name with this if starting
            with a digit

    Returns:
        A copy of the input array with CF-style attributes and coordinates.
    """
    if exclude_attrs is None:
        exclude_attrs = []

    original_name = None
    # Work on a copy so the caller's array is left untouched.
    new_data = dataarray.copy()

    # Rename the array after its satpy 'name' attribute; names starting with
    # a digit get `numeric_name_prefix` prepended by the helper.
    if 'name' in new_data.attrs:
        name = new_data.attrs.pop('name')
        original_name, name = _handle_dataarray_name(name, numeric_name_prefix)
        new_data = new_data.rename(name)

    CFWriter._remove_satpy_attributes(new_data)

    # Remove area as well as user-defined attributes
    for key in ['area'] + exclude_attrs:
        new_data.attrs.pop(key, None)

    # CF expects ancillary_variables as a space-separated string of names,
    # not a list of DataArrays.
    anc = [ds.attrs['name'] for ds in new_data.attrs.get('ancillary_variables', [])]
    if anc:
        new_data.attrs['ancillary_variables'] = ' '.join(anc)
    # TODO: make this a grid mapping or lon/lats
    # new_data.attrs['area'] = str(new_data.attrs.get('area'))
    CFWriter._cleanup_attrs(new_data)

    if compression is not None:
        new_data.encoding.update(compression)

    new_data = CFWriter._encode_time(new_data, epoch)
    new_data = CFWriter._encode_coords(new_data)

    # CF wants at least one of long_name/standard_name on every variable.
    if 'long_name' not in new_data.attrs and 'standard_name' not in new_data.attrs:
        new_data.attrs['long_name'] = new_data.name
    if 'prerequisites' in new_data.attrs:
        new_data.attrs['prerequisites'] = [np.string_(str(prereq)) for prereq in new_data.attrs['prerequisites']]

    # Record the pre-prefix name only when a rename actually happened.
    if include_orig_name and numeric_name_prefix and original_name and original_name != name:
        new_data.attrs['original_name'] = original_name

    # Flatten dict-type attributes, if desired
    if flatten_attrs:
        new_data.attrs = flatten_dict(new_data.attrs)

    # Encode attributes to netcdf-compatible datatype
    new_data.attrs = encode_attrs_nc(new_data.attrs)

    return new_data