def _check_input_path(input: str):
    input_path = DaskMSStore(input)

    if not input_path.exists():
        raise ArgumentTypeError(f"{input} is an invalid path.")

    return input_path
def test_storage_options_from_config(
    tmp_path,
    py_minio_client,
    minio_admin,
    minio_alias,
    minio_user_key,
    minio_url,
    s3_bucket_name,
):
    filename = "test.txt"
    payload = "How now brown cow"

    py_minio_client.make_bucket(s3_bucket_name)
    py_minio_client.put_object(
        s3_bucket_name,
        f"subdir/{filename}",
        BytesIO(payload.encode("utf-8")),
        len(payload),
    )

    url = f"s3://{s3_bucket_name}"
    config_file = tmp_path / "config.yaml"

    opts = {
        "key": minio_user_key,
        "secret": minio_user_key,
        "client_kwargs": {
            "endpoint_url": minio_url.geturl(),
            "region_name": "af-south-1",
            "verify": False,
        },
    }

    with open(config_file, "w") as f:
        yaml.safe_dump({"storage_options": {url: opts}}, f)

    config.refresh(paths=config.paths + [str(tmp_path)])

    try:
        store = DaskMSStore(f"{url}/subdir")
        assert store.storage_options == opts

        with store.open("test.txt", "rb") as f:
            assert f.read() == payload.encode("utf-8")
    finally:
        config.refresh()
def test_xds_to_zarr_local(ms, spw_table, ant_table, tmp_path_factory):
    zarr_store = tmp_path_factory.mktemp("zarr_store") / "test.zarr"
    spw_store = zarr_store.parent / f"{zarr_store.name}::SPECTRAL_WINDOW"
    ant_store = zarr_store.parent / f"{zarr_store.name}::ANTENNA"

    return zarr_tester(ms, spw_table, ant_table,
                       DaskMSStore(zarr_store),
                       DaskMSStore(spw_store),
                       DaskMSStore(ant_store))
def test_store_main_access(tmp_path_factory):
    store_dir = tmp_path_factory.mktemp("STORE0")
    store = DaskMSStore(f"file://{store_dir}")

    assert store.url == f"file://{store_dir}"
    assert store.full_path == str(store_dir)
    assert store.canonical_path == str(store_dir)
    assert store.table is None

    with store.open("foo.txt", "w") as f:
        f.write("How now brown cow")

    assert store.exists("foo.txt")
    assert (store_dir / "foo.txt").exists()
def test_store_subtable_access(tmp_path_factory):
    store_dir = tmp_path_factory.mktemp("STORE0")
    table_dir = store_dir / "TABLE"
    table_dir.mkdir()

    store = DaskMSStore(f"file://{store_dir}::TABLE")

    assert store.url == f"file://{store_dir}::TABLE"
    assert store.full_path == f"{store_dir}{store.fs.sep}TABLE"
    assert store.canonical_path == f"{store_dir}::TABLE"
    assert store.table == "TABLE"

    with store.open("foo.txt", "w") as f:
        f.write("How now brown cow")

    assert store.exists("foo.txt")
    assert (table_dir / "foo.txt").exists()
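# Illustrative sketch of the path conventions exercised by the two tests
# above. The local path is hypothetical and the import location is an
# assumption. A subtable is addressed with the CASA-style "::SUBTABLE"
# suffix: full_path reflects the on-disk layout while canonical_path keeps
# the "::" form.
from daskms.fsspec_store import DaskMSStore

main = DaskMSStore("file:///tmp/example.ms")
sub = DaskMSStore("file:///tmp/example.ms::ANTENNA")

assert main.table is None
assert sub.table == "ANTENNA"
assert sub.canonical_path.endswith("::ANTENNA")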
def xds_from_storage_ms(store, **kwargs):
    if not isinstance(store, DaskMSStore):
        store = DaskMSStore(store, **kwargs.pop("storage_options", {}))

    typ = store.type()

    if typ == "casa":
        return xds_from_ms(store, **kwargs)
    elif typ == "zarr":
        from daskms.experimental.zarr import xds_from_zarr
        return xds_from_zarr(store, **kwargs)
    elif typ == "parquet":
        from daskms.experimental.arrow import xds_from_parquet
        return xds_from_parquet(store, **kwargs)
    else:
        raise TypeError(f"Unknown dataset {typ}")
def xds_to_storage_table(xds, store, **kwargs):
    if not isinstance(store, DaskMSStore):
        store = DaskMSStore(store, **kwargs.pop("storage_options", {}))

    typ = store.type()

    if typ == "casa":
        filter_kwargs(xds_to_table, kwargs)
        return xds_to_table(xds, store, **kwargs)
    elif typ == "zarr":
        from daskms.experimental.zarr import xds_to_zarr
        filter_kwargs(xds_to_zarr, kwargs)
        return xds_to_zarr(xds, store, **kwargs)
    elif typ == "parquet":
        from daskms.experimental.arrow import xds_to_parquet
        filter_kwargs(xds_to_parquet, kwargs)
        return xds_to_parquet(xds, store, **kwargs)
    else:
        raise TypeError(f"Unknown dataset {typ}")
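# Illustrative usage sketch for the storage-agnostic wrappers above. The
# bucket, paths and credentials are hypothetical, and the top-level daskms
# import locations are an assumption based on the functions shown here.
import dask
from daskms import xds_from_storage_ms, xds_to_storage_table

# Read from S3; DaskMSStore.type() infers whether the dataset is
# casa, zarr or parquet.
datasets = xds_from_storage_ms(
    "s3://my-bucket/example.ms",
    storage_options={"key": "...", "secret": "..."},
)

# Write the same datasets to a local zarr store and execute the lazy writes.
writes = xds_to_storage_table(datasets, "/tmp/example_copy.zarr")
dask.compute(writes)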
def test_store_pickle():
    store = DaskMSStore("s3://binface",
                        key="foo",
                        secret="bar",
                        client_kwargs={
                            "endpoint_url": "http://127.0.0.1:9000",
                            "region_name": "af-cpt",
                        })

    pstore = pickle.loads(pickle.dumps(store))
    assert pstore == store
def test_xds_to_zarr_s3(ms, spw_table, ant_table, py_minio_client,
                        minio_user_key, minio_url, s3_bucket_name):
    py_minio_client.make_bucket(s3_bucket_name)

    zarr_store = DaskMSStore(f"s3://{s3_bucket_name}/measurementset.MS",
                             key=minio_user_key,
                             secret=minio_user_key,
                             client_kwargs={
                                 "endpoint_url": minio_url.geturl(),
                                 "region_name": "af-cpt",
                             })

    # NOTE(sjperkins)
    # Review this interface
    spw_store = zarr_store.subtable_store("SPECTRAL_WINDOW")
    ant_store = zarr_store.subtable_store("ANTENNA")

    return zarr_tester(ms, spw_table, ant_table,
                       zarr_store, spw_store, ant_store)
def test_local_store(tmp_path):
    zarr = pytest.importorskip("zarr")
    payload = "How now brown cow"
    filename = "cow.txt"
    (tmp_path / filename).write_text(payload)
    (tmp_path / "foo.txt").write_text(payload)
    (tmp_path / "bar.txt").write_text(payload)
    (tmp_path / "qux.txt").write_text(payload)

    store = DaskMSStore(str(tmp_path))
    store.fs.mkdir(f"{store.full_path}{store.fs.sep}bob", exist_ok=True)

    assert store.map[filename] == payload.encode("utf-8")

    root = zarr.group(store=store.map)
    data = root.require_dataset("MODEL_DATA",  # noqa
                                shape=1000,
                                dtype=np.complex128)
def xds_from_parquet(store, columns=None, chunks=None, **kwargs):
    if isinstance(store, DaskMSStore):
        pass
    elif isinstance(store, (str, Path)):
        store = DaskMSStore(f"{store}", **kwargs.pop("storage_options", {}))
    else:
        raise TypeError(f"store '{store}' must be "
                        f"Path, str or DaskMSStore")

    # If any kwargs are added, they should be popped prior to this check.
    if len(kwargs) > 0:
        warnings.warn(
            f"The following unsupported kwargs were ignored in "
            f"xds_from_parquet: {kwargs}",
            UserWarning)

    columns = promote_columns(columns)

    if chunks is None:
        pass
    elif isinstance(chunks, (tuple, list)):
        if len(chunks) == 0 or any(not isinstance(c, dict) for c in chunks):
            raise TypeError("chunks must be None or dict or list of dict")
    elif isinstance(chunks, dict):
        chunks = [chunks]
    else:
        raise TypeError("chunks must be None or dict or list of dict")

    table_path = "" if store.table else "MAIN"

    fragments = list(map(Path, store.rglob("*.parquet")))
    ds_cfg = defaultdict(list)

    # Iterate over all parquet files in the directory tree
    # and group them by partition
    partition_schemas = set()

    for fragment in fragments:
        *partitions, _ = fragment.relative_to(Path(table_path)).parts
        fragment = ParquetFileProxy(store, str(fragment))
        fragment_meta = fragment.metadata
        metadata = json.loads(fragment_meta.metadata[DASKMS_METADATA.encode()])
        partition_meta = metadata[DASKMS_PARTITION_KEY]
        partition_meta = tuple(tuple((f, v)) for f, v in partition_meta)
        partitions = _partition_values(partitions, partition_meta)
        partition_schemas.add(partition_meta)
        ds_cfg[partitions].append(fragment)

    # Sanity check partition schemas of all parquet files
    if len(partition_schemas) == 0:
        raise ValueError(f"No parquet files found in {store.path}")
    elif len(partition_schemas) != 1:
        raise ValueError(f"Multiple partitions discovered {partition_schemas}")

    partition_schemas = partition_schemas.pop()
    datasets = []

    # Now create a dataset per partition
    for p, (partition, fragments) in enumerate(sorted(ds_cfg.items())):
        fragments = list(sorted(fragments))
        column_arrays = defaultdict(list)
        fragment_rows = [f.metadata.num_rows for f in fragments]

        # Returns a dictionary of lists mapping fragments to partitions.
        partition_chunks = partition_chunking(p, fragment_rows, chunks)

        for pieces in partition_chunks.values():
            chunk_fragments = [fragments[i] for i, _ in pieces]
            chunk_ranges = [r for _, r in pieces]
            chunk_metas = [f.metadata for f in chunk_fragments]
            rows = sum(end - start for start, end in chunk_ranges)

            # NOTE(JSKenyon): This assumes that the schema/fields are
            # consistent between fragments. This should be ok.
            exemplar_schema = chunk_metas[0].schema.to_arrow_schema()
            exemplar_fields = {
                n: exemplar_schema.field(n) for n in exemplar_schema.names
            }

            for column, field in column_iterator(exemplar_fields, columns):
                field_metadata = field.metadata[DASKMS_METADATA.encode()]
                field_metadata = json.loads(field_metadata)
                dims = tuple(field_metadata["dims"])

                if isinstance(field.type, TensorType):
                    shape = (rows, ) + field.type.shape
                else:
                    shape = (rows, )

                assert len(shape) == len(dims)

                dtype = field.type.to_pandas_dtype()
                meta = np.empty((0, ) * len(dims), dtype)
                new_axes = {d: s for d, s in zip(dims, shape)}

                read = da.blockwise(fragment_reader, dims,
                                    chunk_fragments, None,
                                    chunk_ranges, None,
                                    column, None,
                                    shape, None,
                                    dtype, None,
                                    adjust_chunks={"row": rows},
                                    new_axes=new_axes,
                                    meta=meta)

                column_arrays[column].append((read, dims))

        data_vars = {}

        for column, values in column_arrays.items():
            arrays, array_dims = zip(*values)
            array_dims = set(array_dims)

            if len(array_dims) != 1:
                raise ValueError(f"Inconsistent array dimensions "
                                 f"{array_dims} for {column}")

            data_vars[column] = (array_dims.pop(), da.concatenate(arrays))

        attrs = dict(partition)
        attrs[DASKMS_PARTITION_KEY] = partition_schemas
        datasets.append(Dataset(data_vars, attrs=attrs))

    return datasets
def xds_from_table(table_name, columns=None,
                   index_cols=None, group_cols=None,
                   **kwargs):
    """
    Create multiple :class:`xarray.Dataset` objects
    from CASA table ``table_name`` with the rows lexicographically
    sorted according to the columns in ``index_cols``.
    If ``group_cols`` is supplied, the table data is grouped into
    multiple :class:`xarray.Dataset` objects, each associated with a
    permutation of the unique values for the columns in ``group_cols``.

    Notes
    -----
    Both ``group_cols`` and ``index_cols`` should consist of
    columns that are part of the table index.

    However, this may not always be possible as CASA tables
    may not always contain indexing columns.
    The ``ANTENNA`` or ``SPECTRAL_WINDOW`` Measurement Set subtables
    are examples in which the ``row id`` serves as the index.

    Generally, calling

    .. code-block:: python

        antds = list(xds_from_table("WSRT.MS::ANTENNA"))

    is fine, since the data associated with each row of the ``ANTENNA``
    table has the same shape and so a dask or numpy array can be
    constructed around the contents of the table.

    This may not be the case for the ``SPECTRAL_WINDOW`` subtable.
    Here, each row defines a separate spectral window, but each
    spectral window may contain different numbers of frequencies.
    In this case, it is probably better to group the subtable by ``row``.

    There is a *special* group column :code:`"__row__"`
    that can be used to group the table by row.

    .. code-block:: python

        for spwds in xds_from_table("WSRT.MS::SPECTRAL_WINDOW",
                                    group_cols="__row__"):
            ...

    If :code:`"__row__"` is used for grouping, then no other
    column may be used. It should also only be used for *small*
    tables, as the number of datasets produced may be prohibitively large.

    Parameters
    ----------
    table_name : str
        CASA table
    columns : list or tuple, optional
        Columns present on the returned dataset.
        Defaults to all if ``None``.
    index_cols : list or tuple, optional
        List of CASA table indexing columns. Defaults to :code:`()`.
    group_cols : list or tuple, optional
        List of columns on which to group the CASA table.
        Defaults to :code:`()`.
    table_schema : dict or str or list of dict or str, optional
        A schema dictionary defining the dimension naming scheme for
        each column in the table. For example:

        .. code-block:: python

            {
                "UVW": {'dims': ('uvw',)},
                "DATA": {'dims': ('chan', 'corr')},
            }

        will result in the UVW and DATA arrays having dimensions
        :code:`('row', 'uvw')` and :code:`('row', 'chan', 'corr')`
        respectively.

        A string can be supplied, which will be matched
        against existing default schemas. Examples here include
        ``MS``, ``ANTENNA`` and ``SPECTRAL_WINDOW``
        corresponding to ``Measurement Sets``, the ``ANTENNA`` subtable
        and the ``SPECTRAL_WINDOW`` subtable, respectively.

        By default, the end of ``table_name`` will be inspected
        to see if it matches any default schemas.

        It is also possible to supply a list of strings or dicts
        defining a sequence of schemas which are combined.
        Later elements in the list override previous elements.
        In the following example, the standard UVW MS component
        name scheme is overridden with "my-uvw".

        .. code-block:: python

            ["MS", {"UVW": {'dims': ('my-uvw',)}}]

    table_keywords : {False, True}, optional
        If True, returns table keywords.
        Changes the return type of the function into a tuple.
    column_keywords : {False, True}, optional
        If True, returns keywords for each column on the table.
        Changes the return type of the function into a tuple.
    table_proxy : {False, True}, optional
        If True, returns the Table Proxy associated with the Dataset.
    taql_where : str, optional
        TAQL where clause. For example, to exclude auto-correlations

        .. code-block:: python

            xds_from_table("WSRT.MS", taql_where="ANTENNA1 != ANTENNA2")

    chunks : list of dicts or dict, optional
        A :code:`{dim: chunk}` dictionary, specifying the chunking
        strategy of each dimension in the schema.
        Defaults to :code:`{'row': 100000}` which will partition
        the row dimension into chunks of 100000.

        * If a dict, the chunking strategy is applied to each group.
        * If a list of dicts, each element is applied
          to the associated group. The last element is
          extended over the remaining groups if there
          are insufficient elements.

        It's also possible to specify the individual chunks for
        multiple dimensions:

        .. code-block:: python

            {'row': (40000, 60000, 40000, 60000),
             'chan': (16, 16, 16, 16),
             'corr': (1, 2, 1)}

        The above chunks a 200,000 row, 64 channel and 4 correlation
        space into 4 x 4 x 3 = 48 chunks, but requires prior
        knowledge of dimensionality, probably obtained with an initial
        call to :func:`xds_from_table`.

    Returns
    -------
    datasets : list of :class:`xarray.Dataset`
        datasets for each group, each ordered by indexing columns
    table_keywords : dict, optional
        Returned if ``table_keywords is True``
    column_keywords : dict, optional
        Returned if ``column_keywords is True``
    table_proxy : :class:`daskms.TableProxy`, optional
        Returned if ``table_proxy is True``
    """
    if isinstance(table_name, DaskMSStore):
        table_name = table_name.casa_path()
    else:
        store = DaskMSStore(table_name, **kwargs.pop("storage_options", {}))
        table_name = store.casa_path()

    columns = promote_columns(columns, [])
    index_cols = promote_columns(index_cols, [])
    group_cols = promote_columns(group_cols, [])

    return DatasetFactory(table_name, columns,
                          group_cols, index_cols,
                          **kwargs).datasets()
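# Illustrative sketch of the grouping, ordering and chunking parameters
# documented above (the MS path is hypothetical). One Dataset is returned
# per unique (FIELD_ID, DATA_DESC_ID) combination, with rows ordered by
# TIME and the row dimension chunked in blocks of 10000.
from daskms import xds_from_table

datasets = xds_from_table(
    "/data/example.ms",
    group_cols=["FIELD_ID", "DATA_DESC_ID"],
    index_cols=["TIME"],
    chunks={"row": 10000},
)

for ds in datasets:
    print(ds)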
def xds_from_zarr(store, columns=None, chunks=None, **kwargs):
    """
    Reads the zarr data store in `store` and returns a list of
    Datasets containing the data.

    Parameters
    ----------
    store : str or Path or DaskMSStore
        Path containing the data
    columns : list of str or str or None
        Columns to read. `None` or `"ALL"` reads all columns on each
        dataset. Otherwise, a list of columns should be supplied.
    chunks : dict or list of dicts
        chunking schema for each dataset
    **kwargs : optional

    Returns
    -------
    datasets : list of Datasets
        Datasets representing the data in the zarr store
    """
    if isinstance(store, DaskMSStore):
        pass
    elif isinstance(store, (Path, str)):
        store = DaskMSStore(f"{store}", **kwargs.pop("storage_options", {}))
    else:
        raise TypeError(f"store '{store}' must be "
                        f"Path, str or DaskMSStore")

    # If any kwargs are added, they should be popped prior to this check.
    if len(kwargs) > 0:
        warnings.warn(
            f"The following unsupported kwargs were ignored in "
            f"xds_from_zarr: {kwargs}",
            UserWarning)

    columns = promote_columns(columns)

    if chunks is None:
        pass
    elif isinstance(chunks, (tuple, list)):
        if not all(isinstance(v, dict) for v in chunks):
            raise TypeError("chunks must be None, a dict or a list of dicts")
    elif isinstance(chunks, dict):
        chunks = [chunks]
    else:
        raise TypeError("chunks must be None, a dict or a list of dicts")

    datasets = []
    numpy_vars = []

    # NOTE(JSKenyon): Iterating over all the zarr groups/arrays is VERY
    # expensive if the metadata has not been consolidated.
    zc.consolidate_metadata(store.map)

    table_path = store.table if store.table else "MAIN"
    table_group = zarr.open_consolidated(store.map)[table_path]

    for g, (group_name, group) in enumerate(
            sorted(table_group.groups(), key=group_sortkey)):
        group_attrs = decode_attr(dict(group.attrs))
        dask_ms_attrs = group_attrs.pop(DASKMS_ATTR_KEY)
        natural_chunks = dask_ms_attrs["chunks"]
        group_chunks = {d: tuple(dc) for d, dc in natural_chunks.items()}

        if chunks:
            # Defer to user-supplied chunking strategy
            try:
                group_chunks.update(chunks[g])
            except IndexError:
                group_chunks.update(chunks[-1])  # Reuse last chunking.

        data_vars = {}
        coords = {}

        for name, zarray in column_iterator(group, columns):
            attrs = decode_attr(dict(zarray.attrs[DASKMS_ATTR_KEY]))
            dims = attrs["dims"]
            coordinate = attrs.get("coordinate", False)
            array_chunks = tuple(
                group_chunks.get(d, s) for d, s in zip(dims, zarray.shape))

            array_chunks = da.core.normalize_chunks(array_chunks,
                                                    zarray.shape)
            ext_args = extent_args(dims, array_chunks)
            token_name = f"read~{name}-{tokenize(zarray, *ext_args)}"

            read = da.blockwise(zarr_getter, dims,
                                zarray, None,
                                *ext_args,
                                concatenate=False,
                                name=token_name,
                                meta=np.empty((0, ) * zarray.ndim,
                                              zarray.dtype))

            read = inlined_array(read, ext_args[::2])
            var = Variable(dims, read, attrs)
            (coords if coordinate else data_vars)[name] = var

            # Save numpy arrays for reification
            typ = decode_type(attrs["array_type"])

            if typ is np.ndarray:
                numpy_vars.append(var)
            elif typ is da.Array:
                pass
            else:
                raise TypeError(f"Unknown array_type '{attrs['array_type']}'")

        datasets.append(Dataset(data_vars, coords=coords, attrs=group_attrs))

    # Reify any numpy arrays directly into their variables
    for v, a in zip(numpy_vars, dask.compute(v.data for v in numpy_vars)[0]):
        v.data = a

    return datasets
def xds_to_zarr(xds, store, columns=None, rechunk=False, **kwargs):
    """
    Stores a dataset or list of datasets defined by `xds` in
    file location `store`.

    Parameters
    ----------
    xds : Dataset or list of Datasets
        Data
    store : str or Path or DaskMSStore
        Path to store the data
    columns : list of str or str or None
        Columns to store. `None` or `"ALL"` stores all columns on each
        dataset. Otherwise, a list of columns should be supplied. All
        coordinates associated with a specified column will be written
        automatically.
    rechunk : bool
        Controls whether dask arrays should be automatically rechunked to
        be consistent with existing on-disk zarr arrays while writing to
        disk.
    **kwargs : optional

    Returns
    -------
    writes : list of Datasets
        Datasets representing the write operations
    """
    if isinstance(store, DaskMSStore):
        pass
    elif isinstance(store, (Path, str)):
        store = DaskMSStore(f"{store}", **kwargs.pop("storage_options", {}))
    else:
        raise TypeError(f"store '{store}' must be "
                        f"Path, str or DaskMSStore")

    # If any kwargs are added, they should be popped prior to this check.
    if len(kwargs) > 0:
        warnings.warn(
            f"The following unsupported kwargs were ignored in "
            f"xds_to_zarr: {kwargs}",
            UserWarning)

    columns = promote_columns(columns)

    if isinstance(xds, Dataset):
        xds = [xds]
    elif isinstance(xds, (tuple, list)):
        if not all(isinstance(ds, Dataset) for ds in xds):
            raise TypeError("xds must be a Dataset or list of Datasets")
    else:
        raise TypeError("xds must be a Dataset or list of Datasets")

    write_datasets = []

    for di, ds in enumerate(xds):
        data_vars, coords = select_vars_and_coords(ds, columns)

        # Create a new ds which is consistent with what we want to write.
        ds = Dataset(data_vars, coords=coords, attrs=ds.attrs)

        ds, group = prepare_zarr_group(di, ds, store, rechunk=rechunk)

        data_vars = dict(_gen_writes(ds.data_vars, ds.chunks, group))
        # Include coords in the write dataset so they're reified
        data_vars.update(
            dict(_gen_writes(ds.coords, ds.chunks, group,
                             indirect_dims=True)))

        # Transfer any partition information over to the write dataset
        partition = ds.attrs.get(DASKMS_PARTITION_KEY, False)

        if not partition:
            attrs = None
        else:
            attrs = {
                DASKMS_PARTITION_KEY: partition,
                **{k: getattr(ds, k) for k, _ in partition}
            }

        write_datasets.append(Dataset(data_vars, attrs=attrs))

    return write_datasets
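# Minimal sketch of a zarr round trip using xds_to_zarr above together with
# xds_from_zarr (paths are hypothetical). xds_to_zarr only builds lazy write
# datasets; dask.compute executes the actual writes.
import dask
from daskms import xds_from_ms
from daskms.experimental.zarr import xds_from_zarr, xds_to_zarr

datasets = xds_from_ms("/data/example.ms")
writes = xds_to_zarr(datasets, "/data/example.zarr")
dask.compute(writes)

# Read the data back from the zarr store.
roundtrip = xds_from_zarr("/data/example.zarr")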
def xds_to_table(xds, table_name, columns="ALL",
                 descriptor=None, table_keywords=None,
                 column_keywords=None, table_proxy=False):
    """
    Generates a list of Datasets representing write operations from the
    specified arrays in :class:`xarray.Dataset`'s into
    the CASA table columns specified by ``table_name`` and ``columns``.
    This is a lazy operation -- it is only executed when a
    :meth:`dask.compute` or :meth:`xarray.Dataset.compute` method is called.

    Parameters
    ----------
    xds : :class:`xarray.Dataset` or list of :class:`xarray.Dataset`
        dataset(s) containing the specified columns. If a list of datasets
        is provided, the concatenation of the columns in
        sequential datasets will be written.
    table_name : str
        CASA table path
    columns : tuple or list or "ALL"
        list of column names to write to the table.
        "ALL" is a special marker which specifies that all columns
        should be written. If you wish to write an "ALL" array to
        a column, use :code:`columns=['ALL']`
    descriptor : None or \
        :class:`~daskms.descriptors.builder.AbstractBuilderFactory` or \
        str

        A class describing how CASA table descriptors and data managers
        are constructed. Some defaults are available such
        as `ms` and `ms_subtable`.

        If None, defaults are used.
    table_keywords : dict, optional
        Dictionary of table keywords to add to existing keywords.
        The operation is performed immediately, not lazily.
    column_keywords : dict, optional
        Dictionary of :code:`{column: keywords}` to add to existing
        column keywords. The operation is performed immediately,
        not lazily.
    table_proxy : {False, True}
        If True, returns the Table Proxy associated with the datasets.

    Returns
    -------
    write_datasets : list of :class:`xarray.Dataset`
        Datasets containing arrays representing write operations
        into a CASA Table
    table_proxy : :class:`daskms.TableProxy`, optional
        The Table Proxy associated with the datasets
    """
    if isinstance(table_name, DaskMSStore):
        store = table_name
    else:
        store = DaskMSStore(table_name)

    table_name = store.casa_path()

    # Promote dataset to a list
    if not isinstance(xds, (tuple, list)):
        xds = [xds]

    # Not writing to an existing dataset so we drop ROWID to ensure that rows
    # get added correctly. TODO: This may be a little brittle - we could
    # consider altering the functionality in writes.py.
    if not store.exists():
        xds = [ds.drop_vars("ROWID", errors="ignore") for ds in xds]

    if not isinstance(columns, (tuple, list)):
        if columns != "ALL":
            columns = [columns]

    # Write the datasets
    out_ds = write_datasets(table_name, xds, columns,
                            descriptor=descriptor,
                            table_keywords=table_keywords,
                            column_keywords=column_keywords,
                            table_proxy=table_proxy)

    # Unpack table proxy if it was requested
    if table_proxy is True:
        assert isinstance(out_ds, tuple)
        out_ds, tp = out_ds
        assert isinstance(tp, TableProxy)
    else:
        tp = None

    # Repack the Table Proxy
    if table_proxy is True:
        return out_ds, tp

    return out_ds
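# Minimal read-modify-write sketch for xds_to_table above. The MS path and
# the DATA scaling are purely illustrative, and Dataset.assign taking a
# (dims, array) tuple is an assumption about the dask-ms Dataset API.
# Writes are lazy until dask.compute is called.
import dask
from daskms import xds_from_ms, xds_to_table

datasets = xds_from_ms("/data/example.ms", columns=["DATA"])

# Scale the visibilities and write the modified column back.
updated = [ds.assign(DATA=(ds.DATA.dims, ds.DATA.data * 2)) for ds in datasets]

writes = xds_to_table(updated, "/data/example.ms", columns=["DATA"])
dask.compute(writes)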
def _check_output_path(output: str):
    return DaskMSStore(output)
def test_xds_to_parquet_local(ms, tmp_path_factory, spw_table, ant_table):
    store = tmp_path_factory.mktemp("parquet_store") / "out.parquet"
    # antenna_store = store.parent / f"{store.name}::ANTENNA"
    # spw_store = store.parent / f"{store.name}::SPECTRAL_WINDOW"

    return parquet_tester(ms, DaskMSStore(store))
def xds_to_parquet(xds, store, columns=None, **kwargs):
    if isinstance(store, DaskMSStore):
        pass
    elif isinstance(store, (str, Path)):
        store = DaskMSStore(f"{store}", **kwargs.pop("storage_options", {}))
    else:
        raise TypeError(f"store '{store}' must be "
                        f"Path, str or DaskMSStore")

    # If any kwargs are added, they should be popped prior to this check.
    if len(kwargs) > 0:
        warnings.warn(
            f"The following unsupported kwargs were ignored in "
            f"xds_to_parquet: {kwargs}",
            UserWarning)

    columns = promote_columns(columns)

    if isinstance(xds, Dataset):
        xds = [xds]
    elif isinstance(xds, (tuple, list)):
        if not all(isinstance(ds, Dataset) for ds in xds):
            raise TypeError("xds must be a Dataset or list of Datasets")
    else:
        raise TypeError("xds must be a Dataset or list of Datasets")

    datasets = []
    base_schema = ArrowSchema.from_datasets(xds)

    for ds_id, ds in enumerate(xds):
        arrow_schema = base_schema.with_attributes(ds)
        fragment = ParquetFragment(store, store.table, arrow_schema, ds_id)
        chunk_ids = da.arange(len(ds.chunks["row"]), chunks=1)
        args = [chunk_ids, ("row", )]

        data_var_it = column_iterator(ds.data_vars, columns)
        coord_it = column_iterator(ds.coords, columns)

        for column, variable in itertools.chain(data_var_it, coord_it):
            if not isinstance(variable.data, da.Array):
                raise ValueError(f"Column {column} does not "
                                 f"contain a dask Array")

            if len(variable.dims) == 0 or variable.dims[0] != "row":
                raise ValueError(f"Column {column} dimensions "
                                 f"{variable.dims} don't start with 'row'")

            args.extend((column, None, variable.data, variable.dims))

            for dim, chunk in zip(variable.dims[1:],
                                  variable.data.chunks[1:]):
                if len(chunk) != 1:
                    raise ValueError(f"Chunking in {dim} is not yet "
                                     f"supported.")

        writes = da.blockwise(fragment.write, ("row", ),
                              *args,
                              align_arrays=False,
                              adjust_chunks={"row": 1},
                              meta=np.empty((0, ), bool))

        writes = inlined_array(writes, chunk_ids)

        # Transfer any partition information over to the write dataset
        partition = ds.attrs.get(DASKMS_PARTITION_KEY, False)

        if not partition:
            attrs = None
        else:
            attrs = {
                DASKMS_PARTITION_KEY: partition,
                **{k: getattr(ds, k) for k, _ in partition}
            }

        datasets.append(Dataset({"WRITE": (("row", ), writes)}, attrs=attrs))

    return datasets
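# Minimal sketch converting a Measurement Set to parquet with xds_to_parquet
# above and reading it back (paths are hypothetical). Writes are lazy until
# dask.compute is called; note that xds_to_parquet requires non-row
# dimensions to be single-chunked.
import dask
from daskms import xds_from_ms
from daskms.experimental.arrow import xds_from_parquet, xds_to_parquet

datasets = xds_from_ms("/data/example.ms")
writes = xds_to_parquet(datasets, "/data/example.parquet")
dask.compute(writes)

roundtrip = xds_from_parquet("/data/example.parquet")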