def write_series(self, group: zarr.Group, name: str, array: np.ndarray, data_type: str) -> None:
    """Write one 1-D series into ``group`` under ``name``.

    For ``data_type == 'data_frame'``, a plain string column (other than the
    '_index' column) is first converted to a ``pd.Categorical`` when the number
    of distinct values yields at least a 10x size reduction. Categorical data
    is stored as two datasets: category keys under the ``_categories``
    sub-group and integer codes under ``name`` (with an ``ordered`` attr).
    Everything else is written as a single dataset.
    """
    if data_type == 'data_frame':
        # Opportunistic compression: turn a low-cardinality string column
        # into a categorical. '_index' is always written as-is.
        if not is_categorical_dtype(array) and name != '_index' and is_string_dtype(array):
            keywords = set(array)
            if len(keywords) <= array.size / 10.0:  # at least 10x reduction
                array = pd.Categorical(array, categories=natsorted(keywords))
        if is_categorical_dtype(array):
            # write category keys
            categories = group.require_group('_categories')
            values = array.categories.values
            # Decode bytes categories to str. Guard against an empty category
            # list (e.g. an all-missing column), where values[0] would raise
            # IndexError.
            if values.size > 0 and isinstance(values[0], bytes):
                values = np.array([x.decode() for x in values], dtype=object)
            dtype = str if values.dtype.kind == 'O' else values.dtype
            categories.create_dataset(
                name,
                data=values,
                shape=values.shape,
                chunks=calc_chunk(values.shape),
                dtype=dtype,
                compressor=COMPRESSOR,
                overwrite=True,
            )
            # write codes
            codes_arr = group.create_dataset(
                name,
                data=array.codes,
                shape=array.codes.shape,
                chunks=calc_chunk(array.codes.shape),
                dtype=array.codes.dtype,
                compressor=COMPRESSOR,
                overwrite=True,
            )
            codes_arr.attrs['ordered'] = array.ordered
            return None
    # Non-categorical path: object dtype is stored as variable-length str.
    dtype = str if array.dtype.kind == 'O' else array.dtype
    group.create_dataset(
        name,
        data=array,
        shape=array.shape,
        chunks=calc_chunk(array.shape),
        dtype=dtype,
        compressor=COMPRESSOR,
        overwrite=True,
    )
def write_unimodal_data(self, group: zarr.Group, name: str, data: UnimodalData, overwrite: bool = True) -> None:
    """ Write UnimodalData """
    sub_group = group.require_group(name, overwrite = overwrite)
    sub_group.attrs.update(**{'data_type': 'UnimodalData', '_cur_matrix': data.current_matrix()})

    # Barcode and feature metadata frames are written unconditionally.
    self.write_dataframe(sub_group, 'barcode_metadata', data.barcode_metadata)
    self.write_dataframe(sub_group, 'feature_metadata', data.feature_metadata)

    # Each mapping field is rewritten only on full overwrite or when it has
    # pending (dirty) changes.
    fields = (
        ('matrices', data.matrices),
        ('metadata', data.metadata),
        ('barcode_multiarrays', data.barcode_multiarrays),
        ('feature_multiarrays', data.feature_multiarrays),
    )
    for field_name, field_mapping in fields:
        if overwrite or field_mapping.is_dirty():
            self.write_mapping(sub_group, field_name, field_mapping, overwrite = overwrite)
def write_mapping(self, group: zarr.Group, name: str, mapping: dict, overwrite=True) -> None:
    """Persist a dict-like ``mapping`` under ``group[name]``.

    Scalars are collected into the sub-group's 'scalar' attribute; ndarray,
    DataFrame, nested dict-like and CSR-sparse values are each delegated to
    their dedicated writer. With ``overwrite=False`` only deleted and modified
    keys are touched (incremental update).
    """
    sub_group = group.require_group(name, overwrite=overwrite)
    # If overwrite == True the group is fresh, so there is no pre-existing
    # 'scalar' attribute to merge with.
    scalar_dict = sub_group.attrs.pop('scalar', {})

    def _store_pair(key, value):
        # Dispatch on the value's type. The elif order matters: ndarray and
        # DataFrame must be tested before the generic dict-like check.
        if is_scalar(value):
            scalar_dict[key] = value
        elif isinstance(value, np.ndarray):
            self.write_array(sub_group, key, value)
        elif isinstance(value, pd.DataFrame):
            self.write_dataframe(sub_group, key, value)
        elif is_dict_like(value):
            self.write_mapping(sub_group, key, value)
        elif issparse(value):
            assert isinstance(value, csr_matrix)
            self.write_csr(sub_group, key, value)
        else:
            # Fallback: assume list/tuple-like and store it as an ndarray.
            self.write_array(sub_group, key, value.astype(str) if is_categorical_dtype(value) else np.array(value))

    if overwrite:
        for key, value in mapping.items():
            _store_pair(key, value)
    else:
        # Incremental mode: drop removed keys, then rewrite modified ones.
        for key in mapping.deleted:
            if key in scalar_dict:
                del scalar_dict[key]
            else:
                del sub_group[key]
        for key in mapping.modified:
            _store_pair(key, mapping[key])

    attrs_dict = {'data_type': 'dict'}
    if len(scalar_dict) > 0:
        attrs_dict['scalar'] = scalar_dict
    sub_group.attrs.update(**attrs_dict)