def test_max_str_len(dtype, chunks, backend, data):
    """Compare ``max_str_len`` with a brute-force maximum on generated arrays.

    A random shape (0 to 3 dimensions, each of size 0 to 8) and a matching
    string array are drawn from ``data``, wrapped with ``backend`` and,
    when ``chunks`` is given, chunked for the xarray and dask backends.
    Empty arrays must raise ``ValueError``; otherwise the result must equal
    the longest element length found by plain iteration.

    NOTE(review): ``dtype``, ``chunks``, ``backend`` and ``data`` are
    presumably supplied by parametrization / hypothesis decorators that are
    not visible in this block -- confirm against the decorators.
    """
    dims = data.draw(st.lists(st.integers(0, 8), min_size=0, max_size=3))
    rank = len(dims)

    # Object arrays are generated as unicode first and cast afterwards.
    drawn_dtype = "U" if dtype == "O" else dtype
    x = backend(data.draw(arrays(dtype=drawn_dtype, shape=dims)))
    if dtype == "O":
        x = x.astype(object)

    if chunks is not None:
        # Use the same chunk size along every axis.
        chunk_spec = (chunks,) * rank
        if backend is xr.DataArray:
            x = x.chunk(chunks=chunk_spec)
        elif backend is da.array:
            x = x.rechunk(chunk_spec)

    if x.size == 0:
        with pytest.raises(
            ValueError, match="Max string length cannot be calculated for empty array"
        ):
            max_str_len(x)
        return

    expected = max(len(item) for item in np.asarray(x).ravel())
    actual = int(max_str_len(x))
    assert expected == actual
def update_dataset(self, ds: xr.Dataset, add_str_max_length_attrs: bool = False) -> None:
    """Write this field's array into ``ds`` under ``self.variable_name``.

    Parameters
    ----------
    ds
        Dataset that receives the variable; it is modified in place.
    add_str_max_length_attrs
        When true and the array has object dtype, the value returned by
        ``max_str_len`` is stored in the dataset attribute
        ``max_length_<variable_name>``.

    Notes
    -----
    ``self.array`` is modified in place when its dtype is int32: entries
    equal to the minimum int32 value are overwritten with
    ``self.fill_value``.
    """
    name = self.variable_name

    if self.array.dtype == np.int32:
        # cyvcf2 represents missing Integer values as the minimum int32 value
        # so change these to be the fill value
        int32_min = np.iinfo(np.int32).min
        missing_mask = self.array == int32_min
        self.array[missing_mask] = self.fill_value

    ds[name] = (self.dims, self.array)

    has_description = len(self.description) > 0
    if has_description:
        ds[name].attrs["comment"] = self.description

    # Only object-dtype arrays get a max-length attribute, and only on request.
    if not (add_str_max_length_attrs and self.array.dtype.kind == "O"):
        return
    ds.attrs[f"max_length_{name}"] = max_str_len(self.array)
def test_max_str_len__invalid_dtype():
    """``max_str_len`` must raise ``ValueError`` for an integer array."""
    non_string_array = np.array([1])
    with pytest.raises(ValueError, match="Array must have string dtype"):
        max_str_len(non_string_array)