def compress_and_save_to_zarr(ds, mode='a'):
    """Rechunk ``ds`` and write it to the zarr store at ``gcssavepath``.

    The store path (``gcssavepath``) and the filesystem handle (``fs``) are
    not parameters; both are read from the enclosing scope.

    Args:
        ds: Dataset-like object exposing ``dims``, ``chunk``, ``keys`` and
            ``to_zarr`` (presumably an ``xarray.Dataset`` -- confirm against
            callers).
        mode: ``'w'`` to create the store and write with an explicit
            compression encoding, ``'a'`` to append to an existing store
            along the ``time`` dimension.

    Raises:
        ValueError: If ``mode`` is neither ``'w'`` nor ``'a'``.
    """
    # Fail fast, before any chunking work, and say what was wrong.
    if mode not in ('w', 'a'):
        raise ValueError(
            "mode must be 'w' (create) or 'a' (append), got {!r}".format(mode)
        )

    # Chunk the array thinking about how we access spatial slices.
    # Here we have assumed we don't use forecast time slices or many step
    # slices.
    chunk_dict = {'time': 1, 'step': 10, 'y': -1, 'x': -1}
    if 'isobaricInhPa' in ds.dims:
        chunk_dict['isobaricInhPa'] = 5
    ds = ds.chunk(chunk_dict)

    if mode == 'w':
        # First addition to the zarr file: set the encoding explicitly.
        # This encoding gave good compression and was as fast to load as any
        # other level tried. It also took a reasonably short amount of time
        # to encode compared to level 9, for only a couple of percent more
        # stored data.
        # NOTE(review): the Delta filter is declared as float32 for every
        # variable returned by ds.keys() -- assumes all of them are float32;
        # TODO confirm.
        encoding = {
            var_name: {
                'filters': [zarr.Delta(dtype='float32')],
                'compressor': zarr.Blosc(
                    cname='zstd',
                    clevel=4,
                    shuffle=zarr.Blosc.AUTOSHUFFLE,
                ),
            }
            for var_name in ds.keys()
        }
        gcsmap = gcsfs.mapping.GCSMap(
            gcssavepath, gcs=fs, check=False, create=True
        )
        # NOTE(review): no explicit ``mode`` is passed to ``to_zarr`` here, so
        # the library's default write mode applies -- confirm that this is
        # the intended behaviour when the store already exists.
        ds.to_zarr(store=gcsmap, consolidated=True, encoding=encoding)
    else:
        # Appending to an existing zarr file: the store must already exist
        # (check=True, create=False) and no encoding is passed.
        gcsmap = gcsfs.mapping.GCSMap(
            gcssavepath, gcs=fs, check=True, create=False
        )
        ds.to_zarr(store=gcsmap, append_dim='time', consolidated=True)
nc_name="observation_number", old_nc_name=["n", "Eddy_tsp"], nc_type="uint16", nc_dims=("obs", ), nc_attr=dict( long_name="Eddy temporal index in a trajectory", comment= "Observation sequence number, days starting at the eddy first detection", ), ), contour_lon_e=dict( attr_name=None, nc_name="effective_contour_longitude", old_nc_name=["contour_lon_e"], nc_type="f4", filters=[zarr.Delta("i2")], output_type="i2", scale_factor=0.01, add_offset=180.0, nc_dims=("obs", "NbSample"), nc_attr=dict( long_name="Effective Contour Longitudes", comment="Longitudes of the effective contour", units="degrees_east", axis="X", ), ), contour_lat_e=dict( attr_name=None, nc_name="effective_contour_latitude", old_nc_name=["contour_lat_e"],
n=dict(attr_name=None, nc_name='observation_number', old_nc_name=['n', 'Eddy_tsp'], nc_type='uint16', nc_dims=('obs', ), nc_attr=dict( longname='observation number', units='ordinal', description= 'Observation sequence number, days from eddy first detection', )), contour_lon_e=dict(attr_name=None, nc_name='effective_contour_longitude', old_nc_name=['contour_lon_e'], nc_type='f4', filters=[zarr.Delta('i2')], output_type='i2', scale_factor=numpy.float32(0.01), add_offset=180, nc_dims=('obs', 'NbSample'), nc_attr=dict( longname='effective contour longitudes', description='Longitudes of effective contour', units='degrees_east', axis='X', )), contour_lat_e=dict(attr_name=None, nc_name='effective_contour_latitude', old_nc_name=['contour_lat_e'], nc_type='f4', filters=[zarr.Delta('i2')],