Esempio n. 1
0
def compress_and_save_to_zarr(ds, mode='a'):
    """Rechunk ``ds`` and write it to the Zarr store at ``gcssavepath``.

    ``gcssavepath`` and ``fs`` are not parameters: they are looked up in the
    enclosing scope when the function runs.

    Args:
        ds: Dataset to store. It must provide ``dims``, ``chunk``, ``keys``
            and ``to_zarr`` (presumably an ``xarray.Dataset`` -- confirm
            against the callers).
        mode: ``'w'`` for the first write to the store (the compression
            encoding is supplied), or ``'a'`` to append to an existing
            store along the ``'time'`` dimension.

    Raises:
        ValueError: If ``mode`` is neither ``'w'`` nor ``'a'``.
    """
    # Reject an unsupported mode before doing any work, and say what was
    # wrong instead of raising a bare ValueError at the very end.
    if mode not in ('w', 'a'):
        raise ValueError(
            "mode must be 'w' (first write) or 'a' (append), "
            "got {!r}".format(mode))

    # Chunk the array thinking about how we access spatial slices.
    # Here we have assumed we don't use forecast time slices or many step
    # slices, so each chunk holds one time, ten steps and the full y/x plane.
    chunk_dict = {'time': 1, 'step': 10, 'y': -1, 'x': -1}
    if 'isobaricInhPa' in ds.dims:
        chunk_dict['isobaricInhPa'] = 5
    ds = ds.chunk(chunk_dict)

    if mode == 'w':  # if this is the first addition to the zarr file use this
        # This encoding gave good compression and was as fast to load as any
        # other compression level. It also took a reasonably short amount of
        # time to encode compared to level 9, for only a couple of percent
        # more stored data.
        # NOTE(review): the Delta filter is declared as float32 for every
        # variable -- assumes all variables are float32; confirm.
        encoding = {
            var_name: {
                'filters': [zarr.Delta(dtype='float32')],
                'compressor':
                zarr.Blosc(cname='zstd',
                           clevel=4,
                           shuffle=zarr.Blosc.AUTOSHUFFLE)
            }
            for var_name in ds.keys()
        }
        gcsmap = gcsfs.mapping.GCSMap(gcssavepath,
                                      gcs=fs,
                                      check=False,
                                      create=True)
        # No explicit ``mode`` is passed to ``to_zarr`` here, so the
        # library's default write mode applies.
        ds.to_zarr(store=gcsmap, consolidated=True, encoding=encoding)
    else:  # mode == 'a': we are appending to an existing zarr file
        gcsmap = gcsfs.mapping.GCSMap(gcssavepath,
                                      gcs=fs,
                                      check=True,
                                      create=False)
        ds.to_zarr(store=gcsmap, append_dim='time', consolidated=True)
Esempio n. 2
0
     nc_name="observation_number",
     old_nc_name=["n", "Eddy_tsp"],
     nc_type="uint16",
     nc_dims=("obs", ),
     nc_attr=dict(
         long_name="Eddy temporal index in a trajectory",
         comment=
         "Observation sequence number, days starting at the eddy first detection",
     ),
 ),
 contour_lon_e=dict(
     attr_name=None,
     nc_name="effective_contour_longitude",
     old_nc_name=["contour_lon_e"],
     nc_type="f4",
     filters=[zarr.Delta("i2")],
     output_type="i2",
     scale_factor=0.01,
     add_offset=180.0,
     nc_dims=("obs", "NbSample"),
     nc_attr=dict(
         long_name="Effective Contour Longitudes",
         comment="Longitudes of the effective contour",
         units="degrees_east",
         axis="X",
     ),
 ),
 contour_lat_e=dict(
     attr_name=None,
     nc_name="effective_contour_latitude",
     old_nc_name=["contour_lat_e"],
Esempio n. 3
0
 n=dict(attr_name=None,
        nc_name='observation_number',
        old_nc_name=['n', 'Eddy_tsp'],
        nc_type='uint16',
        nc_dims=('obs', ),
        nc_attr=dict(
            longname='observation number',
            units='ordinal',
            description=
            'Observation sequence number, days from eddy first detection',
        )),
 contour_lon_e=dict(attr_name=None,
                    nc_name='effective_contour_longitude',
                    old_nc_name=['contour_lon_e'],
                    nc_type='f4',
                    filters=[zarr.Delta('i2')],
                    output_type='i2',
                    scale_factor=numpy.float32(0.01),
                    add_offset=180,
                    nc_dims=('obs', 'NbSample'),
                    nc_attr=dict(
                        longname='effective contour longitudes',
                        description='Longitudes of effective contour',
                        units='degrees_east',
                        axis='X',
                    )),
 contour_lat_e=dict(attr_name=None,
                    nc_name='effective_contour_latitude',
                    old_nc_name=['contour_lat_e'],
                    nc_type='f4',
                    filters=[zarr.Delta('i2')],