Example #1
    def setup(self):

        requires_dask()

        self.make_ds()
        self.format = "NETCDF3_64BIT"
        xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format)
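
Most of the examples below follow the same basic contract: xr.save_mfdataset takes a list of Datasets and an equal-length list of target paths and writes each Dataset to the corresponding file. A minimal, self-contained sketch of the recurring split-then-save pattern (toy data, not taken from any of the examples):

import numpy as np
import pandas as pd
import xarray as xr

# Toy dataset with a daily time coordinate spanning two years.
ds = xr.Dataset(
    {'foo': ('time', np.random.randn(730))},
    coords={'time': pd.date_range('2000-01-01', periods=730)},
)

# Split along the time dimension, one group per year,
# and write each group to its own file.
years, datasets = zip(*ds.groupby('time.year'))
paths = [f'out_{year}.nc' for year in years]
xr.save_mfdataset(datasets, paths)
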
def save_segments_to_netcdf(segments, output_path, mission_name=''):
    full_output_path = "%s/%s/" % (output_path, mission_name)
    if not os.path.isdir(full_output_path):
        os.makedirs(full_output_path)
    output_filenames = [full_output_path + mission_name +
                        '_segment_%02d.nc' % (ns + 1)  for ns in range(len(segments))]
    xr.save_mfdataset(segments, output_filenames)
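
A hypothetical call of the helper above, assuming ds is a Dataset with a time dimension of at least 300 steps:

# Hypothetical usage: write three mission segments under /tmp/output/mission01/.
segments = [ds.isel(time=slice(i, i + 100)) for i in range(0, 300, 100)]
save_segments_to_netcdf(segments, '/tmp/output', mission_name='mission01')
# -> /tmp/output/mission01/mission01_segment_01.nc ... mission01_segment_03.nc
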
Example #3
def writefile(ds, fs, io_format, root, fname):
    filename = f'sst.{fname}'
    # if isinstance(fs, fsspec.AbstractFileSystem):
    if io_format == 'zarr':
        if isinstance(fs, fsspec.AbstractFileSystem):
            store = fs.get_mapper(root=f'{root}/{filename}.zarr',
                                  check=False,
                                  create=True)
        else:
            store = f'{root}/test1/{filename}.zarr'
        ds = ds.to_zarr(
            store,
            encoding={'sst': {
                'compressor': None
            }},
            consolidated=True,
            compute=False,
            mode='w',
        )
        ds.compute()
    elif io_format == 'netcdf':
        ds_list = list(split_by_chunks(ds))
        dss = [item[1] for item in ds_list]
        paths = [
            create_filepath(ds, prefix=filename, root_path=f'{root}/test1')
            for ds in dss
        ]
        xr.save_mfdataset(datasets=dss,
                          paths=paths,
                          engine='h5netcdf',
                          parallel=True)
        if isinstance(fs, fsspec.AbstractFileSystem):
            fs.upload(lpath=f'{root}/test1', rpath=f'{root}/', recursive=True)

    return filename
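
The helpers split_by_chunks and create_filepath are assumed to be defined elsewhere in this benchmark. A minimal sketch of what split_by_chunks might look like, yielding one (selection, sub-dataset) pair per dask chunk so that every piece can be written to its own netCDF file:

import itertools

def split_by_chunks(dataset):
    # Turn the dask chunk sizes of each dimension into index slices.
    chunk_slices = {}
    for dim, chunks in dataset.chunks.items():
        slices, start = [], 0
        for length in chunks:
            slices.append(slice(start, start + length))
            start += length
        chunk_slices[dim] = slices
    # Yield every combination of per-dimension slices with its sub-dataset.
    for combination in itertools.product(*chunk_slices.values()):
        selection = dict(zip(chunk_slices.keys(), combination))
        yield selection, dataset.isel(selection)
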
Example #4
    def to_restart(self,
                   savepath='.',
                   nxpe=None,
                   nype=None,
                   original_splitting=False):
        """
        Write out final timestep as a set of netCDF BOUT.restart files.

        If processor decomposition is not specified then data will be saved
        using the decomposition it had when loaded.

        Parameters
        ----------
        savepath : str
        nxpe : int
        nype : int
        """

        # Set processor decomposition if not given
        if original_splitting:
            if any([nxpe, nype]):
                raise ValueError('Inconsistent choices for domain '
                                 'decomposition.')
            else:
                nxpe, nype = self.metadata['NXPE'], self.metadata['NYPE']

        # Is this even possible without saving the guard cells?
        # Can they be recreated?
        restart_datasets, paths = _split_into_restarts(self.data, savepath,
                                                       nxpe, nype)
        with ProgressBar():
            save_mfdataset(restart_datasets, paths, compute=True)
        return
Example #5
    def setup(self):

        requires_dask()

        self.make_ds()
        self.format = 'NETCDF3_64BIT'
        xr.save_mfdataset(self.ds_list, self.filenames_list,
                          format=self.format)
Example #6
def save_density_netcdf(rho_da: xr.DataArray) -> None:
    """Passes in density datarray, saves it in a reasonable way.

    Args:
        rho_da (xr.DataArray): [description]
    """

    xr.save_mfdataset([rho_da], ["nc/Density.nc"], format="NETCDF4")
Example #7
    def test_save_mfdataset_roundtrip(self):
        original = Dataset({'foo': ('x', np.random.randn(10))})
        datasets = [original.isel(x=slice(5)),
                    original.isel(x=slice(5, 10))]
        with create_tmp_file() as tmp1:
            with create_tmp_file() as tmp2:
                save_mfdataset(datasets, [tmp1, tmp2])
                with open_mfdataset([tmp1, tmp2]) as actual:
                    self.assertDatasetIdentical(actual, original)
Example #8
def netcdf_local_paths(daily_xarray_dataset, tmpdir_factory, request):
    """Return a list of paths pointing to netcdf files."""
    tmp_path = tmpdir_factory.mktemp("netcdf_data")
    # copy needed to avoid polluting metadata across multiple tests
    datasets, fnames = _split_up_files_by_day(daily_xarray_dataset.copy(), request.param)
    full_paths = [tmp_path.join(fname) for fname in fnames]
    xr.save_mfdataset(datasets, [str(path) for path in full_paths])
    items_per_file = {"D": 1, "2D": 2}[request.param]
    return full_paths, items_per_file
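
These pytest fixtures read request.param, so they are presumably registered with a parametrized @pytest.fixture decorator that this excerpt omits; a sketch of the assumed decoration (the parameter values match the items_per_file lookup above):

import pytest

@pytest.fixture(scope="session", params=["D", "2D"])  # assumed parametrization
def netcdf_local_paths(daily_xarray_dataset, tmpdir_factory, request):
    ...  # body as in the example above
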
Example #9
def x_grad() -> None:
    """
    Save x grad.
    """
    density_da = xr.open_mfdataset("nc/density.nc", decode_cf=False).astype("float32")
    grad_da = density_da.Density.differentiate(cst.X_COORD).astype("float32")
    density_da["x_grad"] = grad_da
    grad_ds = density_da.drop("Density").astype("float32")
    xr.save_mfdataset([grad_ds], ["nc/density_grad_x.nc"], format="NETCDF4")
Example #10
def write_netcdf(ds, netcdf_dir, netcdf_prefix):

    ds.attrs['history'] = (ds.attrs['history'] +
                           ', written to NetCDF files using write_netcdf:' +
                           datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

    prefix = os.path.join(netcdf_dir, netcdf_prefix)
    years, datasets = zip(*ds.groupby('time.year'))
    paths = [prefix + '%s.nc' % y for y in years]
    xr.save_mfdataset(datasets, paths)
def netcdf_local_paths(daily_xarray_dataset, tmpdir_factory, request):
    """Return a list of paths pointing to netcdf files."""
    tmp_path = tmpdir_factory.mktemp("netcdf_data")
    gb = daily_xarray_dataset.resample(time=request.param)
    _, datasets = zip(*gb)
    fnames = [f"{n:03d}.nc" for n in range(len(datasets))]
    paths = [tmp_path.join(fname) for fname in fnames]
    print(len(paths))
    xr.save_mfdataset(datasets, [str(path) for path in paths])
    return paths
def netcdf_local_paths_by_variable(daily_xarray_dataset, tmpdir_factory,
                                   request):
    """Return a list of paths pointing to netcdf files."""
    tmp_path = tmpdir_factory.mktemp("netcdf_data")
    datasets, fnames, fnames_by_variable = _split_up_files_by_variable_and_day(
        daily_xarray_dataset.copy(), request.param)
    full_paths = [tmp_path.join(fname) for fname in fnames]
    xr.save_mfdataset(datasets, [str(path) for path in full_paths])
    items_per_file = {"D": 1, "2D": 2}[request.param]
    path_format = str(tmp_path) + "/{variable}_{n:03d}.nc"
    return full_paths, items_per_file, fnames_by_variable, path_format
Example #13
def metrics_save(metrics, odir, fname, mf_save=False, **kwargs):
    for kk in metrics.keys():
        if mf_save:
            years, datasets = zip(*metrics[kk].groupby('time.year'))
            paths = [
                pjoin(odir, '%04i_%s_%s.nc' % (y, fname, kk)) for y in years
            ]
            xr.save_mfdataset(datasets, paths)
        else:
            metrics[kk].to_netcdf(pjoin(odir, '%s_%s.nc' % (fname, kk)),
                                  **kwargs)
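
A hypothetical call, assuming each value in metrics is a Dataset with a time coordinate (the metric Datasets are placeholders, not defined here):

# Hypothetical usage: with mf_save=True each metric is split into one file per year.
metrics = {'sst_mean': sst_mean_ds, 'sst_max': sst_max_ds}
metrics_save(metrics, odir='/tmp/metrics', fname='run01', mf_save=True)
# -> /tmp/metrics/1995_run01_sst_mean.nc, /tmp/metrics/1996_run01_sst_mean.nc, ...
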
Example #14
def split_netcdf(file2split):
    ''' splits concatenated file (SURF.nc, PLEV.nc) into monthly files
    as originally downloaded from CDS, for use in tscale_fast'''
    basename = file2split.split(".nc")[0]
    ds = xr.open_dataset(file2split)
    dates, datasets = zip(*ds.resample(time='1M').mean('time').groupby('time'))
    filenames = [
        basename + "_" + pd.to_datetime(date).strftime('%Y%m') + '.nc'
        for date in dates
    ]
    xr.save_mfdataset(datasets, filenames)
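
A hypothetical call, assuming SURF.nc holds hourly CDS data for 1979:

# Hypothetical usage: monthly-mean files are written next to the input file.
split_netcdf('SURF.nc')
# -> SURF_197901.nc, SURF_197902.nc, ..., SURF_197912.nc
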
Example #15
def save_years_wrapper(ds,
                       odir,
                       name,
                       start_year,
                       timesteps_per_yr=1,
                       timedim='time',
                       **kwargs):
    if not os.path.isdir(odir):
        os.mkdir(odir)

    years = list(range(start_year, start_year + len(ds[timedim])))
    datasets = [ds[{timedim: a}] for a in range(len(ds[timedim]))]
    paths = [pjoin(odir, '%04i.' + name) % y for y in years]
    xr.save_mfdataset(datasets, paths, **kwargs)
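
A hypothetical call, assuming ds holds one timestep per year along the time dimension:

# Hypothetical usage: one file per year, starting from start_year.
save_years_wrapper(ds, odir='out', name='tas.nc', start_year=1850)
# -> out/1850.tas.nc, out/1851.tas.nc, ...
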
Example #16
def merge_and_save(k_clusters: int = 5, pca: int = 3) -> None:
    """Merge and save joint."""

    pca_ds = xr.open_mfdataset(
        io.return_folder(k_clusters, pca) + "*.nc",
        concat_dim=cst.T_COORD,
        combine="by_coords",
        chunks={cst.T_COORD: 1},
        data_vars="minimal",
        coords="minimal",
        compat="override",
    )
    xr.save_mfdataset(
        [pca_ds], [io.return_name(k_clusters, pca) + ".nc"], format="NETCDF4"
    )
Example #17
def save_big_ERA5_dataset_as_yearly_files(file, time_dim='time',
                                          verbose=True):
    import xarray as xr
    ds = xr.open_dataset(file)
    years, datasets = zip(*ds.groupby("{}.year".format(time_dim)))
    savepath, filename = return_savepath_and_filename_from_filepath(file)
    paths = [savepath / (filename + '_{}.nc'.format(y)) for y in years]
    # paths = ["%s.nc" % y for y in years]
    if verbose:
        yrmin = min(years)
        yrmax = max(years)
        filemin = filename + '_{}.nc'.format(yrmin)
        filemax = filename + '_{}.nc'.format(yrmax)
        print('saving {} to {}.'.format(filemin, filemax))
    xr.save_mfdataset(datasets, paths)
    return
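
A hypothetical call; the actual output names depend on the assumed helper return_savepath_and_filename_from_filepath:

# Hypothetical usage: split a multi-year ERA5 file into one file per year.
save_big_ERA5_dataset_as_yearly_files('ERA5_T2M_1979-2020.nc')
# prints: saving ERA5_T2M_1979-2020_1979.nc to ERA5_T2M_1979-2020_2020.nc.
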
Example #18
    def save_mfdataset(self):
        """
        Use xarray.save_mfdataset to write multiple netcdf files.
        """
        dar1 = xarray.DataArray(np.random.randn(2, 3))
        dar2 = xarray.DataArray(np.random.randn(2, 3))

        data1 = xarray.Dataset({'foo': dar1, 'bar': ('x', [1, 2])})
        data2 = xarray.Dataset({'foo': dar2, 'bar': ('x', [1, 2])})

        file_names = [os.path.join(self.data_dir, f)
                      for f in ['data1.nc', 'data2.nc']]

        xarray.save_mfdataset([data1, data2], file_names)

        for f in file_names:
            os.remove(f)
def make_netcdf(station_dir,
                netcdf_dir,
                netcdf_prefix,
                station,
                download=False,
                overwrite=False):
    """
Create a netcdf file containing MLML historical seawater or weather data. The file will be created from csv and readme files already on disk, or they can be downloaded.

INPUT:
station_dir - string specifying the location of csv files (e.g. '/home/username/data/')
netcdf_dir - string specifying the location of netcdf files to be created (e.g. '/home/username/data/')
netcdf_prefix - string specifying filename pattern for netcdf files
                the year will be appended this prefix
                (e.g. 'moss_landing_' for moss_landing_2015.nc, moss_landing_2016.nc, etc.)
station     - either 'seawater' or 'weather' (default: 'seawater')
download    - boolean specifying whether to download new files
              (default: False)
overwrite   - boolean specifying whether to overwrite the existing files, only used if downloading new data (default: False)
    """

    # download new data, if specified
    if download == True:
        download_station_data(station_dir, station, overwrite)

    # read data in csv files to xarray dataset
    d = read_csv_data(station_dir, format='dataset')

    # specify location of readme file and add metadata to dataset
    readme_file = station_dir + '1_README.TXT'
    d = add_metadata(d, station, readme_file)

    # Additional processing
    d = cleanup_raw(d)
    d = add_flags(d)

    d.attrs['history'] = (
        d.attrs['history'] + 'netcdf file created using mlml.make_netcdf(station_dir=' +
        station_dir + ', netcdf_dir=' + netcdf_dir + ', netcdf_prefix=' + netcdf_prefix +
        ', station=' + station + ', download=' + str(download) + ', overwrite=' +
        str(overwrite) + '): ' + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

    # create netcdf files
    prefix = os.path.join(netcdf_dir, netcdf_prefix)
    years, datasets = zip(*d.groupby('time.year'))
    paths = [prefix + '%s.nc' % y for y in years]
    xr.save_mfdataset(datasets, paths)
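
A hypothetical call, using the example paths from the docstring above:

# Hypothetical usage: one netCDF file per year of data found in the csv files.
make_netcdf('/home/username/data/', '/home/username/netcdf/', 'moss_landing_',
            station='seawater', download=False)
# -> /home/username/netcdf/moss_landing_2015.nc, moss_landing_2016.nc, ...
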
Example #20
def writefile(ds, fs, io_format, root, fname):
    filename = f'sst.{fname}'
    if io_format == 'zarr':
        store = fs.get_mapper(root=f'{root}/{filename}.zarr', check=False, create=True)
        ds = ds.to_zarr(
            store,
            encoding={'sst': {'compressor': None}},
            consolidated=True,
            compute=False,
            mode='w',
        )
        ds.compute()
    elif io_format == 'netcdf':
        ds_list = list(split_by_chunks(ds))
        dss = [item[1] for item in ds_list]
        paths = [create_filepath(ds, prefix=filename, root_path=f'{root}') for ds in dss]
        xr.save_mfdataset(datasets=dss, paths=paths)
        if fs.protocol[0] == 's3':
            fs.upload(lpath=f'{root}', rpath=f'{root}/', recursive=True)

    return filename
Example #21
    def to_netcdf(self, savedir):
        """Save to netCDF4 files

        Args:
            savedir (str): Path to save files to
        """

        # Add dates
        self._subset_ds['date'] = self._subset_ds.time.to_pandas().dt.strftime(
            '%Y%m%d')

        # Write as multi-file dataset
        dates, datasets = zip(*self._subset_ds.groupby('date'))
        basename = self._ds.attrs['title'].split(':')[0].replace(' ', '.')
        filepaths = [
            os.path.abspath(
                os.path.join(savedir, '{}.{}.SUB.nc4'.format(basename, date)))
            for date in dates
        ]
        logger.debug("Writing to %s", os.path.abspath(savedir))
        xa.save_mfdataset(datasets, filepaths)
Example #22
def take_derivative_density(
    dimension: str = cst.Y_COORD, typ: str = "float32", engine: str = "h5netcdf"
) -> None:
    """
    Take derivative of density.

    Args:
        dimension (str, optional): [description]. Defaults to cst.Y_COORD.
        typ (str, optional): [description]. Defaults to "float32".
        engine (str, optional): [description]. Defaults to "h5netcdf".
    """

    chunk_d = {cst.T_COORD: 1, cst.Z_COORD: 52, cst.Y_COORD: 588, cst.X_COORD: 2160}

    density_ds = xr.open_mfdataset(
        "nc/density.nc",
        # engine=engine,
        # decode_cf=False,
        chunks=chunk_d,
        combine="by_coords",
        data_vars="minimal",
        coords="minimal",
        compat="override",
        engine=engine,
        parallel=True,
    ).astype(typ)

    grad_da = density_ds.Density.differentiate(dimension)
    # .astype(typ).chunk(chunks=chunk_d)

    name = "Density_Gradient_" + dimension
    grad_ds = grad_da.to_dataset().rename_vars({"Density": name})
    grad_ds[name].attrs["long_name"] = "Density Gradient " + dimension
    grad_ds[name].attrs["units"] = "kg m-3 box-1"

    # .astype(typ).chunk(chunks=chunk_d)
    xr.save_mfdataset(
        [grad_ds], ["nc/density_grad_" + dimension + ".nc"], format="NETCDF4"
    )
Example #23
def y_grad(set_ok: bool = False) -> None:
    """Take y grad.

    Args:
        set_ok (bool, optional): if True, save the gradient as a Dataset via
            save_mfdataset; otherwise write the DataArray with to_netcdf.
            Defaults to False.
    """
    density_da = xr.open_mfdataset(
        "nc/density.nc", decode_cf=False, parallel=True
    ).astype("float32")
    grad_da = (
        density_da.Density.astype("float32")
        .differentiate(cst.Y_COORD)
        .astype("float32")
    )
    del density_da
    if not set_ok:
        grad_da.to_netcdf("nc/density_grad_y_da.nc", engine="netcdf4")
    else:
        grad_ds = grad_da.to_dataset().astype("float32")
        # density_da['y_grad'] = grad_da
        # grad_ds = density_da.drop('Density')
        xr.save_mfdataset([grad_ds], ["nc/density_grad_y.nc"], format="NETCDF4")
Example #24
    def test_save_mfdataset_invalid(self):
        ds = Dataset()
        with self.assertRaisesRegexp(ValueError, 'cannot use mode'):
            save_mfdataset([ds, ds], ['same', 'same'])
        with self.assertRaisesRegexp(ValueError, 'same length'):
            save_mfdataset([ds, ds], ['only one path'])
Example #25
    def time_write_dataset_netcdf4(self):
        xr.save_mfdataset(self.ds_list,
                          self.filenames_list,
                          engine='netcdf4',
                          format=self.format)
Example #26
    # add some attributes for convenience to the stats
    conv_intensity.attrs['units'] = 'mm/hour'
    conv_mean.attrs['units'] = 'mm/hour'
    conv_area.attrs['units'] = '% of radar area'
    stra_intensity.attrs['units'] = 'mm/hour'
    stra_mean.attrs['units'] = 'mm/hour'
    stra_area.attrs['units'] = '% of radar area'

    # save as netcdf-files
    path = '/Users/mret0001/Desktop/'
    xr.save_mfdataset([
        xr.Dataset({'conv_intensity': conv_intensity}),
        xr.Dataset({'conv_rr_mean': conv_mean}),
        xr.Dataset({'conv_area': conv_area}),
        xr.Dataset({'stra_intensity': stra_intensity}),
        xr.Dataset({'stra_rr_mean': stra_mean}),
        xr.Dataset({'stra_area': stra_area})
    ], [
        path + 'conv_intensity.nc', path + 'conv_rr_mean.nc',
        path + 'conv_area.nc', path + 'stra_intensity.nc',
        path + 'stra_rr_mean.nc', path + 'stra_area.nc'
    ])

    # sanity check
    check = False
    if check:
        r = ds_rr.radar_estimated_rain_rate / 6.
        cr = r.where(ds_st.steiner_echo_classification == 2)
        cr = cr.where(cr != 0.)
        cr_1h = cr[
            9774:9780, :, :].load()  # the most precip hour in the 09/10-season
        # cr_1h = cr[9774, :, :].load()  # '2010-02-25T21:00:00'
Example #27
    def time_write_dataset_scipy(self):
        xr.save_mfdataset(self.ds_list, self.filenames_list,
                          engine='scipy',
                          format=self.format)
Example #28
    def time_write_dataset_netcdf4(self):
        xr.save_mfdataset(self.ds_list, self.filenames_list,
                          engine='netcdf4',
                          format=self.format)
Example #29
    def time_write_dataset_scipy(self):
        xr.save_mfdataset(self.ds_list,
                          self.filenames_list,
                          engine='scipy',
                          format=self.format)
def main(argv=None):
    # Creating an argparse object
    parser = argparse.ArgumentParser(description='ZPLSC/G echogram generator')

    # Creating input arguments
    parser.add_argument(
        '-s',
        '--site',
        dest='site',
        type=str,
        required=True,
        help='The OOI 8-letter site name for where the ZPLSC/G is located.')
    parser.add_argument(
        '-d',
        '--data_directory',
        dest='data_directory',
        type=str,
        required=True,
        help=
        'The path to the root directory below which the .01A or .raw files may be found.'
    )
    parser.add_argument(
        '-o',
        '--output_directory',
        dest='output_directory',
        type=str,
        required=True,
        help=
        'The path to the root directory below which the .nc file(s) and .png plot will be saved.'
    )
    parser.add_argument(
        '-dr',
        '--date_range',
        dest='dates',
        type=str,
        nargs='+',
        required=True,
        help=
        ('Date range to plot as either YYYYMM or YYYYMMDD. Specifying an end date is optional, '
         'it will be assumed to be 1 month or 1 day depending on input.'))
    parser.add_argument(
        '-zm',
        '--zpls_model',
        dest='zpls_model',
        type=str,
        required=True,
        help='Specifies the ZPLS instrument model, either AZFP or EK60.')
    parser.add_argument(
        '-xf',
        '--xml_file',
        dest='xml_file',
        type=str,
        required=False,
        help=
        'The path to .XML file used to process the AZFP data in the .01A files'
    )
    parser.add_argument('-tc',
                        '--tilt_correction',
                        dest='tilt_correction',
                        type=int,
                        required=False,
                        help='Apply tilt correction in degree(s)')
    parser.add_argument('-dd',
                        '--deployed_depth',
                        dest='deployed_depth',
                        type=int,
                        required=False,
                        help='The depth where the ZPLSC/G is located at')
    parser.add_argument('-cr',
                        '--colorbar_range',
                        dest='colorbar_range',
                        type=int,
                        nargs=2,
                        required=False,
                        help='Set colorbar range. Usage: "min" "max"')
    parser.add_argument(
        '-vr',
        '--vertical_range',
        dest='vertical_range',
        type=int,
        nargs=2,
        required=False,
        help='Set the range for the y-axis. Usage: "min" "max"')

    # parse the input arguments
    args = parser.parse_args(argv)
    site = args.site.upper()
    data_directory = os.path.abspath(args.data_directory)
    output_directory = os.path.abspath(args.output_directory)
    dates = args.dates
    zpls_model = args.zpls_model.upper()
    tilt_correction = args.tilt_correction
    deployed_depth = args.deployed_depth
    colorbar_range = args.colorbar_range
    vertical_range = args.vertical_range
    xml_file = args.xml_file
    if xml_file:
        xml_file = os.path.abspath(xml_file)

    # assign per site variables
    if site in site_config:
        # if tilt_correction flag is not set, set the tilt correction from the site configuration
        if tilt_correction is None:
            tilt_correction = site_config[site]['tilt_correction']
        # if deployed_depth flag is not set, set the deployed_depth from the site configuration
        if deployed_depth is None:
            deployed_depth = site_config[site]['deployed_depth']
        # if colorbar_range flag is not set, set the colorbar_range from the site configuration
        if colorbar_range is None:
            colorbar_range = site_config[site]['colorbar_range']
        # if vertical_range flag is not set, set the vertical_range from the site configuration
        if vertical_range is None:
            vertical_range = site_config[site]['vertical_range']
    elif site is not None:
        parser.error(
            'The site name was not found in the configuration dictionary.')

    # make sure the root output directory exists
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)

    # use the ZPLS model to determine how to process the data
    data = None
    if zpls_model not in ['AZFP', 'EK60']:
        raise ValueError(
            'The ZPLS model must be set as either AZFP or EK60 (case insensitive)'
        )
    else:
        if zpls_model == 'AZFP':
            data = process_azfp(site, data_directory, xml_file,
                                output_directory, dates, tilt_correction)

        if zpls_model == 'EK60':
            data = process_ek60(site, data_directory, output_directory, dates,
                                tilt_correction)

    # test to see if we have any data from the processing
    if not data:
        return None

    # save the full resolution data to daily NetCDF files
    file_name = set_file_name(site, dates)
    output_directory = os.path.join(output_directory,
                                    dates[0] + '-' + dates[1])

    # reset a couple data types (helps to control size of NetCDF files)
    data['range'] = data['range'].astype(np.float32)
    data['Sv'] = data['Sv'].astype(np.float32)

    # split the data into daily records
    days, datasets = zip(*data.groupby("ping_time.day"))

    # create a list of file names based on the day of the record
    start = datetime.strptime(dates[0], '%Y%m%d')
    stop = datetime.strptime(dates[1], '%Y%m%d')
    date_list = [
        start + timedelta(days=x) for x in range(0, (stop - start).days)
    ]
    nc_file = os.path.join(output_directory, file_name)
    nc_files = []
    for day in days:
        for dt in date_list:
            if dt.day == day:
                nc_files.append(nc_file +
                                "_Full_%s.nc" % dt.strftime('%Y%m%d'))

    # convert ping_time from a datetime64[ns] object to a float (seconds since 1970) and update the attributes
    for dataset in datasets:
        dataset['ping_time'] = dataset['ping_time'].values.astype(
            np.float64) / 10.0**9
        dataset.attrs = attributes['global']
        dataset.attrs['instrument_orientation'] = site_config[site][
            'instrument_orientation']

        for v in dataset.variables:
            dataset[v].attrs = attributes[v]

    # save the daily files
    xr.save_mfdataset(datasets,
                      nc_files,
                      mode='w',
                      format='NETCDF4',
                      engine='h5netcdf')

    # if a global mooring, create hourly averaged data records, otherwise create 15 minute records
    if 'HYPM' in site:
        # resample the data into a 60 minute, mean averaged record, filling gaps less than 180 minutes
        avg = data.resample(ping_time='60Min').mean()
        avg = avg.interpolate_na(dim='ping_time', max_gap='180Min')
    else:
        # resample the data into a 15 minute, median averaged record, filling gaps less than 45 minutes
        avg = data.resample(ping_time='15Min').median()
        avg = avg.interpolate_na(dim='ping_time', max_gap='45Min')

    # generate the echogram
    long_name = site_config[site]['long_name']
    generate_echogram(avg,
                      site,
                      long_name,
                      deployed_depth,
                      output_directory,
                      file_name,
                      dates,
                      vertical_range=vertical_range,
                      colorbar_range=colorbar_range)

    # add the OOI logo as a watermark
    echogram = os.path.join(output_directory, file_name + '.png')
    echo_image = Image.open(echogram)
    ooi_image = Image.open('ooi-logo.png')
    width, height = echo_image.size
    transparent = Image.new('RGBA', (width, height), (0, 0, 0, 0))
    transparent.paste(echo_image, (0, 0))
    if max(vertical_range) > 99:
        transparent.paste(ooi_image, (96, 15), mask=ooi_image)
    else:
        transparent.paste(ooi_image, (80, 15), mask=ooi_image)

    # re-save the echogram with the added logo
    transparent.save(echogram)

    # save the averaged data
    avg['ping_time'] = avg['ping_time'].values.astype(np.float64) / 10.0**9
    avg.attrs = attributes['global']
    avg.attrs['instrument_orientation'] = site_config[site][
        'instrument_orientation']
    for v in avg.variables:
        avg[v].attrs = attributes[v]

    avg_file = nc_file + '_Averaged.nc'
    avg.to_netcdf(avg_file, mode='w', format='NETCDF4', engine='h5netcdf')
da_mod_avg_mon = da_mod_avg  # Already monthly means,

# In[ ]:

# Trim to common time periods
(ds_obs_trim, ds_mod_trim) = esio.trim_common_times(da_obs_avg_mon,
                                                    da_mod_avg_mon)

# In[ ]:

# Temp dump to netcdf then load
os.chdir(temp_dir)
c_e, datasets = zip(*ds_mod_trim.to_dataset(
    name='sic').groupby('init_time.year'))
paths = ['GFDL_extent_esns_%s.nc' % e for e in c_e]
xr.save_mfdataset(datasets, paths)

# In[ ]:

print("Done!")

# In[ ]:

# ds_mod_trim = None # Flush memory

# In[ ]:

# ds_mod_trim = xr.open_mfdataset(os.path.join(temp_dir, 'GFDL_extent_esns_*.nc'), concat_dim='ensemble')

# ds_mod_trim = ds_mod_trim.reindex(ensemble=sorted(ds_mod_trim.ensemble.values))
# #