Example #1
def test_subset_variables(self):
        fileName = str(self.datadir.join('example_jan.nc'))
        timestr = ['xtime_start', 'xtime_end']
        varList = ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']

        # first, test loading the whole data set and then calling
        # subset_variables explicitly
        ds = xr.open_mfdataset(
            fileName,
            preprocess=lambda x: mpas_xarray.preprocess_mpas(x,
                                                             timestr=timestr,
                                                             yearoffset=1850))
        ds = mpas_xarray.subset_variables(ds, varList)
        self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList))
        self.assertEqual(pd.Timestamp(ds.Time.values[0]),
                         pd.Timestamp('1855-01-16 12:22:30'))

        # next, test the same with the onlyvars argument
        ds = xr.open_mfdataset(
            fileName,
            preprocess=lambda x: mpas_xarray.preprocess_mpas(x,
                                                             timestr=timestr,
                                                             onlyvars=varList,
                                                             yearoffset=1850))
        self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList))
Example #2
def scaleVSpower():

    power = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/power_maps/' \
                           'lsta_daily_power*.nc')


    scale = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/scale_maps/' \
                           'lsta_daily_scale*.nc')


    scales = np.unique(scale['LSTA'].values[0:300,:,:])
    scales = scales[np.isfinite(scales)]

    power_arr = power['LSTA'][0:300]
    scale_arr = scale['LSTA'][0:300]

    mlist = []

    for s in scales:
        print('Doing '+str(s))
        mean = np.nanmean(power_arr.where(scale_arr.values == s).values)
        mlist.append(mean)


    f = plt.figure()

    plt.scatter(scales, mlist)
Example #3
def main(era_filesearch, cesm_base_filesearch, bias_output):

    print("opening data")
    era_data         = xr.open_mfdataset(era_filesearch,         concat_dim='time')
    base_cesm_data   = xr.open_mfdataset(cesm_base_filesearch,   concat_dim='time')

    print("loading data")
    era_data.load()
    base_cesm_data.load()

    print("compute means")
    emean = era_data.std(dim="time")
    cmean = base_cesm_data.std(dim="time")

    print("creating data")
    interpolated_era = xr.zeros_like(cmean)
    print("loading data")
    interpolated_era.load()

    z_interp_all_vars(emean, interpolated_era, era_data["z"].mean(dim="time"), base_cesm_data["z"].mean(dim="time"), vars_to_correct)
    interpolated_era.to_netcdf("era_interpolated_std.nc")

    print("Computing Bias")
    bias = interpolated_era - cmean

    print("writing")
    bias.to_netcdf(bias_output)
Example #4
def file_loop():

    lsta = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/scale_maps/' \
                           'lsta_daily_scale_*.nc')


    lsta_check = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/' \
                           'lsta_daily_*.nc')

    lsta_check = lsta_check.sel(lat=slice(lsta['lat'].values.min(),lsta['lat'].values.max()), lon=slice(lsta['lon'].values.min(),lsta['lon'].values.max()))


    lsta_checks = lsta_check['LSTA'].where(lsta_check['LSTA']>-800)
    lsta_checks = lsta_checks.where(lsta.time==lsta_checks.time)

    bins = np.arange(-20,20,2)
    f=plt.figure()
    plt.hist(lsta_checks.values[np.isfinite(lsta_checks.values)], bins=bins, edgecolor='k')

    bins = np.arange(-140, 141, 10)

    ll = []

    for i, b in enumerate(bins[0:-1]):

        b1 = bins[i+1]

        lmean = np.nanpercentile(lsta_checks.where((lsta['LSTA'].values >= b) & (lsta['LSTA'].values < b1)), 90)

        ll.append(lmean)

    f = plt.figure()
    plt.scatter(bins[1::], ll)
Example #5
 def test_deterministic_names(self):
     with create_tmp_file() as tmp:
         data = create_test_data()
         data.to_netcdf(tmp)
         with open_mfdataset(tmp) as ds:
             original_names = dict((k, v.data.name) for k, v in ds.items())
         with open_mfdataset(tmp) as ds:
             repeat_names = dict((k, v.data.name) for k, v in ds.items())
         for var_name, dask_name in original_names.items():
             self.assertIn(var_name, dask_name)
             self.assertIn(tmp, dask_name)
         self.assertEqual(original_names, repeat_names)
Example #6
def read_nc_files(dir, bounds=None):
    files = get_reanalysis_file_paths(dir)
    if files:
        data = xarray.open_mfdataset(files, preprocess=lambda d: assert_bounds(d, bounds))
    else:
        raise IOError("There are no .nc files in that directory.")
    return data
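A minimal usage sketch; the directory path is an illustrative assumption, and bounds is whatever assert_bounds expects in this codebase:

# hypothetical call: open every reanalysis .nc file found under ./reanalysis,
# applying assert_bounds to each file before concatenation
data = read_nc_files("./reanalysis", bounds=None)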
Example #7
 def test_lock(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp, format='NETCDF3_CLASSIC')
         with open_dataset(tmp, chunks=10) as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertIsInstance(task[-1], type(Lock()))
         with open_mfdataset(tmp) as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertIsInstance(task[-1], type(Lock()))
         with open_mfdataset(tmp, engine='scipy') as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertNotIsInstance(task[-1], type(Lock()))
Example #8
def saveMonthly18():
    msg_folder = '/users/global/cornkle/data/OBS/gridsat/gridsat_netcdf/z18_panAfrica/'

    da = xr.open_mfdataset(msg_folder+'gridsat_WA_*18UTC.nc')
    da = da.where((da<=-40) & (da>=-110))
    da = da.resample(time='1M').mean()  # monthly means (modern xarray resample API)
    da.to_netcdf(msg_folder+'gridsat_monthly_18UTC.nc')
Example #9
 def data(self):
     try:
         if self.path:
             return open_mfdataset(self.path / "data*.nc")
         return self._concat_fields(self._cached_data)
     except OSError:
         return
Example #10
    def retrieve(path, isel='all', lazy=True):
        path = Path(path)
        try:
            data = open_dataset(path / "data.nc")
            lazy = True
        except FileNotFoundError:
            data = open_mfdataset(path / "data*.nc",
                                  concat_dim="t").sortby("t")
        try:
            with open(path / 'metadata.yml', 'r') as yaml_file:
                metadata = yaml.safe_load(yaml_file)
        except FileNotFoundError:
            # Ensure retro-compatibility with older version
            with open(next(path.glob("Treant.*.json"))) as f:
                metadata = json.load(f)["categories"]

        if isel == 'last':
            data = data.isel(t=-1)
        elif isel == 'all':
            pass
        elif isinstance(isel, dict):
            data = data.isel(**isel)
        else:
            data = data.isel(t=isel)

        if not lazy:
            return FieldsData(data=data.load(),
                              metadata=AttrDict(**metadata))

        return FieldsData(data=data,
                          metadata=AttrDict(**metadata))
Example #11
def month_count_concat():
    msg_folder = cnst.GRIDSAT
    fname = 'aggs/gridsat_WA_-65_monthly_count_-40base_15-21UTC_1000km2.nc'
    da = xr.open_mfdataset(cnst.GRIDSAT + 'gridsat_WA_-40_1000km2_15-21UTC*_monthSum.nc')

    enc = {'tir': {'complevel': 5, 'zlib': True}}
    da.to_netcdf(msg_folder + fname, encoding=enc)
Example #12
 def test_open_and_do_math(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         with open_mfdataset(tmp) as ds:
             actual = 1.0 * ds
             self.assertDatasetAllClose(original, actual)
Example #13
    def test_variable_map(self):
        fileName = str(self.datadir.join('example_jan.nc'))
        varMap = {
            'avgSurfaceTemperature':
                ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature',
                 'other_string',
                 'yet_another_string'],
            'daysSinceStartOfSim':
                ['time_avg_daysSinceStartOfSim',
                 'xtime',
                 'something_else'],
            'avgLayerTemperature':
                ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature',
                 'test1',
                 'test2'],
            'Time': [['xtime_start', 'xtime_end'],
                     'time_avg_daysSinceStartOfSim']}

        varList = ['avgSurfaceTemperature', 'avgLayerTemperature',
                   'refBottomDepth', 'daysSinceStartOfSim']

        # preprocess_mpas will use varMap to map the variable names from their
        # values in the file to the desired values in varList
        ds = xr.open_mfdataset(
            fileName,
            preprocess=lambda x: mpas_xarray.preprocess_mpas(
                x,
                timestr='Time',
                onlyvars=varList,
                yearoffset=1850,
                varmap=varMap))

        # make sure the remapping happened as expected
        self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList))
Example #14
def open_cchdo_as_mfdataset(paths, target_pressure,
                            pressure_coord='pressure',
                            concat_dim='time'):
    """Open cchdo hydrographic data in netCDF format, interpolate to
    specified pressures, and combine as an xarray dataset
    
    Parameters
    ----------
    paths : str or sequence
        Either a string glob in the form "path/to/my/files/*.nc" or an explicit
        list of files to open.
    target_pressure : arraylike
        Target pressure to which all casts are interpolated
    pressure_coord : str
        Name of the coordinate variable for pressure
    concat_dim : str
        Name of the dimension along which to concatenate casts
        
    Returns
    -------
    ds : xarray Dataset
    """
   
    # add time if missing
    timefun = _maybe_add_time_coord
    # create interpolation function for pressure
    interpfun = functools.partial(interp_coordinate,
                interp_coord=pressure_coord, interp_data=target_pressure)
    # create renaming function for concatenation
    renamefun = functools.partial(rename_0d_coords, new_dim=concat_dim)
    # compose together
    ppfun = compose(interpfun, renamefun, timefun)
    #paths = os.path.join(ddir, match_pattern)
    return xr.open_mfdataset(paths, concat_dim=concat_dim, preprocess=ppfun)
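A usage sketch under assumed inputs; the glob pattern follows the docstring and the pressure grid is illustrative:

import numpy as np

# interpolate every cast onto a common 10 dbar pressure grid, then
# concatenate the casts along 'time'
target_p = np.arange(0., 5000., 10.)
ds = open_cchdo_as_mfdataset("path/to/my/files/*.nc", target_p)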
Example #15
def _load_data_from_disk(file_set, preprocess_func=lambda ds: ds,
                         data_vars='minimal', coords='minimal',
                         grid_attrs=None, **kwargs):
    """Load a Dataset from a list or glob-string of files.

    Datasets from files are concatenated along time,
    and all grid attributes are renamed to their aospy internal names.

    Parameters
    ----------
    file_set : list or str
        List of paths to files or glob-string
    preprocess_func : function (optional)
        Custom function to call before applying any aospy logic
        to the loaded dataset
    data_vars : str (default 'minimal')
        Mode for concatenating data variables in call to ``xr.open_mfdataset``
    coords : str (default 'minimal')
        Mode for concatenating coordinate variables in call to
        ``xr.open_mfdataset``.
    grid_attrs : dict
        Overriding dictionary of grid attributes mapping aospy internal
        names to names of grid attributes used in a particular model.

    Returns
    -------
    Dataset
    """
    apply_preload_user_commands(file_set)
    func = _preprocess_and_rename_grid_attrs(preprocess_func, grid_attrs,
                                             **kwargs)
    return xr.open_mfdataset(file_set, preprocess=func, concat_dim=TIME_STR,
                             decode_times=False, decode_coords=False,
                             mask_and_scale=True, data_vars=data_vars,
                             coords=coords)
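A hedged usage sketch; the file paths and the extra preprocessing step are illustrative assumptions, not aospy defaults:

# drop an unneeded variable before aospy's own renaming logic runs
def drop_height(ds):
    return ds.drop_vars('height', errors='ignore')

ds = _load_data_from_disk(['/data/model/ta_1980.nc', '/data/model/ta_1981.nc'],
                          preprocess_func=drop_height)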
Example #16
def main(files, out):
    """
    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links. A # at the front will skip links in the text file.
    out: Directory to save plots
    """
    fname, ext = os.path.splitext(files)
    if ext in ('.nc', '.ncml'):
        list_files = [files]
    else:
        list_files = read_file(files)

    stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables

    # the engine that xarray uses can be changed as specified here:
    # http://xarray.pydata.org/en/stable/generated/xarray.open_dataset.html#xarray.open_dataset
    with xr.open_mfdataset(list_files, engine='netcdf4') as ds_disk:

        # change dimensions from 'obs' to 'time'
        ds_disk = ds_disk.swap_dims({'obs': 'time'})
        ds_variables = ds_disk.data_vars.keys()  # List of dataset variables
        stream = ds_disk.stream  # List stream name associated with the data
        title_pre = mk_str(ds_disk.attrs, 't')  # , var, tt0, tt1, 't')
        save_pre = mk_str(ds_disk.attrs, 's')  # , var, tt0, tt1, 's')
        save_dir = os.path.join(out, ds_disk.subsite, ds_disk.node, ds_disk.stream, 'pcolor')
        cf.create_dir(save_dir)

        # t0, t1 = cf.get_rounded_start_and_end_times(ds_disk['time'].data)
        # tI = t0 + t1 - (t0 / 2)
        # time_list = [[t0, t1], [t0, tI], [tI, t1]]
        # time_list = [[t0, t1]]

        # for period in time_list:
        #     tt0 = period[0]
        #     tt1 = period[1]
        #     sub_ds = ds_disk.sel(time=slice(str(tt0), str(tt1)))
        bins = ds_disk['bin_depths']
        north = ds_disk['northward_seawater_velocity']
        east = ds_disk['eastward_seawater_velocity']
        # up = ds_disk['upward_seawater_velocity']
        # error = ds_disk['error_velocity']

        time = dict(data=ds_disk['time'].data, info=dict(label=ds_disk['time'].standard_name, units='GMT'))
        bins = dict(data=bins.data.T, info=dict(label=bins.long_name, units=bins.units))
        north = dict(data=north.data.T, info=dict(label=north.long_name, units=north.units))
        east = dict(data=east.data.T, info=dict(label=east.long_name, units=east.units))
        # up = dict(data=up.data.T, info=dict(label=up.long_name, units=up.units))
        # error = dict(data=error.data.T, info=dict(label=error.long_name, units=error.units))

        sname = save_pre + 'ADCP'
        title = title_pre
        fig, axs = pf.adcp(time, bins, north, east, title)
        pf.resize(width=12, height=8.5)  # Resize figure
        pf.save_fig(save_dir, sname, res=250)  # Save figure
        plt.close('all')
Example #17
def read_var_in_memory(dir, common_suffix="daily.nc", varname="lake_ice_fraction"):
    """
    :param dir: directory containing the netCDF files
    :param common_suffix: filename suffix shared by the files to open
    :param varname: name of the variable to load into memory
    """
    with xarray.open_mfdataset(f"{dir}/*{common_suffix}") as ds:
        d_arr = ds[varname].load()
        return d_arr
Example #18
    def test_open_mfdataset(self):
        original = Dataset({'foo': ('x', np.random.randn(10))})
        with create_tmp_file() as tmp1:
            with create_tmp_file() as tmp2:
                original.isel(x=slice(5)).to_netcdf(tmp1)
                original.isel(x=slice(5, 10)).to_netcdf(tmp2)
                with open_mfdataset([tmp1, tmp2]) as actual:
                    self.assertIsInstance(actual.foo.variable.data, da.Array)
                    self.assertEqual(actual.foo.variable.data.chunks,
                                     ((5, 5),))
                    self.assertDatasetAllClose(original, actual)
                with open_mfdataset([tmp1, tmp2], chunks={'x': 3}) as actual:
                    self.assertEqual(actual.foo.variable.data.chunks,
                                     ((3, 2, 3, 2),))

        with self.assertRaisesRegexp(IOError, 'no files to open'):
            open_mfdataset('foo-bar-baz-*.nc')
Example #19
def test_load_mpas_xarray_timeSeriesStats_datasets(path): #{{{
    ds = xr.open_mfdataset(path, preprocess=preprocess_mpas_timeSeriesStats)
    ds = remove_repeated_time_index(ds)
    ds2 = xr.open_mfdataset(path, preprocess=preprocess_mpas)
    ds2 = remove_repeated_time_index(ds2)

    # make a simple plot from the data
    def plot_data(ds):
        var = ds["timeSeriesStatsMonthly_avg_iceAreaCell_1"]
        return var.where(var > 0).mean('nCells').plot()

    plot_data(ds)
    plot_data(ds2)
    plt.title("Curve centered around right times (b) \n "+\
              "Curve shifted towards end of avg period (g)")
    plt.show()

    return #}}}
Example #20
def test_load_mpas_xarray_datasets(path): #{{{
    ds = xr.open_mfdataset(path, preprocess=preprocess_mpas)
    ds = remove_repeated_time_index(ds)

    # make a simple plot from the data
    ds.Time.plot()
    plt.show()

    return #}}}
Example #21
 def test_save_mfdataset_roundtrip(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     datasets = [original.isel(x=slice(5)),
                 original.isel(x=slice(5, 10))]
     with create_tmp_file() as tmp1:
         with create_tmp_file() as tmp2:
             save_mfdataset(datasets, [tmp1, tmp2])
             with open_mfdataset([tmp1, tmp2]) as actual:
                 self.assertDatasetIdentical(actual, original)
Example #22
def size_trend():

    msg_folder = '/users/global/cornkle/data/OBS/gridsat/gridsat_netcdf/yearly_files/'
    data = xr.open_mfdataset(msg_folder + 'gridsat*.nc')

    cut = data.sel(lat=slice(10,17), lon=slice(-17,-10))
    cut = cut.isel(time= ((cut['time.year']>1984) & (cut['time.month']==8)))
    cut=cut['t']

    dic= {}
    for p in np.arange(1985,2017,1):
        dic[p] = []

    def mcs_find(image, thresh=None):
        if not thresh:
            print('Give threshold')
            return

        image[image > thresh] = 0
        image[image <= thresh] = 1
        image[np.isnan(image)] = 0

        if np.sum(image) < 10:  # fewer than 10 cloud pixels: nothing to label
            return []

        labels, numL = label(image)

        ret = []

        for l in np.unique(labels):
            if l == 0:
                continue

            npixel = np.sum(labels == l)

            if npixel < 100:  # skip blobs smaller than 100 pixels
                continue

            ret.append(npixel * 49)  # pixel count to area (49 km2 per pixel)

        return ret

    for i in np.arange(cut.shape[0]):

        ret = mcs_find(cut[i, :, :].values, thresh=-40)
        if not ret:
            continue
        year = int(cut['time.year'].values[i])
        dic[year].append(ret)


    for year in dic:
        dic[year] = [item for sublist in dic[year] for item in sublist]
Example #23
def read_mixed(paths, variable: str, **args):
    """
    Reads variable from multiple files and mixed locations
    """
    ds = xr.open_mfdataset(paths, concat_dim='time')
    v = ds[variable]
    a = v.sel(**args)  # label-based selection at the requested locations
    a.load()

    return a
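For example (hypothetical file pattern and coordinates; sel requires labels that exist in the files, or a method='nearest' argument added to the call above):

# read a 2 m temperature series at one grid point from a set of files
series = read_mixed('data/era5_*.nc', 't2m', lat=12.5, lon=-1.5)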
Example #24
def plot():

    lsta_all = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/scale_maps_smallPref/*.nc')

    temp_all = xr.open_mfdataset('/users/global/cornkle/data/OBS/modis_LST/modis_netcdf/lsta_daily_*.nc')
    temp_all = temp_all.sel(lat=slice(10.5, 17.5), lon=slice(-9.5, 9.5))
    lsta_all = lsta_all.sel(lat=slice(10.5, 17.5), lon=slice(-9.5, 9.5))

    temp_all = temp_all.where(temp_all['time'] == lsta_all['time'])

    lsta_all = lsta_all.where(temp_all > -800)
    temp_all = temp_all.where(temp_all > -800)

    lsta_all = lsta_all.where(np.abs(temp_all['LSTA'].values) > 0.2)
    temp_all = temp_all.where(np.abs(temp_all['LSTA'].values) > 0.2)

    dic = pkl.load( open("/users/global/cornkle/figs/LSTA-bullshit/scales/new/scalesVSblob.p", "rb"))

    blob = np.squeeze(np.concatenate(dic['blob']))
    scale = np.squeeze(np.concatenate(dic['scale']))
    temp = np.squeeze(np.concatenate(dic['temp']))


    scalei = scale[np.isfinite(scale) & np.isfinite(temp)]
    blobi = blob[np.isfinite(scale) & np.isfinite(temp)]
    tempi = temp[np.isfinite(scale) & np.isfinite(temp)]


    H, xbins, ybins = np.histogram2d(tempi,np.abs(scalei) , bins = [ np.arange(-10,11,2), np.arange(0,151,15)])
    H = H.transpose() #/ np.sum(H)

    H2, xbins, ybins = np.histogram2d(temp_all['LSTA'].values.flatten(), np.abs(lsta_all['LSTA'].values.flatten()), bins=[np.arange(-10, 11, 2), np.arange(0, 151, 15)])
    #H2 = H2.transpose() / np.sum(H2)

    X,Y = np.meshgrid(xbins, ybins)

    f = plt.figure()

    plt.pcolormesh(X,Y,H, cmap='viridis')
    plt.colorbar()
Example #25
def test_plot_area_avg(target_nc_folder="", source_nc_path=""):

    # target_nc_folder = "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_daily_Obs_monthly_icefix_1980-2009"
    # target_nc_folder = "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_icefix_Obs_1980-1981_test"

    #target_nc_folder = "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_daily_Obs_monthly_icefix_test2_1980-1981_test1"



    ice_fr = xarray.open_dataset(source_nc_path)["LC"]

    assert isinstance(ice_fr, xarray.DataArray)
    ice_fr = ice_fr.where((ice_fr >= 0) & (ice_fr <= 1))


    # t, x, y
    source_data = ice_fr.to_masked_array(copy=False)
    source_time = ice_fr.coords["time"]
    source_time = pd.to_datetime(source_time.values.tolist())

    s_source = pd.Series(data=[
        (field[~field.mask].mean() if not np.all(field.mask) else np.nan) for field in source_data
    ], index=source_time)

    ice_fr_lkeff = xarray.open_mfdataset(target_nc_folder + "/*daily.nc")["lake_ice_fraction"]
    lkeff_data = ice_fr_lkeff.to_masked_array(copy=False)
    lkeff_time = pd.to_datetime(ice_fr_lkeff.coords["t"].values.tolist())

    s_lkeff = pd.Series([
        (field[~field.mask].mean() if not np.all(field.mask) else np.nan) for field in lkeff_data
    ], index=lkeff_time)

    s_source = s_source[(s_source.index <= lkeff_time[-1]) & (s_source.index >= lkeff_time[0])]

    assert isinstance(s_source, pd.Series)

    #
    print(f"Source: len={len(s_source)}")
    print(f"Lkeff: len={len(s_lkeff)}")

    # do the plotting
    fig = plt.figure()
    gs = GridSpec(2, 1)
    # plot initial lake fractions
    ax = fig.add_subplot(gs[0, 0])
    s_source.plot(ax=ax, marker=".", linestyle="None", label="original")
    ax.legend()


    # plot lake fractions output by the hles algorithm
    ax = fig.add_subplot(gs[1, 0], sharex=ax)
    s_lkeff.plot(ax=ax, marker=".", linestyle="None", label="lkeff")

    ax.legend()
Example #26
    def test_preprocess_mfdataset(self):
        original = Dataset({'foo': ('x', np.random.randn(10))})
        with create_tmp_file() as tmp:
            original.to_netcdf(tmp)

            def preprocess(ds):
                return ds.assign_coords(z=0)

            expected = preprocess(original)
            with open_mfdataset(tmp, preprocess=preprocess) as actual:
                self.assertDatasetIdentical(expected, actual)
Example #27
def mergeCMORPH():

    sm_folder = '/users/global/cornkle/data/OBS/CMORPH/CMORPH_nc/'

    for y in range(2006, 2011):
        files = sm_folder + str(y) + '/' + '*.nc'
        ds = xr.open_mfdataset(files)

        enc = {'pr': {'complevel': 5, 'zlib': True}}
        ds.to_netcdf(sm_folder + 'CMORPH_WA_' + str(y) + '.nc', encoding=enc, format='NETCDF4')

        print('Wrote ' + sm_folder + 'CMORPH_WA_' + str(y) + '.nc')
Example #28
def read_data(path):
    """
    Read in multiple netCDF files and combine them in an xarray dataset.

    :rtype: xr.Dataset
    :param path: Path to the folder
    :return: The resulting dataset
    """
    path = os.path.join(path, '*.nc')
    print(path)
    dataset = xr.open_mfdataset(path, concat_dim='time')
    return dataset
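For instance, assuming a folder of time-sliced files:

ds = read_data('/data/model/run1')  # opens /data/model/run1/*.nc as one dataset along 'time'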
Example #29
    def __init__(self, path="", vname=""):
        self.ds = xarray.open_mfdataset(path)

        print(self.ds)

        self.data = self.ds[vname]
        self.vname = vname


        # Create the caching directory for a variable
        self.cache_dir = Path("Daymet_cache") / vname
        self.cache_dir.mkdir(parents=True, exist_ok=True)
Example #30
def tamsat(y):

    path = '/users/global/cornkle/data/OBS/TAMSATv3/'

    print('Doing '+str(y))

    area = np.array([[-17,-10,10,17], [-10,-2,10,17], [-2,9,10,17], [-3,3,4,12], [-17,-15,13.5,15.5]]) # [-17,-10,13,17]

    coord = area[4]

    # regions: west, central, east, Ghana, Sahel


    data = xr.open_mfdataset(path + 'rfe'+str(y)+'*.nc')
    data = data.sel(lon=slice(coord[0], coord[1]), lat=slice(coord[3], coord[2]))
    print('Opened data')

    data = data['rfe']

    tstart=[]
    tend = []

    # for yy in np.arange(data.shape[0]):
    #     for xx in np.arange(data.shape[1]):
    mean = data.mean(dim=['lat', 'lon'])
   # mean = data.isel(lat=yy, lon=xx)
    md = mean.to_pandas()
    md = md.reindex(pd.date_range(str(y)+'-01-01', str(y)+'-12-31', freq='D'))

    diff = RainySeason(md)

    dstart = np.argmin(diff)
    dend = np.argmax(diff)

    # f = plt.figure()
    # ax = f.add_subplot(111)
    # plt.plot(diff.index, diff)
    # plt.axvline(dstart, color='k')
    # plt.axvline(dend, color='k')
    # plt.text(dstart-1, -50, str(dstart))
    # plt.text(dend-1, -50, str(dend))
    # plt.minorticks_on()


    # tstart.append(dstart)
    # tend.append(dend)
    # start = np.median(tstart)
    # end = np.median(tend)
    #
    # print('Done ' + str(y))

    return dstart, dend
Example #31
def read(dictArgs):
    """ read data from model and obs files, process data and return it """

    dsmodel = xr.open_mfdataset(dictArgs["infile"],
                                combine="by_coords",
                                decode_times=False)

    if dictArgs["obsfile"] is not None:
        # priority to user-provided obs file
        dsobs = xr.open_mfdataset(dictArgs["obsfile"],
                                  combine="by_coords",
                                  decode_times=False)
    else:
        # use dataset from catalog, either from command line or default
        cat_platform = "catalogs/obs_catalog_" + dictArgs["platform"] + ".yml"
        catfile = pkgr.resource_filename("om4labs", cat_platform)
        cat = intake.open_catalog(catfile)
        dsobs = cat[dictArgs["dataset"]].to_dask()

    # read in model and obs data
    datamodel = read_data(dsmodel, dictArgs["possible_variable_names"])
    dataobs = read_data(dsobs, dictArgs["possible_variable_names"])

    # subset data
    if dictArgs["depth"] is None:
        dictArgs["depth"] = dictArgs["surface_default_depth"]

    if dictArgs["depth"] is not None:
        datamodel = subset_data(datamodel, "assigned_depth", dictArgs["depth"])
        dataobs = subset_data(dataobs, "assigned_depth", dictArgs["depth"])

    # reduce data along depth (not yet implemented)
    if "depth_reduce" in dictArgs:
        if dictArgs["depth_reduce"] == "mean":
            # do mean
            pass
        elif dictArgs["depth_reduce"] == "sum":
            # do sum
            pass

    # reduce data along time, here mandatory
    if ("assigned_time"
            in datamodel.dims) and (len(datamodel["assigned_time"]) > 1):
        warnings.warn("input dataset has more than one time record, " +
                      "performing non-weighted average")
        datamodel = simple_average(datamodel, "assigned_time")
    if ("assigned_time" in dataobs.dims) and len(dataobs["assigned_time"]) > 1:
        warnings.warn("reference dataset has more than one time record, " +
                      "performing non-weighted average")
        dataobs = simple_average(dataobs, "assigned_time")

    datamodel = datamodel.squeeze()
    dataobs = dataobs.squeeze()

    # check final data is 2d
    assert len(datamodel.dims) == 2
    assert len(dataobs.dims) == 2

    # check consistency of coordinates
    assert np.allclose(datamodel["assigned_lon"], dataobs["assigned_lon"])
    assert np.allclose(datamodel["assigned_lat"], dataobs["assigned_lat"])

    # homogeneize coords
    dataobs = copy_coordinates(datamodel, dataobs,
                               ["assigned_lon", "assigned_lat"])

    # restrict model to where obs exists
    datamodel = datamodel.where(dataobs)

    # dump values
    model = datamodel.to_masked_array()
    obs = dataobs.to_masked_array()
    x = datamodel["assigned_lon"].values
    y = datamodel["assigned_lat"].values

    # compute area
    if "areacello" in dsmodel.variables:
        area = dsmodel["areacello"].values
    else:
        if model.shape == (180, 360):
            area = compute_area_regular_grid(dsmodel)
        else:
            raise IOError("no cell area provided")

    return x, y, area, model, obs
Example #32
    def exec(self):

        log.info('[START] {}'.format("exec"))

        try:

            # bash RunShell-Python.sh "TalentPlatform-LSH0291-Analy.py" "2018-01-01" "2021-01-01"
            # nohup bash RunShell-Python.sh "TalentPlatform-LSH0291-Analy.py" "2018-01-01" "2021-01-01" &
            # /home/dxinyu/TEST/OUTPUT
            # /home/dxinyu/TEST/OUTPUT

            # python3 "/home/dxinyu/TEST/TalentPlatform-LSH0291-DataMerge.py" --inpPath "/home/dxinyu/TEST/OUTPUT" --outPath "/home/dxinyu/TEST/OUTPUT"
            # /data/dxinyu/CM_EDGAR/v5.0_FT2019_carbon_monitor
            # /home/dxinyu/TEST/EDGAR

            if (platform.system() == 'Windows'):

                # option settings
                sysOpt = {
                    # start/end dates
                    'srtDate': '2019-01-01',
                    'endDate': '2019-12-31'
                }

            else:

                # option settings
                sysOpt = {
                    # start/end dates
                    # 'srtDate': globalVar['srtDate']
                    # , 'endDate': globalVar['endDate']
                }

            keyList = [
                'Oil_Power_Plants', 'Coal_Power_Plants', 'Gas_Power_Plants'
            ]

            dsDataL2 = xr.Dataset()
            for i, keyInfo in enumerate(keyList):
                log.info("[CHECK] keyInfo : {}".format(keyInfo))

                dtSrtDate = pd.to_datetime(sysOpt['srtDate'],
                                           format='%Y-%m-%d')
                dtEndDate = pd.to_datetime(sysOpt['endDate'],
                                           format='%Y-%m-%d')
                dtIncDateList = pd.date_range(start=dtSrtDate,
                                              end=dtEndDate,
                                              freq='1M')
                # dtIncDateInfo = dtIncDateList[0]

                searchFileList = []
                for j, dtIncDateInfo in enumerate(dtIncDateList):
                    log.info(
                        "[CHECK] dtIncDateInfo : {}".format(dtIncDateInfo))
                    dtYear = dtIncDateInfo.strftime('%Y')
                    # lstrip, not replace: '10' must stay '10', not become '1'
                    dtMonth = dtIncDateInfo.strftime('%m').lstrip('0')

                    # inpFilePattern = '{}/CarbonMonitor_*{}*_y{}_m{}.nc'.format(serviceName, keyInfo, dtYear, dtMonth)
                    # inpFilePattern = 'projects_v5.0_FT2019_carbon_monitor_*{}_{}_{}.txt'.format(keyInfo, dtYear, dtMonth)
                    inpFilePattern = '{}_{}_{}_{}.nc'.format(
                        serviceName, keyInfo, dtYear, dtMonth)
                    inpFile = '{}/{}/EDGAR/{}'.format(globalVar['inpPath'],
                                                      serviceName,
                                                      inpFilePattern)
                    fileList = sorted(glob.glob(inpFile))
                    if (len(fileList) < 1): continue

                    fileInfo = fileList[0]
                    searchFileList.append(fileList[0])

                dsData = xr.open_mfdataset(searchFileList)
                dsDataL1 = dsData.rename({'ems': keyInfo})
                dsDataL2 = dsDataL2.merge(dsDataL1)
            dsDataL2[[
                'Oil_Power_Plants', 'Coal_Power_Plants', 'Gas_Power_Plants'
            ]].to_array()

            dsDataL2['Gas_Power_Plants'][:, :, 4].plot()
            plt.show()

            # total emissions as the sum of the three source variables
            dsDataL3 = dsDataL2.copy().assign(
                ems=dsDataL2['Oil_Power_Plants'] +
                dsDataL2['Coal_Power_Plants'] + dsDataL2['Gas_Power_Plants'])

            np.nansum(dsDataL3['ems'].values)

            # dsDataL3 = xr.where((dsDataL3 == 0), np.nan, dsDataL3)

            cnt2D = dsDataL3.count(['date'])
            mean2D = dsDataL3.mean(['date'])
            sd2D = dsDataL3.std(['date'])
            sum2D = dsDataL3.sum(['date'])

            time1D = dsDataL3['date'].values
            lon1D = dsDataL3['lon'].values
            lat1D = dsDataL3['lat'].values
            lon2D, lat2D = np.meshgrid(lon1D, lat1D)

            dsDataL4 = dsDataL3['ems'].sel(date=time1D[0])

            dsDataL4.plot()
            plt.show()

            sd2D['ems'].plot()
            plt.show()

            cnt2D['ems'].values.shape
            plt.contourf(lon1D, lat1D, cnt2D['ems'].values)

            np.nanmean(cnt2D['ems'].values)
            np.nanmax(cnt2D['ems'].values)
            np.nanmin(cnt2D['ems'].values)

            # plt.scatter(lon2D, lat2D, c=cnt2D['ems'].values)
            plt.colorbar()
            plt.show()

            # data = pd.read_csv(fileInfo, skiprows=[0, 1], sep=';')
            #
            # dtIncDatePattern = '{}-{}'.format(dtYear, dtMonth)
            # dtIncDate = pd.to_datetime(dtIncDatePattern, format='%Y-%m')
            # data['date'] = dtIncDate
            #
            # dataL1 = data.set_index(['lon', 'lat', 'date'])
            # dsData = dataL1.to_xarray()
            #
            # saveFile = '{}/{}_{}_{}_{}.nc'.format(globalVar['outPath'], serviceName, keyInfo, dtYear, dtMonth)
            # os.makedirs(os.path.dirname(saveFile), exist_ok=True)
            # xr.Dataset(dsData).to_netcdf(saveFile)
            # log.info('[CHECK] saveFile : {}'.format(saveFile))

            # # produce NetCDF
            # dsDataL2 = xr.Dataset(
            #     {
            #         'mean': (('lat', 'lon'), (mean2D['emission'].values).reshape(len(lat1D), len(lon1D)))
            #         , 'count': (('lat', 'lon'), (cnt2D['emission'].values).reshape(len(lat1D), len(lon1D)))
            #         , 'sd': (('lat', 'lon'), (sd2D['emission'].values).reshape(len(lat1D), len(lon1D)))
            #         , 'sum': (('lat', 'lon'), (sum2D['emission'].values).reshape(len(lat1D), len(lon1D)))
            #         , 'extndUncrt': (('lat', 'lon'), (extndUncrt['emission'].values).reshape(len(lat1D), len(lon1D)))
            #         , 'rltvUncrt': (('lat', 'lon'), (rltvUncrt['emission'].values).reshape(len(lat1D), len(lon1D)))
            #     }
            #     , coords={
            #         'lat': lat1D
            #         , 'lon': lon1D
            #     }
            # )
            #
            #
            #
            #
            # dsData = xr.open_mfdataset(fileList)
            # dsData = xr.where((dsData == 0), np.nan, dsData)
            #
            # cnt2D = dsData.count(['month'])
            # mean2D = dsData.mean(['month'])
            # sd2D = dsData.std(['month'])
            # sum2D = dsData.sum(['month'])
            #
            # time1D = dsData['month'].values
            # lon1D = dsData['lon'].values
            # lat1D = dsData['lat'].values
            # lon2D, lat2D = np.meshgrid(lon1D, lat1D)
            #
            # *****************************************************************************
            # compute expanded / relative uncertainty
            # *****************************************************************************
            # degrees of freedom
            df = len(time1D)

            # t-value
            tVal = t(df)

            # t-value for the 95% confidence interval
            t025 = tVal.ppf(0.975)

            # 95% confidence interval bounds
            leftConf = mean2D - t025 * (sd2D / np.sqrt(df))
            rightConf = mean2D + t025 * (sd2D / np.sqrt(df))

            # expanded uncertainty
            extndUncrt = t025 * (sd2D / np.sqrt(df))

            # relative uncertainty (%)
            rltvUncrt = (extndUncrt * 100) / mean2D

            # total uncertainty
            totalUncrt = (rltvUncrt * extndUncrt) / np.abs(extndUncrt)

            dtYear = 2019
            keyInfo = 'land'
            #
            # meanTotalUncrt = np.nanmean(totalUncrt[keyInfo].values)
            # mainTitle = '[{}] {} {} ({:.2f})'.format(dtYear, keyInfo, 'total uncertainty', meanTotalUncrt)
            # saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'totalUncertainty', dtYear)
            # rtnInfo = makeMapPlot(lon2D, lat2D, totalUncrt[keyInfo].values, mainTitle, saveImg, None)
            # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo))
            #
            # keyInfo = 'intl_bunker'
            # meanTotalUncrt = np.nanmean(totalUncrt[keyInfo].values)
            # mainTitle = '[{}] {} {} ({:.2f})'.format(dtYear, keyInfo, 'total uncertainty', meanTotalUncrt)
            # saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'totalUncertainty',  dtYear)
            # rtnInfo = makeMapPlot(lon2D, lat2D, totalUncrt[keyInfo].values, mainTitle, saveImg, None)
            # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo))
            #
            # totalUncrt['sum'] = totalUncrt['land'] + totalUncrt['intl_bunker']
            #
            # keyInfo = 'sum'
            # meanTotalUncrt = np.nanmean(totalUncrt[keyInfo].values)
            # mainTitle = '[{}] {} {} ({:.2f})'.format(dtYear, keyInfo, 'total uncertainty', meanTotalUncrt)
            # saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'totalUncertainty',  dtYear)
            # rtnInfo = makeMapPlot(lon2D, lat2D, totalUncrt[keyInfo].values, mainTitle, saveImg, None)
            # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo))
            #
            #
            # dsDataL3 = totalUncrt.to_dataframe().reset_index()
            # dsDataL4 = dsDataL3.merge(posDataL1, how='left', left_on=['lat', 'lon'], right_on=['lat', 'lon'])
            #
            # dsDataL3.describe()
            # posDataL1.describe()
            #
            # # from global_land_mask import globe
            # # is_on_land = globe.is_land(dsDataL3['lon'], dsDataL3['lat'])
            #
            #
            # # try:
            # #     totalUncrtTotal = dsDataL4.mean()
            # #     totalUncrtLandSea = dsDataL4.groupby(by=['landSea']).mean()
            # #     totalUncrtCont = dsDataL4.groupby(by=['cont']).mean()
            # #
            # #     emissionTotal = dsDataL4.mean()['mean']
            # #     emissionLandSea = dsDataL4.groupby(by=['landSea']).mean()['mean']
            # #     emissionCont = dsDataL4.groupby(by=['cont']).mean()['mean']
            # #
            # #     dict = {
            # #         'year': [dtYear]
            # #         , 'key': [keyInfo]
            # #         , 'rltvUncrt total': [rltvUncrtTotal]
            # #         , 'rltvUncrt land': [rltvUncrtLandSea['land']]
            # #         , 'rltvUncrt sea': [rltvUncrtLandSea['sea']]
            # #         , 'rltvUncrt Africa': [rltvUncrtCont['Africa']]
            # #         , 'rltvUncrt Antarctica': [rltvUncrtCont['Antarctica']]
            # #         , 'rltvUncrt Asia': [rltvUncrtCont['Asia']]
            # #         , 'rltvUncrt Australia': [rltvUncrtCont['Australia']]
            # #         , 'rltvUncrt Europe': [rltvUncrtCont['Europe']]
            # #         , 'rltvUncrt NorthAmerica': [rltvUncrtCont['NorthAmerica']]
            # #         , 'rltvUncrt SouthAmerica': [rltvUncrtCont['SouthAmerica']]
            # #     }
            # #
            # #     statData = statData.append(pd.DataFrame.from_dict(dict))
            #
            #
            #
            #
            # # mainTitle = '[{}] {} {}'.format(dtYear, keyInfo, 'emission')
            # # saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'emission', dtYear)
            # # rtnInfo = makeMapPlot(lon2D, lat2D, mean2D['emission'].values, mainTitle, saveImg, True)
            # # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo))
            #
            # # *******************************************************
            # # emissions by land/sea and by continent
            # # *******************************************************
            # keyList = ['total', 'Power', 'Industry', 'Residential', 'GroundTransportation', 'InternationalAviation', 'InternationalShipping', 'DomesticAviation']
            #
            # statData = pd.DataFrame()
            # for i, keyInfo in enumerate(keyList):
            #
            #     # dtYear = 2019
            #     for dtYear in range(2018, 2022):
            #         log.info("[CHECK] keyInfo : {}".format(keyInfo))
            #         log.info("[CHECK] dtYear : {}".format(dtYear))
            #
            #         inpFilePattern = '{}_{}_{}*.nc'.format(serviceName, keyInfo, dtYear)
            #         inpFile = '{}/{}'.format(globalVar['outPath'], inpFilePattern)
            #         log.info("[CHECK] inpFile : {}".format(inpFile))
            #
            #         fileList = sorted(glob.glob(inpFile))
            #         log.info('[CHECK] fileList : {}'.format(fileList))
            #         if (len(fileList) < 1): continue
            #
            #         dsData = xr.open_mfdataset(fileList)
            #         # log.info('[CHECK] dsData : {}'.format(dsData))
            #
            #         time1D = dsData['time'].values
            #         lon1D = dsData['lon'].values
            #         lat1D = dsData['lat'].values
            #         lon2D, lat2D = np.meshgrid(lon1D, lat1D)
            #
            #         # handle missing values
            #         dsData = xr.where((dsData == 0), np.nan, dsData)
            #
            #         # *****************************************************************************
            #         # statistics by latitude/longitude
            #         # *****************************************************************************
            #         cnt2D = dsData.count(['time'])
            #         mean2D = dsData.mean(['time'])
            #         sd2D = dsData.std(['time'])
            #         sum2D = dsData.sum(['time'])
            #
            #         # cntVal = np.nanmean(cnt2D['emission'])
            #         # log.info('[CHECK] cntVal : {}'.format(cntVal))
            #         #
            #         # sumVal = np.nanmean(sum2D['emission'])
            #         # log.info('[CHECK] sumVal : {}'.format(sumVal))
            #         #
            #         # meanVal = np.nanmean(mean2D['emission'])
            #         # log.info('[CHECK] meanVal : {}'.format(meanVal))
            #         #
            #         # sdVal = np.nanmean(sd2D['emission'])
            #         # log.info('[CHECK] sdVal : {}'.format(sdVal))
            #
            #
            #         # *****************************************************************************
            #         # compute expanded / relative uncertainty
            #         # *****************************************************************************
            #         # degrees of freedom
            #         df = len(time1D)
            #
            #         # t-value
            #         tVal = t(df)
            #
            #         # t-value for the 95% confidence interval
            #         t025 = tVal.ppf(0.975)
            #
            #         # 95% confidence interval bounds
            #         # leftConf = mean2D - t025 * (sd2D / np.sqrt(df))
            #         # rightConf = mean2D + t025 * (sd2D / np.sqrt(df))
            #
            #         # expanded uncertainty
            #         extndUncrt = t025 * (sd2D / np.sqrt(df))
            #
            #         # relative uncertainty (%)
            #         rltvUncrt = (extndUncrt * 100) / mean2D
            #
            #         # produce NetCDF
            #         dsDataL2 = xr.Dataset(
            #             {
            #                 'mean': (('lat', 'lon'), (mean2D['emission'].values).reshape(len(lat1D), len(lon1D)))
            #                 , 'count': (('lat', 'lon'), (cnt2D['emission'].values).reshape(len(lat1D), len(lon1D)))
            #                 , 'sd': (('lat', 'lon'), (sd2D['emission'].values).reshape(len(lat1D), len(lon1D)))
            #                 , 'sum': (('lat', 'lon'), (sum2D['emission'].values).reshape(len(lat1D), len(lon1D)))
            #                 , 'extndUncrt': (('lat', 'lon'), (extndUncrt['emission'].values).reshape(len(lat1D), len(lon1D)))
            #                 , 'rltvUncrt': (('lat', 'lon'), (rltvUncrt['emission'].values).reshape(len(lat1D), len(lon1D)))
            #             }
            #             , coords={
            #                 'lat': lat1D
            #                 , 'lon': lon1D
            #             }
            #         )
            #
            #         saveFile = '{}/{}_{}_{}_{}.nc'.format(globalVar['outPath'], serviceName, keyInfo, 'statData', dtYear)
            #         os.makedirs(os.path.dirname(saveFile), exist_ok=True)
            #         dsDataL2.to_netcdf(saveFile)
            #         log.info('[CHECK] saveFile : {}'.format(saveFile))
            #
            #         dsDataL3 = dsDataL2.to_dataframe().reset_index()
            #         dsDataL4 = dsDataL3.merge(posDataL1, how='left', left_on=['lat', 'lon'], right_on=['lat', 'lon'])
            #
            #         try:
            #             rltvUncrtTotal = dsDataL4.mean()['rltvUncrt']
            #             rltvUncrtLandSea = dsDataL4.groupby(by=['landSea']).mean()['rltvUncrt']
            #             rltvUncrtCont = dsDataL4.groupby(by=['cont']).mean()['rltvUncrt']
            #
            #             emissionTotal = dsDataL4.mean()['mean']
            #             emissionLandSea = dsDataL4.groupby(by=['landSea']).mean()['mean']
            #             emissionCont = dsDataL4.groupby(by=['cont']).mean()['mean']
            #
            #             dict = {
            #                 'year': [dtYear]
            #                 , 'key': [keyInfo]
            #                 , 'rltvUncrt total': [rltvUncrtTotal]
            #                 , 'rltvUncrt land': [rltvUncrtLandSea['land']]
            #                 , 'rltvUncrt sea': [rltvUncrtLandSea['sea']]
            #                 , 'rltvUncrt Africa': [rltvUncrtCont['Africa']]
            #                 , 'rltvUncrt Antarctica': [rltvUncrtCont['Antarctica']]
            #                 , 'rltvUncrt Asia': [rltvUncrtCont['Asia']]
            #                 , 'rltvUncrt Australia': [rltvUncrtCont['Australia']]
            #                 , 'rltvUncrt Europe': [rltvUncrtCont['Europe']]
            #                 , 'rltvUncrt NorthAmerica': [rltvUncrtCont['NorthAmerica']]
            #                 , 'rltvUncrt SouthAmerica': [rltvUncrtCont['SouthAmerica']]
            #
            #                 , 'emission total': [emissionTotal]
            #                 , 'emission land': [emissionLandSea['land']]
            #                 , 'emission sea': [emissionLandSea['sea']]
            #                 , 'emission Africa': [emissionCont['Africa']]
            #                 , 'emission Antarctica': [emissionCont['Antarctica']]
            #                 , 'emission Asia': [emissionCont['Asia']]
            #                 , 'Australia': [emissionCont['Australia']]
            #                 , 'emission Europe': [emissionCont['Europe']]
            #                 , 'emission NorthAmerica': [emissionCont['NorthAmerica']]
            #                 , 'emission SouthAmerica': [emissionCont['SouthAmerica']]
            #             }
            #
            #             statData = statData.append(pd.DataFrame.from_dict(dict))
            #         except Exception as e:
            #             log.error("Exception : {}".format(e))
            #
            #         # visualization
            #         mainTitle = '[{}] {} {}'.format(dtYear, keyInfo, 'emission')
            #         saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'emission', dtYear)
            #         rtnInfo = makeMapPlot(lon2D, lat2D, mean2D['emission'].values, mainTitle, saveImg, True)
            #         log.info('[CHECK] rtnInfo : {}'.format(rtnInfo))
            #
            #         mainTitle = '[{}] {} {}'.format(dtYear, keyInfo, 'relative uncertainty')
            #         saveImg = '{}/{}_{}_{}-{}.png'.format(globalVar['figPath'], serviceName, keyInfo, 'relativeUncertainty', dtYear)
            #         rtnInfo = makeMapPlot(lon2D, lat2D, rltvUncrt['emission'].values, mainTitle, saveImg, None)
            #         log.info('[CHECK] rtnInfo : {}'.format(rtnInfo))
            #
            # saveXlsxFile = '{}/{}_{}.xlsx'.format(globalVar['outPath'], serviceName, 'statData')
            # os.makedirs(os.path.dirname(saveXlsxFile), exist_ok=True)
            # statData.to_excel(saveXlsxFile, index=False)
            # log.info("[CHECK] saveXlsxFile : {}".format(saveXlsxFile))

        except Exception as e:
            log.error("Exception : {}".format(e))
            raise e
        finally:
            log.info('[END] {}'.format("exec"))
Example #33
def get_era5_daily(var,
                   date_from_arg,
                   date_to_arg=None,
                   reduce_func=None,
                   cache_dir='era5',
                   resample='1D'):
    """
    Download and return a variable from the European Centre for Medium
    Range Weather Forecasts (ECMWF) global climate reanalysis product 
    (ERA5) for a defined time window.

    Parameters
    ----------     
    var : string
        Name of the ERA5 climate variable to download, e.g.
        "air_temperature_at_2_metres" 

    date_from_arg: string or datetime object
        Starting date of the time window.
        
    date_to_arg: string or datetime object
        End date of the time window. If not supplied, set to be the same
        as starting date.

    reduce_func: numpy function
        lets you specify a function to apply to each day's worth of data.  
        The default is np.mean, which computes daily average. To get a 
        sum, use np.sum.

    cache_dir: string
        Path to save downloaded ERA5 data. The path will be created if
        it does not already exist. The default is 'era5'.
        
    resample: string
        Temporal resampling frequency to be used for xarray's resample
        function. The default is '1D', which is daily. Since ERA5 data 
        is provided as one file per month, maximum resampling period is 
        '1M'.

    Returns
    -------
    A lazy-loaded xarray dataset containing an ERA5 variable for the 
    selected time window.

    """

    # Massage input data
    assert var in ERA5_VARS, "var must be one of [{}] (got {})".format(
        ','.join(ERA5_VARS), var)
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)
    if reduce_func is None:
        reduce_func = np.mean
    if type(date_from_arg) == str:
        date_from_arg = parse(date_from_arg)
    if type(date_to_arg) == str:
        date_to_arg = parse(date_to_arg)
    if date_to_arg is None:
        date_to_arg = date_from_arg

    # Make sure our dates are in the correct order
    from_date = min(date_from_arg, date_to_arg)
    to_date = max(date_from_arg, date_to_arg)

    # Download ERA5 files to local cache if they don't already exist
    client = None  # Boto client (if needed)
    local_files = []  # Will hold list of local filenames
    Y, M = from_date.year, from_date.month  # Loop vars
    loop_end = to_date.year * 12 + to_date.month  # Loop sentinel
    while Y * 12 + M <= loop_end:
        local_file = os.path.join(
            cache_dir, "{Y:04}_{M:02}_{var}.nc".format(Y=Y, M=M, var=var))
        data_key = "{Y:04}/{M:02}/data/{var}.nc".format(Y=Y, M=M, var=var)
        if not os.path.isfile(
                local_file
        ):  # check if file already exists (TODO: move to temp, catch failed download)
            if client is None:
                client = boto3.client('s3',
                                      config=botocore.client.Config(
                                          signature_version=botocore.UNSIGNED))
            client.download_file('era5-pds', data_key, local_file)
        local_files.append(local_file)
        if M == 12:
            Y += 1
            M = 1
        else:
            M += 1

    # Load and merge the locally-cached ERA5 data from the list of filenames
    # include the whole end date, not just 00:00
    date_slice = slice(str(from_date.date()), str(to_date.date()))

    def prepro(ds):
        if 'time0' in ds.dims:
            ds = ds.rename({"time0": "time"})
        if 'time1' in ds.dims:
            ds = ds.rename({
                "time1": "time"
            })  # This should INTENTIONALLY error if both times are defined
        ds = ds[[var]]
        output = ds.sel(time=date_slice).resample(
            time=resample).reduce(reduce_func)
        output.attrs = ds.attrs
        for v in output.data_vars:
            output[v].attrs = ds[v].attrs
        return output

    return xr.open_mfdataset(local_files,
                             combine='by_coords',
                             compat='equals',
                             preprocess=prepro,
                             parallel=True)
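A usage sketch; the variable name comes from the docstring, and the dates are illustrative:

# daily-mean 2 m air temperature for January 2020, cached under ./era5
t2m = get_era5_daily('air_temperature_at_2_metres',
                     '2020-01-01', '2020-01-31')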
Example #34
# open L2 products
print('Loading data\n')


def preprocess(ds):
    ds['time'] = pd.to_datetime(
        np.array([
            attributes[ds.attrs['source_product']]['time_coverage_start']
        ])).values
    return ds


DS = xr.open_mfdataset([
    filename.replace('L2', 'L3')
    for filename in L2_files_urls if exists(filename.replace('L2', 'L3'))
],
                       combine='nested',
                       concat_dim='time',
                       preprocess=preprocess,
                       chunks={'time': 2000})
DS = DS.sortby('time')

# filter pixels
if args.shp is not None:
    print('\nApplying shapefile\n')
    mask = make_country_mask(args.shp, DS.longitude, DS.latitude)
    for column in [
            column_name for column_name in list(DS.variables)
            if DS[column_name].dims == ('time', 'latitude', 'longitude')
    ]:
        DS[column] = DS[column].where(mask)
Example #35
def nc_to_xr_dataset(liste_fichiers_nc,
                     verbose=1,
                     patch_xr_open_mfdataset=True):
    """
    fonction qui lit une liste de fichiers netcdf et qui retourne 
    un Dataset xarray

    """
    assert (isinstance(liste_fichiers_nc, list))
    assert (len(liste_fichiers_nc) > 0)

    # timing
    t00 = time.time()

    # print the list of processed files if verbose
    if verbose:
        print('opening the following files:')
        for f in liste_fichiers_nc:
            print(f)

    # workaround for the slowness of xr.open_mfdataset
    #
    # xr.open_mfdataset takes about 10x longer than
    # nc.MFDataset. The time can be cut in half with the
    # decode_cf=False option plus xr.decode_cf() afterwards,
    # but the date conversion then fails if the time unit
    # is not identical across all files.
    #
    # the workaround uses nc.MFDataset to compute the dates and
    # imposes them on the dataset returned by xr.open_mfdataset
    # with decode_cf=False
    #
    if patch_xr_open_mfdataset:
        # extract the dates with netCDF4
        ds = nc.MFDataset(liste_fichiers_nc, 'r')
        vartime = nc.MFTime(ds.variables['time'])
        dates = cftime.num2date(vartime[:], vartime.units, vartime.calendar)

        # check whether any variable is of type int16
        def _has_int16(ds):
            for nv in ds.variables:
                if ds.variables[nv].dtype == 'int16':
                    return True
            return False

        has_int16 = _has_int16(ds)
        ds.close()
        # open the files without decoding time
        #
        # branch on whether the dataset contains int16 variables
        #
        # for a short/int16 field, xr.decode_cf does not apply
        # scale_factor and add_offset
        #
        # todo: raise an issue about this
        if has_int16:
            # slower option, but it decodes the int16 variables
            print('*** int16 variables present ***')
            ds = xr.open_mfdataset(liste_fichiers_nc,
                                   decode_times=False,
                                   combine='nested',
                                   concat_dim='time')
        else:
            # faster option, but it does not decode int16 variables
            ds = xr.open_mfdataset(liste_fichiers_nc,
                                   decode_cf=False,
                                   combine='nested',
                                   concat_dim='time',
                                   coords='minimal',
                                   compat='override',
                                   data_vars='minimal')
            ds = xr.decode_cf(ds)

        # drop the time variable to make sure no attributes
        # remain that could cause problems later
        ds = ds.drop('time')
        # set the time variable to the dates computed above
        ds['time'] = ('time', dates)
        # add the units and calendar attributes
        ds.time.attrs['units'] = vartime.units
        ds.time.attrs['calendar'] = vartime.calendar
    else:
        ds = xr.open_mfdataset(liste_fichiers_nc)

    if verbose:
        print(
            f'opened {len(liste_fichiers_nc)} files in {time.time() - t00:6.2f}s'
        )
    return ds
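# A hedged usage sketch for the helper above; the glob pattern is
# hypothetical, any set of netCDF files sharing a 'time' dimension works:
import glob

fichiers = sorted(glob.glob('/data/model/ts_*.nc'))
ds = nc_to_xr_dataset(fichiers, verbose=1, patch_xr_open_mfdataset=True)
print(ds.time.attrs['units'], ds.time.attrs['calendar'])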
Example #36
0
    if pressure_adjust:
        ds = get_pressure_coord_fields(case,
                                       varlist,
                                       from_time,
                                       to_time,
                                       history_fld,
                                       model=model)
        return ds
    else:
        fl = []
        if varlist is not None:
            vl_lacking = []
            for var in varlist:
                fn = get_filename_ng_field(var, model, case, from_time, to_time)
                if os.path.isfile(fn):
                    fl.append(fn)
                else:
                    vl_lacking.append(var)
        else:
            # fl stays empty here so the merge below is skipped
            vl_lacking = varlist

        ds = xr_import_NorESM(case, vl_lacking, from_time, to_time, path=raw_data_path,
                              model=model,
                              history_fld=history_fld,
                              comp=comp, chunks=chunks)
        ds = xr_fix(ds, model_name=model)
        if len(fl)>0:
            ds_f_file = xr.open_mfdataset(fl, combine='by_coords')
            ds = xr.merge([ds, ds_f_file])
        return ds
Example #37
0
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_199801-199812.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_199901-199912.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200001-200012.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200101-200112.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200201-200212.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200301-200312.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200401-200412.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200501-200512.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200601-200612.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200701-200712.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200801-200812.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_200901-200912.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_201001-201012.nc',
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/ta/ta_Amon_reanalysis_IFS-Cy31r2_201101-201112.nc'
    ]
elif variable == "tas":
    pathList = [
        '/dass/dassnsd/data01/cldra/data/pubrepo/CREATE-IP/data/reanalysis/ECMWF/IFS-Cy31r2/mon/atmos/tas/tas_Amon_reanalysis_IFS-Cy31r2_197901-201712.nc'
    ]
else:
    raise Exception(f"Unknown variable: {variable}")

start = time.time()
dset: xr.Dataset = xr.open_mfdataset(pathList,
                                     data_vars=[variable],
                                     parallel=True)
var: xr.Variable = dset.variables.get(variable)
print(
    f"Opened dataset, shape: {var.shape}, completed  in {str(time.time() - start)} seconds"
)
Example #38
0
def ocn_modelvsobs(config, field):
    """
    Plots a comparison of ACME/MPAS output to SST or MLD observations

    Parameters
    ----------
    config :  instance of MpasAnalysisConfigParser
        Contains configuration options

    field : {'sst', 'sss', 'mld'}
        The name of a field to be analyzed

    Authors
    -------
    Luke Van Roekel, Xylar Asay-Davis, Milena Veneziani

    Last Modified
    -------------
    03/23/2017
    """

    # perform common setup for the task
    namelist, runStreams, historyStreams, calendar, streamMap, \
        variableMap, plotsDirectory = setup_task(config, componentName='ocean')

    simulationStartTime = get_simulation_start_time(runStreams)

    # get a list of timeSeriesStats output files from the streams file,
    # reading only those that are between the start and end dates
    startDate = config.get('climatology', 'startDate')
    endDate = config.get('climatology', 'endDate')
    streamName = historyStreams.find_stream(streamMap['timeSeriesStats'])
    inputFiles = historyStreams.readpath(streamName,
                                         startDate=startDate,
                                         endDate=endDate,
                                         calendar=calendar)
    print('Reading files {} through {}'.format(inputFiles[0], inputFiles[-1]))

    observationsDirectory = build_config_full_path(
        config, 'oceanObservations', '{}Subdirectory'.format(field))
    mainRunName = config.get('runs', 'mainRunName')

    overwriteMpasClimatology = config.getWithDefault(
        'climatology', 'overwriteMpasClimatology', False)

    overwriteObsClimatology = config.getWithDefault('oceanObservations',
                                                    'overwriteObsClimatology',
                                                    False)

    try:
        restartFileName = runStreams.readpath('restart')[0]
    except ValueError:
        raise IOError('No MPAS-O restart file found: need at least one '
                      'restart file for ocn_modelvsobs calculation')

    sectionName = 'regridded{}'.format(field.upper())
    outputTimes = config.getExpression(sectionName, 'comparisonTimes')

    # get a list of regridded observations files and check if they exist.  If
    # they are all there, we don't have to do anything else with the
    # observations
    obsFileNames = \
        {'mld': "{}/holtetalley_mld_climatology.nc".format(
                observationsDirectory),
         'sst': "{}/MODEL.SST.HAD187001-198110.OI198111-201203.nc".format(
                observationsDirectory),
         'sss': "{}/Aquarius_V3_SSS_Monthly.nc".format(
                observationsDirectory)}

    obsFileName = obsFileNames[field]

    buildObsClimatologies = overwriteObsClimatology
    for months in outputTimes:
        (climatologyFileName, regriddedFileName) = \
            climatology.get_observation_climatology_file_names(
                config=config, fieldName=field, monthNames=months,
                componentName='ocean', gridFileName=obsFileName,
                latVarName='lat', lonVarName='lon')
        if not os.path.exists(regriddedFileName):
            buildObsClimatologies = True
            break

    varList = [field]

    if field == 'mld':

        iselvals = None

        if buildObsClimatologies:
            # Load MLD observational data
            dsObs = xr.open_mfdataset(obsFileName)

            # Increment month value to be consistent with the model output
            dsObs.iMONTH.values += 1

            # Rename the dimensions to be consistent with other obs. data sets
            dsObs = dsObs.rename({
                'month': 'calmonth',
                'lat': 'latCoord',
                'lon': 'lonCoord'
            })  # rename(inplace=True) was removed from xarray; assign instead
            dsObs = dsObs.rename({
                'iMONTH': 'month',
                'iLAT': 'lat',
                'iLON': 'lon'
            })
            # set the coordinates now that the dimensions have the same names
            dsObs.coords['lat'] = dsObs['latCoord']
            dsObs.coords['lon'] = dsObs['lonCoord']
            dsObs.coords['month'] = dsObs['calmonth']

            # Reorder dataset for consistency with other obs. data sets
            dsObs = dsObs.transpose('month', 'lat', 'lon')

        obsFieldName = 'mld_dt_mean'

        # Set appropriate MLD figure labels
        observationTitleLabel = \
            "Observations (HolteTalley density threshold MLD)"
        outFileLabel = "mldHolteTalleyARGO"
        unitsLabel = 'm'

    elif field == 'sst':

        iselvals = {'nVertLevels': 0}

        climStartYear = config.getint('oceanObservations',
                                      'sstClimatologyStartYear')
        climEndYear = config.getint('oceanObservations',
                                    'sstClimatologyEndYear')
        timeStart = datetime.datetime(year=climStartYear, month=1, day=1)
        timeEnd = datetime.datetime(year=climEndYear, month=12, day=31)

        if climStartYear < 1925:
            period = 'pre-industrial'
        else:
            period = 'present-day'

        if buildObsClimatologies:
            dsObs = xr.open_mfdataset(obsFileName)

            dsTimeSlice = dsObs.sel(time=slice(timeStart, timeEnd))
            monthlyClimatology = dsTimeSlice.groupby('time.month').mean('time')

            dsObs = monthlyClimatology.transpose('month', 'lat', 'lon')

        obsFieldName = 'SST'

        # Set appropriate figure labels for SST
        observationTitleLabel = \
            "Observations (Hadley/OI, {} {:04d}-{:04d})".format(period,
                                                                climStartYear,
                                                                climEndYear)
        outFileLabel = "sstHADOI"
        unitsLabel = r'$^o$C'

    elif field == 'sss':

        iselvals = {'nVertLevels': 0}

        timeStart = datetime.datetime(2011, 8, 1)
        timeEnd = datetime.datetime(2014, 12, 31)

        if buildObsClimatologies:
            dsObs = xr.open_mfdataset(obsFileName)
            dsTimeSlice = dsObs.sel(time=slice(timeStart, timeEnd))

            # The following line converts from DASK to numpy to suppress an odd
            # warning that doesn't influence the figure output
            dsTimeSlice.SSS.values

            monthlyClimatology = dsTimeSlice.groupby('time.month').mean('time')

            # Rename the observation data for code compactness
            dsObs = monthlyClimatology.transpose('month', 'lat', 'lon')

        obsFieldName = 'SSS'

        observationTitleLabel = "Observations (Aquarius, 2011-2014)"
        outFileLabel = 'sssAquarius'
        unitsLabel = 'PSU'

    ds = open_multifile_dataset(fileNames=inputFiles,
                                calendar=calendar,
                                config=config,
                                simulationStartTime=simulationStartTime,
                                timeVariableName='Time',
                                variableList=varList,
                                iselValues=iselvals,
                                variableMap=variableMap,
                                startDate=startDate,
                                endDate=endDate)

    changed, startYear, endYear = \
        climatology.update_start_end_year(ds, config, calendar)

    monthlyClimatology = climatology.compute_monthly_climatology(ds, calendar)

    mpasMappingFileName = climatology.write_mpas_mapping_file(
        config=config, meshFileName=restartFileName)

    if buildObsClimatologies:
        obsMappingFileName = \
            climatology.write_observations_mapping_file(
                config=config, componentName='ocean', fieldName=field,
                gridFileName=obsFileName, latVarName='lat',  lonVarName='lon')
    else:
        obsMappingFileName = None

    (colormapResult, colorbarLevelsResult) = setup_colormap(config,
                                                            sectionName,
                                                            suffix='Result')
    (colormapDifference,
     colorbarLevelsDifference) = setup_colormap(config,
                                                sectionName,
                                                suffix='Difference')

    # Interpolate and compute biases
    for months in outputTimes:
        monthValues = constants.monthDictionary[months]

        (climatologyFileName, regriddedFileName) = \
            climatology.get_mpas_climatology_file_names(config=config,
                                                        fieldName=field,
                                                        monthNames=months)

        if overwriteMpasClimatology or not os.path.exists(climatologyFileName):
            seasonalClimatology = climatology.compute_seasonal_climatology(
                monthlyClimatology, monthValues, field)
            # write out the climatology so we can interpolate it with
            # interpolate.remap
            seasonalClimatology.to_netcdf(climatologyFileName)

        interpolate.remap(inFileName=climatologyFileName,
                          outFileName=regriddedFileName,
                          inWeightFileName=mpasMappingFileName,
                          sourceFileType='mpas',
                          overwrite=overwriteMpasClimatology)

        ncFile = netCDF4.Dataset(regriddedFileName, mode='r')
        modelOutput = ncFile.variables[field][:]
        lons = ncFile.variables["lon"][:]
        lats = ncFile.variables["lat"][:]
        ncFile.close()
        lonTarg, latTarg = np.meshgrid(lons, lats)

        # now the observations
        (climatologyFileName, regriddedFileName) = \
            climatology.get_observation_climatology_file_names(
                config=config, fieldName=field, monthNames=months,
                componentName='ocean', gridFileName=obsFileName,
                latVarName='lat', lonVarName='lon')

        if buildObsClimatologies:
            if (overwriteObsClimatology
                    or (not os.path.exists(climatologyFileName)
                        and not os.path.exists(regriddedFileName))):
                seasonalClimatology = climatology.compute_seasonal_climatology(
                    dsObs, monthValues, obsFieldName)
                # Either we want to overwrite files or neither the climatology
                # nor its regridded counterpart exist. Write out the
                # climatology so we can interpolate it with interpolate.remap
                seasonalClimatology.to_netcdf(climatologyFileName)

            if obsMappingFileName is None:
                # no remapping is needed
                regriddedFileName = climatologyFileName
            else:
                interpolate.remap(inFileName=climatologyFileName,
                                  outFileName=regriddedFileName,
                                  inWeightFileName=obsMappingFileName,
                                  sourceFileType='latlon',
                                  overwrite=overwriteObsClimatology)

        # read in the results from the remapped files
        ncFile = netCDF4.Dataset(regriddedFileName, mode='r')
        observations = ncFile.variables[obsFieldName][:]
        ncFile.close()

        bias = modelOutput - observations

        outFileName = "{}/{}_{}_{}_years{:04d}-{:04d}.png".format(
            plotsDirectory, outFileLabel, mainRunName, months, startYear,
            endYear)
        title = "{} ({}, years {:04d}-{:04d})".format(field.upper(), months,
                                                      startYear, endYear)
        plot_global_comparison(config,
                               lonTarg,
                               latTarg,
                               modelOutput,
                               observations,
                               bias,
                               colormapResult,
                               colorbarLevelsResult,
                               colormapDifference,
                               colorbarLevelsDifference,
                               fileout=outFileName,
                               title=title,
                               modelTitle="{}".format(mainRunName),
                               obsTitle=observationTitleLabel,
                               diffTitle="Model-Observations",
                               cbarlabel=unitsLabel)
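# A hedged usage sketch; `config` must be a populated
# MpasAnalysisConfigParser instance as described in the docstring:
for field in ['sst', 'sss', 'mld']:
    ocn_modelvsobs(config, field)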
Example #39
0
urls = re.findall(r'href=[\'"]?([^\'" >]+)', datasets)
x = re.findall(r'(ooi/.*?.nc)', datasets)
# filter into new lists instead of calling x.remove() inside the loops:
# mutating a list while iterating over it skips elements
x = [i for i in x if i.endswith('.nc')]
x = [i for i in x if i[-4].isdigit()]  # keep names with a digit before '.nc'
datasets = [os.path.join(tds_url, i) for i in x]
datasets

# Load all files into a single xarray dataset:

ds = xr.open_mfdataset(datasets)
ds = ds.swap_dims({'obs': 'time'})
ds = ds.chunk({'time': 100})
ds = ds.sortby(
    'time'
)  # data from different deployments can overlap so we want to sort all data by time stamp.
ds

# Create dataframe
df = ds.to_dataframe()

# comment these lines back in, if you want to check the sampling frequency (takes a while)
#res = (pd.Series(df.index[1:]) - pd.Series(df.index[:-1])).value_counts()
#res

# Choose a variable to examine:
Example #40
0
 def load(self):  # load raw_data, i.e. an array of each image
     # open_mfdataset already merges the files from the given path, so a
     # separate xr.merge of individually opened files would be redundant
     ds = xr.open_mfdataset(self.path + '/*.nc')  # load the files as one dataset
     self.raw_data = np.array(ds.variables[self.data_key])
     self.dataset = [self.raw_data]
Example #41
0
import xarray as xr
#url="http://10.10.11.103:5000/thredds/dodsC/historical/2017-03/*.grb2"
#url="http://10.10.11.103:5000/thredds/catalog/historical/2017-03/catalog.html?dataset=historical_grib/2017-03/2017-03-25-06.grb2"
#url="http://10.10.11.103:5000/thredds/catalog/historical/2017-03/catalog.html?dataset=historical_grib/2017-03/2017-03-25-18.grb2"
#url="http://10.10.11.103:5000/thredds/dodsC/historical/2017-03/2017-03-25-18.grb2.html"
#url="http://10.10.11.103:5000/thredds/fileServer/historical/2017-03/2017-03-25-18.grb2"
url = 'http://10.10.11.103:5000/thredds/dodsC/historical/2017-03/2017-03-25-18.grb2?Temperature_surface'
#ds=xr.open_mfdataset(url,engine='pydap')
print(url)
ds = xr.open_dataset(url)
print(ds)

ds2 = xr.open_mfdataset([url])
print(ds2)
Example #42
0
    # Check if it is the most recent forecast
    # If so grab 48hr forecast
    if (x.datetime[-1] == c_for_end):
        x = x.isel(datetime=np.arange(1, 47))
    # Otherwise only grab the next 24 hours
    else:
        # Grab forecast hours 02-25. We can't use the first forecast hour
        # because the radiation variables were saved as accumulations, so we
        # don't have its value; this is fine, we just start one hour later.
        x = x.isel(datetime=np.arange(1, 25))

    x.load()
    return x


ds = xr.open_mfdataset(all_files,
                       concat_dim='datetime',
                       engine='netcdf4',
                       preprocess=lambda x: preprocess(x))

# Adjust to local time zone (i.e. from UTC to MST, local_time_offset should = -7)
ds['datetime'] = pd.to_datetime(
    ds.datetime.values) + datetime.timedelta(hours=local_time_offset)

# Move to ascii dir
if not os.path.isdir(ascii_dir):
    os.mkdir(ascii_dir)
os.chdir(ascii_dir)

# Extract grid cells we want to export
print('Extracting cells within lat/long box')
ds = ds.where((ds.gridlat_0 > lat_r[0]) & (ds.gridlat_0 < lat_r[1]) &
              (ds.gridlon_0 > lon_r[0]) & (ds.gridlon_0 < lon_r[1]),
              drop=True)  # drop=True is assumed; the original snippet is truncated here
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 22 11:00:05 2019

@author: julia_wagemann
"""

import xarray as xr
from era5_in_gee_functions import createFileList
import time

execTime = time.time()

directory = '/Volumes/FREECOM HDD/era5_tp/nc/1979/'
month_list = [
    '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'
]
for i in month_list:
    fileList = createFileList(directory, './era5_tp_1979_' + i + '*')
    fileList.sort()
    print(fileList)

    array = xr.open_mfdataset(fileList)
    outFileName = '/Volumes/FREECOM HDD/era5_tp/nc/monthly/1979/era5_tp_1979_' + i + '.nc'
    array.resample(time='1M').sum().to_netcdf(outFileName,
                                              mode='w',
                                              compute=True)

print("The script took {0} second !".format(time.time() - execTime))
Example #44
0
    def __init__(
        self,
        out_var=None,
        out_mean=None,
        forecast_dir=None,
        forcings_dir=None,
        reanalysis_dir=None,
        transform=None,
        hparams=None,
        **kwargs,
    ):
        """
        Constructor for the ModelDataset class

        :param out_var: Variance of the output variable, defaults to None
        :type out_var: float, optional
        :param out_mean: Mean of the output variable, defaults to None
        :type out_mean: float, optional
        :param forecast_dir: The directory containing the FWI-Forecast data, defaults \
to None
        :type forecast_dir: str, optional
        :param forcings_dir: The directory containing the FWI-Forcings data, defaults \
to None
        :type forcings_dir: str, optional
        :param reanalysis_dir: The directory containing the FWI-Reanalysis data, \
defaults to None
        :type reanalysis_dir: str, optional
        :param transform: Custom transform for the input variable, defaults to None
        :type transform: torch.transforms, optional
        :param hparams: Holds configuration values, defaults to None
        :type hparams: Namespace, optional
        """

        super().__init__(
            out_var=out_var,
            out_mean=out_mean,
            forecast_dir=forecast_dir,
            forcings_dir=forcings_dir,
            reanalysis_dir=reanalysis_dir,
            transform=transform,
            hparams=hparams,
            **kwargs,
        )

        # Number of input and prediction days
        assert (self.hparams.in_days > 0 and self.hparams.out_days > 0
                ), "The number of input and output days must be > 0."
        self.n_input = self.hparams.in_days
        self.n_output = self.hparams.out_days

        # Generate the list of all valid files in the specified directories
        get_inp_time = (lambda x: int(x.split("_20")[1][:2]) * 10000 + int(
            x.split("_20")[1][2:].split("_1200_hr_")[0][:2]) * 100 + int(
                x.split("_20")[1][2:].split("_1200_hr_")[0][2:]))
        inp_files = sorted(
            sorted(glob(f"{forcings_dir}/ECMWF_FO_20*.nc")),
            # Extracting the month and date from filenames to sort by time.
            key=get_inp_time,
        )
        get_out_time = (lambda x: int(x[-24:-22]) * 10000 + int(x[-22:-20]) *
                        100 + int(x[-20:-18]))
        out_files = sorted(
            glob(f"{reanalysis_dir}/ECMWF_FWI_20*_1200_hr_fwi_e5.nc"),
            # Extracting the month and date from filenames to sort by time.
            key=get_out_time,
        )

        # Loading list of test-set files
        if self.hparams.test_set:
            with open(self.hparams.test_set, "rb") as f:
                test_out = pickle.load(f)
                time_indices = set(map(get_inp_time, inp_files))
                inp_index = {
                    k: [x for x in inp_files if get_inp_time(x) == k]
                    for k in time_indices
                }
                test_inp = sum(
                    [
                        inp_index[t] for f in test_out
                        for t in (get_out_time(f), )
                    ],
                    [],
                )

        # Handling the input and output files using test-set files
        if not self.hparams.dry_run and "test_inp" in locals():
            if hasattr(self.hparams, "eval"):
                inp_files = test_inp
                out_files = test_out
            else:
                inp_files = list(set(inp_files) - set(test_inp))
                out_files = list(set(out_files) - set(test_out))

        if self.hparams.dry_run:
            inp_files = inp_files[:8 * (self.n_output + self.n_input)]
            out_files = out_files[:2 * (self.n_output + self.n_input)]

        # Align the output files with the input files
        offset = len(out_files) - len(inp_files) // 4
        out_files = out_files[offset:] if offset > 0 else out_files

        # Checking for valid date format
        out_invalid = lambda x: not (1 <= int(x[-22:-20]) <= 12 and 1 <= int(x[
            -20:-18]) <= 31)
        assert not (sum([out_invalid(x) for x in out_files
                         ])), ("Invalid date format for output file(s)."
                               "The dates should be formatted as YYMMDD.")
        self.out_files = out_files

        inp_invalid = lambda x: not (
            1 <= int(x.split("_20")[1][2:].split("_1200_hr_")[0][:2]) <= 12 and
            1 <= int(x.split("_20")[1][2:].split("_1200_hr_")[0][2:]) <= 31)
        assert not (sum([inp_invalid(x) for x in inp_files
                         ])), ("Invalid date format for input file(s)."
                               "The dates should be formatted as YYMMDD.")
        self.inp_files = inp_files

        # Consider only ground truth and discard forecast values
        preprocess = lambda x: x.isel(time=slice(0, 1))

        with xr.open_mfdataset(
                inp_files,
                preprocess=preprocess,
                engine="h5netcdf",
                parallel=False if self.hparams.dry_run else True,
                combine="by_coords",
        ) as ds:
            self.input = ds.load()

        with xr.open_mfdataset(
                out_files,
                preprocess=preprocess,
                engine="h5netcdf",
                parallel=False if self.hparams.dry_run else True,
                combine="by_coords",
        ) as ds:
            self.output = ds.load()

        # Ensure timestamp matches for both the input and output
        assert self.output.fwi.time.min(skipna=True) == self.input.rh.time.min(
            skipna=True)
        assert self.output.fwi.time.max(skipna=True) == self.input.rh.time.max(
            skipna=True)
        assert len(self.input.time) == len(self.output.time)

        # build a single message: log.info treats extra positional
        # arguments as %-format values, not as additional text
        log.info(
            f"Start date: {self.output.fwi.time.min(skipna=True)}"
            f"\nEnd date: {self.output.fwi.time.max(skipna=True)}"
        )

        # Output-variable mask: load from file if provided, else derive it
        # from the NaN pattern of the first FWI field. Max-pooling the
        # inverted mask with a 3x3 kernel dilates the invalid region by one
        # pixel before re-inverting.
        self.mask = (torch.nn.functional.max_pool2d(
            (~torch.from_numpy(
                np.load(self.hparams.mask) if self.hparams.mask else
                ~np.isnan(self.output["fwi"][0].values))).unsqueeze(0).float(),
            kernel_size=3,
            stride=1,
            padding=1,
        ).squeeze() == 0).cuda()

        # Mean of output variable used for bias-initialization.
        self.out_mean = out_mean if out_mean else 15.292629

        # Variance of output variable used to scale the training loss.
        self.out_var = (out_var
                        if out_var else 18.819166 if self.hparams.loss == "mae"
                        else 414.2136 if self.hparams.mask else 621.65894)

        # Input transforms including mean and std normalization
        self.transform = (
            transform if transform else transforms.Compose([
                transforms.ToTensor(),
                # Mean and standard deviation stats used to normalize the input data
                # to the mean of zero and standard deviation of one.
                transforms.Normalize(
                    [
                        x for i in range(self.n_input) for x in (
                            72.47605,
                            279.96622,
                            2.4548044,
                            6.4765906,
                        )
                    ],
                    [
                        x for i in range(self.n_input) for x in (
                            17.7426847,
                            21.2802498,
                            6.3852794,
                            3.69688883,
                        )
                    ],
                ),
            ]))
Example #45
0
# In[6]:


#############################################################
# Load in Data
#############################################################
E = ed.EsioData.load()


# In[7]:


# Load obs already aggregated by region
import timeit
ds_obs = xr.open_mfdataset(E.obs['NSIDC_0081']['sipn_nc']+'_yearly_agg/*.nc', concat_dim='time')
ds_obs = ds_obs.Extent
# use smoothed obs to compute damped anom
# 10 days is assumed but would be better to embed this smoothing window in alpha
# and then use it here
ds_obs_smooth = ds_obs.rolling(time=10, min_periods=1, center=True).mean()

print(ds_obs.region_names.values)


# In[8]:


# Load obs already aggregated by region, these are also computed after smoothing
# the obs with 10 day running mean
ds_climo = xr.open_mfdataset(E.obs['NSIDC_0079']['sipn_nc']+'_yearly_agg_climatology/*.nc', concat_dim='time')
Example #46
0
def main(config_path):
    config = {}
    with open(config_path) as f_config:
        config = json.load(f_config)

    doms = sorted(
        set(
            map(lambda x: x.split('_')[0],
                os.listdir(os.path.join(config['output-wrf'])))))
    ds = [
        xr.open_mfdataset(os.path.join(config['output-wrf'],
                                       '{}*.nc'.format(dom)),
                          concat_dim='time') for dom in doms
    ]
    extents = {
        'ireland': [-12, -3, 51, 55.5],
        'europe': [
            ds[0].lon.min(), ds[0].lon.max(), ds[0].lat.min(),
            ds[0].lat.max() - 1
        ]
    }

    for z in zip(*map(lambda x: list(x.groupby('time')), ds)):
        for i, (t, d) in enumerate(z):
            is_fst = i == 0
            is_lst = i == len(doms) - 1

            t = pd.to_datetime(t)
            t_save = t.strftime('%Y%m%d%H%M')

            if is_fst:
                print(t)

            print('\t{} - temperature & pressure (Ireland)'.format(i))
            figs = ['t2-p-ir_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.t2 - 273.15,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   levels=LEVELS['t2-ir'],
                   norm=MidpointNormalize(midpoint=0),
                   cmap=CMAP['t2'],
                   extent=extents['ireland'],
                   extend='both',
                   label='$^o$C',
                   title='Temperature and Pressure',
                   colorbar=is_lst,
                   config=config)
            if is_fst:
                plot2d(d.lon.loc[extents['ireland'][0]:extents['ireland'][1]],
                       d.lat.loc[extents['ireland'][2]:extents['ireland'][3]],
                       d.p_sl.sel(lon=slice(extents['ireland'][0],
                                            extents['ireland'][1]),
                                  lat=slice(extents['ireland'][2],
                                            extents['ireland'][3])) * 1e-2,
                       fig=figs[-1],
                       newfig=False,
                       levels_n=10,
                       what='contour',
                       config=config)
            print('\t{} - temperature & pressure (Europe)'.format(i))
            figs += ['t2-p-e_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.t2 - 273.15,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   levels=LEVELS['t2-e'],
                   norm=MidpointNormalize(midpoint=0),
                   cmap=CMAP['t2'],
                   extent=extents['europe'],
                   extend='both',
                   label='$^o$C',
                   title='Temperature and Pressure',
                   colorbar=is_lst,
                   config=config)
            if is_fst:
                plot2d(d.lon,
                       d.lat,
                       d.p_sl * 1e-2,
                       fig=figs[-1],
                       newfig=False,
                       levels_n=20,
                       what='contour',
                       config=config)

            print('\t{} - rain (Ireland)'.format(i))
            figs += ['rain-ir_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.rain,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   levels=LEVELS['rain'],
                   cmap=CMAP['rain'],
                   extent=extents['ireland'],
                   label='mm/h',
                   format='%.1f',
                   title='Precipitation',
                   colorbar=is_lst,
                   config=config)
            print('\t{} - rain (Europe)'.format(i))
            figs += ['rain-e_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.rain,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   levels=LEVELS['rain'],
                   cmap=CMAP['rain'],
                   extent=extents['europe'],
                   label='mm/h',
                   format='%.1f',
                   title='Precipitation',
                   colorbar=is_lst,
                   config=config)

            print('\t{} - wind (Ireland)'.format(i))
            step = 2
            figs += ['wind-ir_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   xu.sqrt(d.u10**2 + d.v10**2),
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   levels=LEVELS['wind'],
                   extent=extents['ireland'],
                   cmap=CMAP['wind'],
                   label='m/s',
                   format='%.0f',
                   title='Wind speed & direction',
                   colorbar=is_lst,
                   config=config)
            if is_fst:
                plot2d(d.lon[::step],
                       d.lat[::step],
                       d.isel(lat=slice(None, None, step),
                              lon=slice(None, None, step)),
                       fig=figs[-1],
                       newfig=False,
                       t=t,
                       what='quiver',
                       config=config)
            print('\t{} - wind (Europe)'.format(i))
            step = 4
            figs += ['wind-e_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   xu.sqrt(d.u10**2 + d.v10**2),
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   levels=LEVELS['wind'],
                   extent=extents['europe'],
                   cmap=CMAP['wind'],
                   label='m/s',
                   format='%.0f',
                   title='Wind speed & direction',
                   colorbar=is_lst,
                   config=config)
            if is_fst:
                plot2d(d.lon[::step],
                       d.lat[::step],
                       d.isel(lat=slice(None, None, step),
                              lon=slice(None, None, step)),
                       fig=figs[-1],
                       newfig=False,
                       t=t,
                       what='quiver',
                       config=config)

            print('\t -PM2.5 (Ireland)')
            figs += ['pm25-ir_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.pm25[0, :, :],
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['ireland'],
                   levels=LEVELS['pm25'],
                   cmap=CMAP['a'],
                   label='PM2.5 (ug/m$^3$)',
                   format='%.1f',
                   title='PM2.5',
                   colorbar=is_lst,
                   config=config)
            print('\t -PM2.5 (Europe)')
            figs += ['pm25-eu_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.pm25[0, :, :],
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['europe'],
                   levels=LEVELS['pm25'],
                   cmap=CMAP['a'],
                   label='PM2.5 (ug/m$^3$)',
                   format='%.1f',
                   title='PM2.5',
                   colorbar=is_lst,
                   config=config)

            print('\t -PM10 (Ireland)')
            figs += ['pm10-ir_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.pm10[0, :, :],
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['ireland'],
                   levels=LEVELS['pm10'],
                   cmap=CMAP['a'],
                   label='PM10 (ug/m$^3$)',
                   format='%.1f',
                   title='PM10',
                   colorbar=is_lst,
                   config=config)
            print('\t -PM10 (Europe)')
            figs += ['pm10-eu_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.pm10[0, :, :],
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['europe'],
                   levels=LEVELS['pm10'],
                   cmap=CMAP['a'],
                   label='PM10 (ug/m$^3$)',
                   format='%.1f',
                   title='PM10',
                   colorbar=is_lst,
                   config=config)

            print('\t -SO2 (Ireland)')
            figs += ['so2-ir_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.so2_concentration[0, :, :] * 1e3,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['ireland'],
                   extend='both',
                   levels=LEVELS['so2'],
                   cmap=CMAP['a'],
                   label='SO2 (ppbv)',
                   format='%.1f',
                   title='SO2',
                   colorbar=is_lst,
                   config=config)
            print('\t -SO2 (Europe)')
            figs += ['so2-eu_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.so2_concentration[0, :, :] * 1e3,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['europe'],
                   extend='both',
                   levels=LEVELS['so2'],
                   cmap=CMAP['a'],
                   label='SO2 (ppbv)',
                   format='%.1f',
                   title='SO2',
                   colorbar=is_lst,
                   config=config)

            print('\t -O3 (Ireland)')
            figs += ['o3-ir_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.o3_concentration[0, :, :] * 1e3,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['ireland'],
                   levels=LEVELS['o3'],
                   cmap=CMAP['a'],
                   label='O3 (ppbv)',
                   format='%.1f',
                   title='O3',
                   colorbar=is_lst,
                   config=config)
            print('\t -O3 (Europe)')
            figs += ['o3-eu_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.o3_concentration[0, :, :] * 1e3,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['europe'],
                   levels=LEVELS['o3'],
                   cmap=CMAP['a'],
                   label='O3 (ppbv)',
                   format='%.1f',
                   title='O3',
                   colorbar=is_lst,
                   config=config)

            print('\t -NOx (Ireland)')
            figs += ['nox-ir_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.nox_concentration[0, :, :] * 1e3,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['ireland'],
                   extend='both',
                   levels=LEVELS['nox'],
                   cmap=CMAP['a'],
                   label='NOx (ppbv)',
                   format='%.1f',
                   title='NOx',
                   colorbar=is_lst,
                   config=config)
            print('\t -NOx (Europe)')
            figs += ['nox-eu_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.nox_concentration[0, :, :] * 1e3,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['europe'],
                   extend='both',
                   levels=LEVELS['nox'],
                   cmap=CMAP['a'],
                   label='NOx (ppbv)',
                   format='%.1f',
                   title='NOx',
                   colorbar=is_lst,
                   config=config)

            #            plot2d(
            #            d.lon, d.lat, d.rh,
            #            fig=figs[-1], newfig=is_fst, t=t,
            #            extend=extents['europe'],
            #            levels=LEVELS['pm25'], cmap=CMAP['a'],
            #            label='PM2.5 (ug/m$^2$)',
            #            title='PM2.5',
            #            config=config)

            print('\t{} - relative humidity (Ireland)'.format(i))
            figs += ['rh-ir_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.rh,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['ireland'],
                   levels=LEVELS['rh'],
                   cmap=CMAP['rh'],
                   label='%',
                   format='%.1f',
                   title='Relative Humidity',
                   colorbar=is_lst,
                   config=config)
            print('\t{} - relative humidity (Europe)'.format(i))
            figs += ['rh-e_{}'.format(t_save)]
            plot2d(d.lon,
                   d.lat,
                   d.rh,
                   fig=figs[-1],
                   newfig=is_fst,
                   t=t,
                   extent=extents['europe'],
                   levels=LEVELS['rh'],
                   cmap=CMAP['rh'],
                   label='%',
                   format='%.1f',
                   title='Relative Humidity',
                   colorbar=is_lst,
                   config=config)

        for fig in figs:
            plt.figure(fig)
            plt.savefig(os.path.join(config['imgs'], fig))
            plt.close(fig)
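# A hedged invocation sketch; the file name is hypothetical, and the JSON
# keys shown are the ones the function actually reads:
# {"output-wrf": "/path/to/wrf/output", "imgs": "/path/to/image/output"}
main('plot_config.json')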
Example #47
0
def draw_sst(sel_year, sel_month):
    st = time.time()
    np.seterr(divide='ignore', invalid='ignore')
    print("darwing sst plot for " + str(sel_month).zfill(2) + " " +
          str(sel_year))
    files_cli = sorted(
        glob.glob(
            os.path.join(
                '/home/alley/work/Dong/mongo/seasonal_analysis/data/data/download_from_mongo/cli',
                'sst_*.grb')))
    f_cli = xr.open_mfdataset(files_cli,
                              concat_dim="time",
                              combine="nested",
                              engine="cfgrib",
                              parallel=True)
    h_cli = f_cli["sst"]
    h_cli_ori = h_cli.mean(dim="time").values

    file_cur = "/home/alley/new_disk/data/sst_" + str(sel_year) + str(
        sel_month).zfill(2) + ".grb"
    f_cur = xr.open_mfdataset(file_cur, engine="cfgrib", parallel=True)
    h_cur_ori = f_cur["sst"]
    h_cur = (h_cur_ori.values - 273.15)
    h_cur = np.nan_to_num(h_cur, nan=-999)

    lat = f_cur["latitude"].values
    lon = f_cur["longitude"].values

    h_ano = h_cur_ori - h_cli_ori
    h_ano = np.nan_to_num(h_ano.values, nan=-999)
    # print(h_ano)

    et1 = time.time()
    # print(et1 - st)
    # leftString = "SST in " + str(sel_month) + str(sel_year)
    # rightString = "~S~o~N~C"
    wks_type = 'png'
    wks = Ngl.open_wks(
        wks_type, '/home/alley/work/Dong/mongo/seasonal_analysis/images/sst_' +
        str(sel_year) + str(sel_month).zfill(2) + '.png')

    res = Ngl.Resources()
    res.nglFrame = False
    res.nglDraw = False
    res.mpLimitMode = "LatLon"
    # res.mpFillOn                  =  True                         #-- turn on map fill
    # res.mpLandFillColor           =  "gray"                     #-- change land color to gray
    # res.mpMinLonF= 50
    # res.mpMaxLonF = 280
    res.mpMinLatF = -45
    res.mpMaxLatF = 45
    res.cnFillOn = True
    res.mpCenterLonF = 120
    res.sfMissingValueV = -999
    res.sfXArray = lon
    res.sfYArray = lat
    res.lbOrientation = "Horizontal"  # horizontal labelbar
    res.cnLinesOn = False
    res.tiMainFontHeightF = 0.015
    res.cnLineLabelsOn = False
    res.cnFillDrawOrder = "Predraw"
    res.cnFillPalette = "BlAqGrYeOrRe"
    res.pmLabelBarDisplayMode = "Always"  #-- turn on a labelbar
    res.tiMainString = "SST in " + str(sel_month).zfill(2) + " " + str(
        sel_year) + " (degC)"
    res.cnLevelSelectionMode = "ExplicitLevels"
    res.cnLevels = np.arange(20, 35, 1)
    plot_cur = Ngl.contour_map(wks, h_cur, res)

    res.cnLevelSelectionMode = "ExplicitLevels"
    res.tiMainString = "SST anomaly in " + str(sel_month).zfill(2) + " " + str(
        sel_year) + " (degC)"
    res.cnFillPalette = "GMT_polar"
    res.cnLevels = np.arange(-3, 4, 1)
    res.pmLabelBarHeightF = 0.3
    plot_ano = Ngl.contour_map(wks, h_ano, res)
    Ngl.panel(wks, [plot_cur, plot_ano], [2, 1], False)
    Ngl.end()
    et2 = time.time()
    # print(et2 - et1)
    print("Finish darwing sst plot for " + str(sel_month).zfill(2) + " " +
          str(sel_year))
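# A hedged usage sketch; any year/month covered by both the climatology
# and the current-data files would work:
draw_sst(2020, 7)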
Example #48
0
for v in range(len(varnames)):
    vstart = time.time()
    # Get variable name and path
    vn = varnames[v]
    datpath = "/stormtrack/data3/glliu/01_Data/02_AMV_Project/02_stochmod/%s/" % vn

    # Create list of variables
    nclist = ["%s%s_ens%03d.nc" % (datpath, vn, e) for e in mnum]

    # Open dataset
    ds = xr.open_mfdataset(
        nclist,
        concat_dim='ensemble',
        combine='nested',
        compat='identical',  # seems to be the strictest setting...not sure if necessary
        parallel=True,  # must be a boolean; the string "True" merely happens to be truthy
        join="exact"  # another strict selection...
    )

    # Add ensemble as a dimension
    ds = ds.assign_coords({'ensemble': np.arange(1, len(mnum) + 1, 1)})

    # Merge variables to Dataset (assuming they have the same coordinates)
    if v == 0:
        dsall = ds.copy()
    else:
        dsall = xr.merge([dsall, ds])

#%% Get the DJFM and Regional cuts for EOF calculation
Example #49
0
import glob
import matplotlib.pyplot as plt
import time

t0 = time.time()

M03_dir = "/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD03/"
M06_dir = "/home/savio1/cybertrn_common/common/Data/Satellite_Observations/MODIS/MYD06_L2/"
M03_files = sorted(glob.glob(M03_dir + "MYD03.A2008*.hdf"))
M06_files = sorted(glob.glob(M06_dir + "MYD06_L2.A2008*.hdf"))
total_pix = np.zeros((180, 360))
cloud_pix = np.zeros((180, 360))

for M03, M06 in zip(M03_files, M06_files):

    # read the 1-km cloud mask, then subsample every 3rd pixel of byte 0
    d06 = xr.open_mfdataset(M06[:])['Cloud_Mask_1km'][:, :, :].values
    d06CM = d06[::3, ::3, 0]
    # bits 1-2 of byte 0 hold the cloud-mask status flag
    # (0 = cloudy, 1 = uncertain, 2 = probably clear, 3 = confident clear)
    ds06_decoded = (np.array(d06CM, dtype="byte") & 0b00000110) >> 1
    d03_lat = xr.open_mfdataset(
        M03[:], drop_variables="Scan Type")['Latitude'][:, :].values
    d03_lon = xr.open_mfdataset(
        M03[:], drop_variables="Scan Type")['Longitude'][:, :].values

    lat = d03_lat[::3, ::3]
    lon = d03_lon[::3, ::3]

    l_index = (lat + 89.5).astype(int).reshape(lat.shape[0] * lat.shape[1])
    lat_index = np.where(l_index > -1, l_index, 0)
    ll_index = (lon + 179.5).astype(int).reshape(lon.shape[0] * lon.shape[1])
    lon_index = np.where(ll_index > -1, ll_index, 0)
    for i, j in zip(lat_index, lon_index):
Example #50
0
def get_files_type(case_name, case_type, var_cam, years):

    
    type_desc = {}
    type_desc['cam'] = ['/glade/p/rneale']


    allowed_types = ['cam','reanal']

    if case_type not in allowed_types : print(case_type+ ' files - type not allowed')
    if case_type     in allowed_types : print(case_type+ ' files - type allowed') 

    print('-Grabbing data type/case -- '+case_type+' '+case_name)
 

    yr0 = years[0]
    yr1 = years[1]

## GRAB ANALYSIS ##

    lat_rev = False
    lcoord_names = False

    
    if var_cam != 'TS':
        
        if case_type=='reanal' :
            dir_rda = '/glade/collections/rda/data/'
            if case_name=='ERA5' :
                var_anal_fmap = {'T': 't',   'Q':'q'}
                var_anal_vmap = {'T': 'T',   'Q':'Q'}
                var_vname = var_anal_vmap[var_cam] ; var_fname = var_anal_fmap[var_cam] 
                rda_cat = 'ds633.1'

                dir_glade = dir_rda+rda_cat+'/'
                files_glade  = np.array([dir_rda+rda_cat+"/e5.moda.an.pl/%03d/e5.moda.an.pl.128_130_%s.ll025sc.%03d010100_%03d120100.nc"%(y,var_fname,y,y) for y in range(yr0,yr1+1)])
                print(files_glade)
                lat_rev = True
                lcoord_names = True
            
            if case_name=='ERAI' :
                var_anal_fmap = {'T': 't',   'Q':'q'}
                var_anal_vmap = {'T': 'T',   'Q':'Q'}
                var_vname = var_anal_vmap[var_cam] ; var_fname = var_anal_fmap[var_cam] 
                if var_cam in ['T'] : var_fname = 'sc'
                if var_cam in ['U','V'] : var_fname = 'uv' 
                rda_cat = 'ds627.1'

                dir_glade = dir_rda+rda_cat+'/'
                files_glade  = np.array([dir_rda+rda_cat+"/ei.moda.an.pl/ei.moda.an.pl.regn128%s.%03d%02d0100.nc"%(var_fname,y,m) for y in range(yr0,yr1+1) for m in range(1,12)])
                print(files_glade)
            
            
            if case_name=='MERRA2' : #### NOT CLEAR MMEAN DATA AVAILABLE
                resn = '1.9x2.5'
#            var_anal_fmap = {'T': '',   'Q':'q'}
                var_anal_vmap = {'T': 'T',   'Q':'Q'}
                var_vname = var_anal_vmap[var_cam] 
                rda_cat = 'ds613.3'

                dir_glade = dir_rda+rda_cat+'/'
                files_glade  = np.array([dir_rda+rda_cat+"/%s/%03d/MERRA2%03d010100_%03d120100.nc"%(resn,y,y,y) for y in range(yr0,yr1+1)])
                print(files_glade)
            
    
            
#### GRAB CAM SST AMIP DATASET FOR NOW FOR ANALYSES
        
    if (var_cam=='TS') :
        print('- Grabbing file(s) for AMIP and REANALYSES from CESM inputdata -')
        dir_inputdata = '/glade/p/cesmdata/cseg/inputdata/atm/cam/sst/'
        hadisst_file = 'sst_HadOIBl_bc_0.9x1.25_1850_2020_c210521.nc'
        files_glade = dir_inputdata+hadisst_file
        var_vname = 'SST_cpl'

    
            
            
## POINT TO FILES ##

    
    data_files = xr.open_mfdataset(files_glade,parallel=True,chunks={"time": 1})
    
#    data_files = xr.open_mfdataset(files_glade)
    
    
## STANDARDIZE COORDS/DIMS ##
    
    if lcoord_names : data_files = data_files.rename({'latitude':'lat', 'longitude':'lon', 'level':'lev'})
    
# Reverse lat array to get S->N if needed
    if lat_rev : data_files = data_files.reindex(lat=list(reversed(data_files.lat)))
#    print(data_files)
    
        
    return data_files,var_vname
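# A hedged usage sketch; the year range is hypothetical:
data_files, var_vname = get_files_type('ERA5', 'reanal', 'T', [1990, 1999])
print(data_files[var_vname])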
Example #51
0
outpath = '/proj/bolinc/users/x_sebsc/pr_disagg/smhi/preprocessed/'
os.system(f'mkdir -p {outpath}')

# create list of available files
dates_all = pd.date_range(startdate, enddate, freq='1d')
ifiles = []
for date in dates_all:
    fname = f'{datapath}/smhi_radar_{date.strftime("%Y%m%d")}.nc'
    if os.path.exists(fname):
        ifiles.append(fname)

if len(ifiles) == 0:
    raise Exception('no input files found!')

# now open all files lazily
# they are automatically chunked per file (thus per day)
data_raw = xr.open_mfdataset(ifiles, combine='nested', concat_dim='time')
data_raw = data_raw['__xarray_dataarray_variable__']
# convert to 32bit
data_raw = data_raw.astype('float32')

# sum to desired timeresolution
agg = data_raw.resample(time=f'{tres}h', label='left').sum(skipna=False)

time_daily = agg.time[::int(24 / tres)]

doy = time_daily.dt.dayofyear.values

np.save(f'{outpath}/{startdate}-{enddate}_tres{tres}_doy', doy)
Example #52
0
import xarray as xr
from scipy.stats import pearsonr

from paths_usa import *

from dask.diagnostics import ProgressBar
ProgressBar().register()

# MERRA-2 and ERA5 only unique interpolated locations
print('prepare turbine location data')
# open turbine files
wt_mer = pd.read_csv(usa_path + '/turbine_data_mer.csv', index_col=0)
wt_era = pd.read_csv(usa_path + '/turbine_data_era.csv', index_col=0)

# open wind files
wind_mer = xr.open_mfdataset(mer_path + "/eff_ws/merra2_wind_USA_*.nc",
                             chunks={'time': 38})
alpha_mer = xr.open_mfdataset(mer_path + "/eff_ws/merra2_alpha_USA_*.nc",
                              chunks={'time': 38})
wind_era = xr.open_mfdataset(era_path + "/eff_ws/era5_wind_USA_*.nc",
                             chunks={'time': 38})
alpha_era = xr.open_mfdataset(era_path + "/eff_ws/era5_alpha_USA_*.nc",
                              chunks={'time': 38})

# Create dataframe with sequence the size of MERRA-2 grid to find out which turbines interpolate to the same point
in_seq_mer = xr.Dataset(
    {
        'x':
        (['lat', 'lon'], np.array(range(
            wind_mer.wh50.isel(time=0).values.size)).reshape(
                wind_mer.wh50.isel(time=0).values.shape))
    })
Example #53
0
#          'name_std' : ['sst', 'patm',   'eta',   'sss', 'u_s',    'v_s']}
fields = pd.DataFrame(
        {'name_CAFE': ['sst'],
         'name_std' : ['sst']})
name_dict = fields.set_index('name_CAFE').to_dict()['name_std']

# Initial dates to include (takes approximately 1 min 30 sec per date) -----
init_dates = pd.date_range('2002-2','2016-5' , freq='1MS')

# Ensembles to include -----
ensembles = range(1,12)

path = fcst_folder + '/yr2016/mn1/OUTPUT.1/' + fcst_filename + '.nc'
dataset = xr.open_mfdataset(path, autoclose=True)
time_use = dataset.time[:366]

years = range(2002,2017)
months = range(1,13)
ensembles = range(1,12)

for year in years:
    print(year)
    print('----------')
    for idx, variable in enumerate(fields['name_CAFE']):
        print(variable)
        
        savename = 'cafe.fcst.v1.ocean.' + fields['name_std'][idx] + '.' + str(year) + '.clim.nc'
        try:
            temp = xr.open_mfdataset('/OSM/CBR/OA_DCFP/data/intermediate_products/pylatte_climatologies/' + savename, autoclose=True)
Example #54
0
def FLDAS_to_csv(year, min_month, max_month, min_lat, max_lat,\
               min_lon, max_lon):
    ''' Prepare file for WGENW

    This method prepares the csv file used to run the weather generator

    Args:
        - year (int): The year of interest. Obtained from the findYear method
        - min_month (int): The first month of the season of interest
        - max_month (int): The last month of the season of interest
        - min_lat (float): The minimum latitude for the bounding box
        - max_lat (float): The maximum latitude for the bounding box
        - min_lon (float): The minimum longitude for the bounding box
        - max_lon (float): The maximum longitude for the bounding box

    '''
    #Get the season
    season = np.arange(min_month,max_month+1,1)
    # Get a csv file ready
    csv_file = str(os.getcwd())+'/FLDAS_WGEN.csv'
    #Start a counting index for the station ID.
    id_idx = 1
    #Density of water
    rho_w = 997
    with open(csv_file,'w',newline='') as csvfile:
        wgenwriter = csv.writer(csvfile, delimiter=',')
        wgenwriter.writerow(['station id','lon','lat',\
                             'year','month','min. temperature',
                             'max. temperature','cloud fraction',
                             'wind speed','precipitation','wet'])
        for month in season:
            month_str = f'{month:02d}'
            subdir = path_daily+'/'+str(year)+'/'+month_str
            nc_files = (glob.glob(subdir+'/*.nc'))
            # open the data
            nc_fid = xr.open_mfdataset(nc_files)
            # Get the index for lat/lon
            lats = np.where(np.logical_and(nc_fid['Y'].values>=min_lat, nc_fid['Y'].values<=max_lat))[0]
            lons = np.where(np.logical_and(nc_fid['X'].values>=min_lon, nc_fid['X'].values<=max_lon))[0]
            # Get the missing value flag
            flag_miss = nc_fid.attrs['missing_value']
            for lat_idx in lats:
                for lon_idx in lons:
                    #Next generate a station_id, zero-padded to five digits
                    if id_idx > 99999:
                        sys.exit('Station_ID index out of bounds')
                    station_ID = f'FLDAS_{id_idx:05d}'
                    ## Deal with temperature
                    # Replace missing values
                    data = nc_fid['Tair_f_tavg'].values
                    data = data.astype('float')
                    data[data==flag_miss]=np.nan
                    #Calculate minT and maxT
                    minT = np.nanmin(data[:,lat_idx,lon_idx])- 273.15 #Convert to C
                    maxT = np.nanmax(data[:,lat_idx,lon_idx])- 273.15 #Convert to C
                    ## wind speed
                    # Replace missing values
                    data = nc_fid['Wind_f_tavg'].values
                    data = data.astype('float')
                    data[data==flag_miss]=np.nan
                    # Take the average for the month
                    avWind =np.nanmean(data[:,lat_idx,lon_idx])
                    # Precipitation
                    data = nc_fid['Rainf_f_tavg'].values
                    data = data.astype('float')
                    data[data==flag_miss]=np.nan
                    # Calculate the total for the month
                    totP = np.nansum(data[:,lat_idx,lon_idx])
                    # Convert to mm/day
                    totP = totP*1000*86400/rho_w
                    # Number of wet days in a month
                    wet_days = len(np.where(data[:,lat_idx,lon_idx]>0)[0])
                    # Write it out
                    wgenwriter.writerow([station_ID, str(round(nc_fid['X'].values[lon_idx], 2)),
                                         str(round(nc_fid['Y'].values[lat_idx], 2)),
                                         str(year), str(month), str(round(minT, 2)),
                                         str(round(maxT, 2)), '0.5', str(round(avWind, 2)),
                                         str(round(totP, 2)), str(wet_days)])
                    # update the counter
                    id_idx += 1
            # close the netCDF file only after all grid cells for this month
            # have been written (closing it inside the loop would invalidate
            # later reads from nc_fid)
            nc_fid.close()
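
# Hedged aside (not in the original): the per-cell numpy loops above can be
# expressed as xarray reductions over time. A self-contained toy version:
import numpy as np
import xarray as xr

toy = xr.Dataset({'Tair_f_tavg': (('time', 'Y', 'X'),
                                  273.15 + 10 * np.random.rand(5, 3, 4))})
masked = toy.where(toy != -9999.0)                          # mask the missing flag
minT_grid = masked['Tair_f_tavg'].min(dim='time') - 273.15  # per-cell minimum, deg C
maxT_grid = masked['Tair_f_tavg'].max(dim='time') - 273.15  # per-cell maximum, deg C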
Example #55
    def open_output(self):
        filenames = [self._get_output_filename(times) for times in self._times]
        return xr.open_mfdataset(filenames)
Example #56
    def __init__(self, **kwargs):

        rpath = kwargs.get("rpath", "./d3d/")

        folders = kwargs.get(
            "folders", None
        )  # [os.path.join(os.path.abspath(loc),name) for name in os.listdir(loc) if os.path.isdir(os.path.join(loc,name))]

        if folders:
            self.folders = folders
        else:
            self.folders = [rpath]

        # check if many tags present
        ifiles = glob.glob(self.folders[0] + "/*_model.json")

        if len(ifiles) > 1:
            # ---------------------------------------------------------------------
            logger.warning(
                "more than one configuration, specify tag argument \n")
        # ---------------------------------------------------------------------

        tag = kwargs.get("tag", None)

        if tag:
            ifile = self.folders[0] + "/" + tag + "_model.json"
        else:
            ifile = ifiles[0]

        # ---------------------------------------------------------------------
        logger.info("reading data based on {} \n".format(ifile))
        # ---------------------------------------------------------------------

        with open(ifile, "rb") as f:
            info = pd.read_json(f, lines=True).T
            info[info.isnull().values] = None
            self.info = info.to_dict()[0]

        grid = r2d.read_file(self.folders[0] + "/" + self.info["tag"] + ".grd")

        deb = np.loadtxt(self.folders[0] + "/" + self.info["tag"] + ".dep")

        # create land/water mask: in Delft3D .dep files -999.0 marks land

        d = deb[1:-1, 1:-1]
        self.w = d == -999.0

        b = deb[:-1, :-1]
        b[b == -999.0] = np.nan  # blank land points in the bathymetry

        self.dem = xr.Dataset(
            {"bathymetry": (["latitude", "longitude"], -b)},
            coords={
                "longitude": ("longitude", grid.lons[0, :].values),
                "latitude": ("latitude", grid.lats[:, 0].values),
            },
        )

        self.grid = grid

        # READ DATA

        nfiles = [
            folder + "/" + "trim-" + self.info["tag"] + ".nc"
            for folder in self.folders
        ]

        ds = xr.open_mfdataset(nfiles,
                               combine="by_coords",
                               data_vars="minimal")

        self.Dataset = ds

        # clean duplicates
        self.Dataset = self.Dataset.sel(
            time=~self.Dataset.indexes["time"].duplicated())

        dic = self.info.copy()  # start from the stored model info
        dic.update(kwargs)  # let user-supplied kwargs override it

        if "sa_date" not in dic.keys():
            dic.update({"sa_date": self.Dataset.time.values[0]})

        if "se_date" not in dic.keys():
            dic.update({"se_date": self.Dataset.time.values[-1]})

        self.obs = obs(**dic)
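
# Hedged aside (not part of the class): the duplicate-timestamp cleanup above
# is a useful idiom on its own whenever consecutive runs overlap in time:
import pandas as pd
import xarray as xr

toy = xr.Dataset({'v': ('time', [1.0, 2.0, 2.5])},
                 coords={'time': pd.to_datetime(['2000-01-01', '2000-01-02',
                                                 '2000-01-02'])})
deduped = toy.sel(time=~toy.indexes['time'].duplicated())  # keeps first occurrence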
Example #57
    def __init__(self, **kwargs):

        rpath = kwargs.get("rpath", "./schism/")

        folders = kwargs.get(
            "folders", None
        )  # [os.path.join(os.path.abspath(loc),name) for name in os.listdir(loc) if os.path.isdir(os.path.join(loc,name))]

        if folders:
            self.folders = folders
        else:
            self.folders = [rpath]

        datai = []

        tag = kwargs.get("tag", "schism")

        misc = kwargs.get("misc", {})

        for folder in self.folders:

            logger.info(" Combining output for folder {}\n".format(folder))

            xdat = glob.glob(folder + "/outputs/schout_[!0]*.nc")
            xdat.sort(key=lambda f: int("".join(filter(str.isdigit, f))))

            if len(xdat) > 0:
                datai.append(xdat)  # append to list

            else:  # run merge output

                with open(folder + "/" + tag + "_model.json", "r") as f:
                    info = pd.read_json(f, lines=True).T
                    info[info.isnull().values] = None
                    info = info.to_dict()[0]

                p = pm.set(**info)

                p.misc = misc

                p.results()

                self.misc = p.misc

                xdat = glob.glob(folder + "/outputs/schout_[!0]*.nc")
                xdat.sort(key=lambda f: int("".join(filter(str.isdigit, f))))

                datai.append(xdat)  # append to list

        merge = kwargs.get("merge", True)

        if merge:

            datai = flat_list(datai)
            self.Dataset = xr.open_mfdataset(datai,
                                             combine="by_coords",
                                             data_vars="minimal")

            with open(self.folders[-1] + "/" + tag + "_model.json", "r") as f:
                info = pd.read_json(f, lines=True).T
                info[info.isnull().values] = None
                info = info.to_dict()[0]

            p = pm.set(**info)

            if hasattr(p, "stations"):

                logger.info(" Retrieve station timeseries\n")

                dstamp = kwargs.get("dstamp", info["date"])

                p.get_station_data(dstamp=dstamp)
                self.time_series = p.time_series

        else:
            self.Dataset = [
                xr.open_mfdataset(x, combine="by_coords", data_vars="minimal")
                for x in datai
            ]

            ts = []

            for folder in self.folders:

                p = pm.read_model(folder +
                                  "/{}_model.json".format(tag))  # read model

                if hasattr(p, "stations"):

                    logger.info(" Retrieve station timeseries\n")

                    dstamp = kwargs.get("dstamp", p.date)

                    p.get_station_data(dstamp=dstamp)
                    ts.append(p.time_series)

            self.time_series = ts
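
# Hedged aside (not part of the class): the digit-based key above sorts output
# files numerically instead of lexicographically:
names = ['schout_10.nc', 'schout_2.nc', 'schout_1.nc']
names.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))
# -> ['schout_1.nc', 'schout_2.nc', 'schout_10.nc']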
Example #58

import datetime

import numpy as np
import numpy.ma as ma
import xarray as xr

# (`onlyfiles`, `divisions`, `year` and `index_files` are defined earlier in
# the original script)


def split_list(alist, wanted_parts=1):
    length = len(alist)
    return np.array([
        alist[i * length // wanted_parts:(i + 1) * length // wanted_parts]
        for i in range(wanted_parts)
    ])


files2analyse = split_list(onlyfiles, divisions)

print('Analysing year', year, 'from files [', files2analyse[index_files][0],
      '-', files2analyse[index_files][-1], ']')

data = xr.open_mfdataset(files2analyse[index_files])
lon = data.longitude.values
lat = data.latitude.values

init_time = datetime.datetime.strptime(
    str(data.time.isel(time=0).values).split('T')[0], "%Y-%m-%d")
print(init_time)

outfolder = '/g/data/v45/jm5970/trackeddy_output/AVISO+/'

sshatime = data.sla.values
sshatime = ma.masked_where(sshatime <= -2147483647, sshatime)

print('End loading data')
sshashape = np.shape(sshatime)
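
# Hedged aside (not in the original): with 10 files and 2 parts, split_list
# yields two equal slices, one per job:
print(split_list(list(range(10)), wanted_parts=2))
# -> [[0 1 2 3 4]
#     [5 6 7 8 9]]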
Example #59
# tracking.py creates a dataset which contains the radius (r) as a function of time and height,
# and the thermal midpoint (x_c, y_c) as functions of time


import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

ds = xr.open_mfdataset('/work/bnm/buoyant_entrainment/no_g/data/slice*.nc',
                       combine='nested', concat_dim='t')
contour = np.loadtxt('/work/bnm/buoyant_entrainment/data/no_g/1e4_sim5_no_g/contour_flux.dat')
midpoint = np.loadtxt('/work/bnm/buoyant_entrainment/data/no_g/1e4_sim5_no_g/thermal_midpoint_1e4_g0.dat')
rout = '/work/bnm/buoyant_entrainment/data/no_g/1e4_sim5_no_g/thermal_boundary.nc'
mout = '/work/bnm/buoyant_entrainment/data/no_g/1e4_sim5_no_g/mask.nc'

tracking = xr.Dataset({'r': (['t', 'z'],  contour),
                 'x_c': (['t'], midpoint[1]),
                 'y_c': (['t'], midpoint[2])},
             coords={'t': (['t'], ds.t.values[:-1]), #remove last element from array
                     'z': (['z'], ds.z.values)})

tracking.to_netcdf(rout,engine='scipy')

tracking = xr.open_dataset(rout)

# convert thermal boundary dataset to have same dimensions as rho, u, v, w, ...
r, foo, bar = xr.broadcast(tracking.r,ds.x,ds.y)
delta_x = ds.x - tracking.x_c
delta_y = ds.y - tracking.y_c

mask = np.sqrt(delta_x ** 2 + delta_y ** 2) < r  # xr.ufuncs was removed; NumPy ufuncs work on DataArrays
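
# Hedged aside (not in the original): the boolean mask can now pick out the
# thermal interior from the full fields (horizontal dims assumed to be x/y):
inside = ds.where(mask)  # NaN everywhere outside the thermal boundary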
Example #60
import numpy as np
import scipy.io as sio
import xarray as xr

# (`calc_sigmantr` is a project-specific neutral-density helper defined
# elsewhere in the original code)
rootdir = '/home/ocean_personal_data/graemem/ariane/'
model = 'orca025_global_5d'
experiment = 'quant_back_seedNAn1_t*_sign27.7-28_MLrefz8delsig0.01'
filepath = rootdir+'experiments/'+model+'/'+experiment+'/ariane_positions_quantitative.nc'
filepath_initial = rootdir+'experiments/'+model+'/'+experiment+'/ariane_initial.nc'
filepath_time = rootdir+'time/time_orca025_global_5d.mat'
filepath_region = rootdir+'experiments/'+model+'/quant_back_seedNAn1_t3560-sep-4217_sign27.7-28_MLrefz8delsig0.01/region_limits'
# Universal variables
spy = 365*24*60*60
yrst = 1958
yrend = 2016
ventsec = 7
lastinit = 4217

# Ariane input
ds_initial = xr.open_mfdataset(filepath_initial,combine='nested',concat_dim='ntraj')
ds_initial.init_volume.name = 'init_volume'
# Ariane output
ds = xr.open_mfdataset(filepath,combine='nested',concat_dim='ntraj')
ds = xr.merge([ds, ds_initial.init_volume])
ds['final_age'] = ds.final_age.astype('timedelta64[s]').astype('float64')/spy
ds['final_dens'] = calc_sigmantr(ds.final_temp,ds.final_salt)
# Model times
time_vals = np.append(np.array([0]),sio.loadmat(filepath_time)['time'].squeeze())
time = xr.DataArray(time_vals,dims=['nfile'],coords={'nfile':np.arange(time_vals.size)})
# Region limits
region_limits = np.loadtxt(filepath_region)

# Bins
years = np.arange(yrst,yrend+1)
ages = np.arange(-3/12,yrend-yrst+9/12)
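
# Hedged aside (not in the original): `years` and `ages` look like histogram
# bin edges; a typical use would be counting trajectories by transit age:
age_counts, _ = np.histogram(ds['final_age'].values, bins=ages)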