Example #1
    def plot(self, var_name, t=None, depth_idx=None, **kwargs):
        """
		Plot 2D data of curvilinear grid. See `plot.plot_data` for details.
		"""
        title = var_name

        # get a valid time object
        t_obj = self.get_time_obj(t)

        # extract data
        data = getattr(self, var_name)

        # handle depth
        depth_key = get_depth_dim(data)
        if depth_key is not None:
            my_depth_idx = 0 if depth_key == 'depth' else -1
            my_depth_idx = my_depth_idx if depth_idx is None else depth_idx
            data = data.isel(**{depth_key: my_depth_idx})
            title += ', depth {:d}'.format(my_depth_idx)

        # handle time
        if 'time' in data.dims:
            data = data.sel(time=t_obj, method='nearest')
            real_time = self.time.sel(time=t_obj,
                                      method='nearest').values.item()
            title += ', time {}'.format(real_time)

        # handle doy
        if 'doy' in data.dims:
            my_doy = 0 if t is None else t
            data = data.isel(doy=my_doy)
            title += ', doy {:d}'.format(my_doy)

        return plot_data(self.grid, data, title=title, **kwargs)
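A minimal usage sketch (the instance name `sim`, the variable name and the time value are hypothetical; extra kwargs are forwarded to `plot_data`):

    # plot temperature at the top depth index, at the snapshot nearest
    # to the given (hypothetical) time
    sim.plot('temp', t='2005-06-15', depth_idx=0)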
Example #2
def eke_at_depth(ds,
                 climatology,
                 depth_idx,
                 time_slice=slice(None, None),
                 days_around=30):
    """
	Calculate EKE and associated data at a given depth index. This function is efficient, when the whole domain x time fits into memory.
	"""

    # get times
    times = np.squeeze(ds['time'].values)
    # get depth key
    depth_key = get_depth_dim(ds)

    # load raw data
    raw_u = ds['u'].isel(**{depth_key: depth_idx, 'time': time_slice}).values
    print('Raw u loaded')
    raw_v = ds['v'].isel(**{depth_key: depth_idx, 'time': time_slice}).values
    print('Raw v loaded')
    # load climatological data
    clim_u = climatology['u_b'].isel(**{depth_key: depth_idx}).values
    print('Clim u loaded')
    clim_v = climatology['v_b'].isel(**{depth_key: depth_idx}).values
    print('Clim v loaded')

    # add memory for prime values = value - climatology
    prime_u = raw_u.copy()
    prime_v = raw_v.copy()

    # add memory for results
    eke_b = np.zeros_like(clim_u)
    num_items = np.zeros(clim_u.shape[0])

    # loop times
    for t_idx, t_obj in progressbar.progressbar(enumerate(times),
                                                max_value=len(times)):
        # get doys and weights
        doy = t_obj.dayofyr - 1
        doys = get_doys(t_obj, ds, days_around)
        doys_weights = get_triangular_weights(doys)
        # calculate the climatological value
        c_u = np.nansum(clim_u[doys] * doys_weights[:, None, None], axis=0)
        c_v = np.nansum(clim_v[doys] * doys_weights[:, None, None], axis=0)
        # calculate prime values
        prime_u[t_idx] -= c_u
        prime_v[t_idx] -= c_v
        num_items[doy] += 1
        # add eke-climatological
        eke_b[doy] += 0.5 * ((prime_u[t_idx]**2) + (prime_v[t_idx]**2))
    # normalize eke-climatological
    eke_b /= num_items[:, None, None]

    print('Calculated primes')

    # calculate eke
    eke = 0.5 * ((prime_u**2) + (prime_v**2))
    # calculate ke
    ke = 0.5 * (raw_u**2 + raw_v**2)

    return eke, eke_b, prime_u, prime_v, ke
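The climatological value at each timestep is a weighted sum of the climatology over a window of surrounding doys. A minimal sketch of triangular weights under that assumption (an illustration only; the repository's `get_triangular_weights` may differ):

    import numpy as np

    def triangular_weights_sketch(doys):
        # weight 1 at the centre of the doy window, decaying linearly
        # towards the edges, normalised to sum to 1
        n = len(doys)
        centre = n // 2
        weights = 1.0 - np.abs(np.arange(n) - centre) / (centre + 1)
        return weights / weights.sum()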
Example #3
def calculate_climatology_at_depth(ds,
                                   variable,
                                   depth_idx,
                                   days_around=0,
                                   with_std=True):
    """
	Calculate the climatology of a dataset ds for a variable at depth depth_idx. Use days_around for smoothing and write 
	results to ds_out.
	"""

    # load raw data into memory
    depth_key = get_depth_dim(ds)
    assert (depth_key is not None) or depth_idx is None

    if depth_key is not None:
        data_t = ds[variable].isel(**{depth_key: depth_idx}).values
    else:
        data_t = ds[variable].values
    # get the number of days (number of doys)
    num_days = get_num_days(ds)
    new_shape = tuple([num_days] + list(data_t.shape[1:]))

    # set up data arrays for the climatology, its standard deviation and
    # the number of items
    data_clim = np.empty(new_shape, dtype=float)
    data_clim_std = None if not with_std else np.empty(new_shape, dtype=float)
    data_clim_num = np.empty((num_days, ), dtype=int)

    # loop doys
    for doy in range(num_days):
        # create boolean mask
        mask = get_doy_mask(doy, days_around, ds)
        # get data
        data = data_t[mask]
        assert data.shape[0] > 0, (data.shape, np.count_nonzero(mask))

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    "Degrees of freedom <= 0 for slice")
            warnings.filterwarnings("ignore", "Mean of empty slice")
            # calculate data
            data_clim[doy] = np.nanmean(data, axis=0)
            if with_std:
                data_clim_std[doy] = np.nanstd(data, axis=0)
            data_clim_num[doy] = data.shape[0]

    return data_clim, data_clim_std, data_clim_num
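The mask selects every timestep whose day of year lies within `days_around` of the target doy. A minimal sketch, using the same cftime `dayofyr` attribute as Example #2 (an illustration only; the repository's `get_doy_mask` may differ):

    import numpy as np

    def doy_mask_sketch(doy, days_around, times, num_days=365):
        # 0-based day of year for every timestep
        doys = np.array([t.dayofyr - 1 for t in times])
        # circular distance, so windows wrap across the turn of the year
        dist = np.abs(doys - doy)
        dist = np.minimum(dist, num_days - dist)
        return dist <= days_around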
Example #4
    def add_data(self,
                 path,
                 variables=(),  # immutable default (was a mutable [])
                 time_calendar=None,
                 time_raw=None,
                 time_units=None,
                 var_prefix='',
                 time_offset=0,
                 time_from=None):
        """
		Add a netCDF dataset to the object.
		"""

        if path in self._data:
            print('WARNING: this path has been loaded already')
            return

        # open dataset
        data = open_dataset(path,
                            variables,
                            time_calendar=time_calendar,
                            time_raw=time_raw,
                            time_offset=time_offset,
                            time_from=time_from,
                            time_units=time_units,
                            eta_rho_slice=self._eta_slice,
                            xi_rho_slice=self._xi_slice,
                            s_rho_slice=self._s_rho_slice)
        depth_key = get_depth_dim(data)

        # do some consistency tests...

        # ... check depth
        assert (self._depth_key is None or depth_key is None
                or depth_key == self._depth_key)
        assert (self._depth_num is None or depth_key is None
                or data.dims[depth_key] == self._depth_num)

        # ... check if time is the same
        if 'time' in data:
            assert self.time is None or (
                len(self.time) == len(data['time'])
                and abs((self.time[0] - data['time'][0]).values)
                < np.timedelta64(1, 'h')
                and abs((self.time[-1] - data['time'][-1]).values)
                < np.timedelta64(1, 'h')
            ), (self.time[0], data.time[0], self.time[-1], data.time[-1])
            self.time = data.time

        # save depth key (s_rho vs depth)
        if depth_key is not None:
            self._depth_key = depth_key
            self._depth_num = data.dims[depth_key]

        if depth_key == 'depth' and 'depth' in data.coords:
            setattr(self, 'depth', data.depth)

        # save full object for direct access
        self._data[path] = data
        self._prefixes[path] = var_prefix

        # save each variable as an attribute
        for var_name in data:
            if getattr(self, var_prefix + var_name, None) is not None:
                print('WARNING:', var_prefix + var_name,
                      'already exists. Consider changing `var_prefix`.')
                continue
            setattr(self, var_prefix + var_name, data[var_name])
            self._reverse_data[var_prefix + var_name] = path
            self._reverse_prefix[var_prefix + var_name] = var_prefix
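A minimal usage sketch (paths, variable names and the instance name `sim` are hypothetical):

    # load model output, then a climatology with a prefix so that
    # variable names do not collide
    sim.add_data('output/avg_2005.nc', variables=['u', 'v', 'temp'])
    sim.add_data('output/clim.nc', variables=['u_b', 'v_b'],
                 var_prefix='clim_')
    # variables are now attributes, e.g. sim.u and sim.clim_u_b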
Example #5
    def psd(self,
            var_name,
            t,
            depth_idx=None,
            aggregation_mode='mean',
            scaling='density',
            dim='eta',
            min_doy=0,
            max_doy=365):
        """
		Calculate a PSD on the data. When having multiple depth levels, they are averaged. You can combine variables by putting a + in between (this is
		for example useful for horizontal velocities). See `psd.do_multidim_welch` for details.
		"""
        # handle lists of depth idxs:
        # calculate psd for each depth and average y and d_y
        if isinstance(depth_idx, (list, np.ndarray)):
            all_y = []
            all_d_y = []
            all_t = None
            all_x = None
            for d_idx in depth_idx:
                all_t, all_x, y, d_y = self.psd(
                    var_name,
                    t,
                    depth_idx=d_idx,
                    aggregation_mode=aggregation_mode,
                    scaling=scaling,
                    dim=dim,
                    min_doy=min_doy,
                    max_doy=max_doy)
                all_y.append(y)
                all_d_y.append(d_y)
            return (all_t, all_x, np.nanmean(all_y, axis=0),
                    np.nanmean(all_d_y, axis=0))

        # handle u+v/v+u:
        # calculate the psd for u and v separately and add them together
        if '+' in var_name:
            dg_time = None
            dg_x = None
            dg_y = None
            dg_dy = None
            var_names = var_name.split('+')
            for var in var_names:
                d = self.psd(var,
                             t,
                             depth_idx=depth_idx,
                             aggregation_mode=aggregation_mode,
                             scaling=scaling,
                             dim=dim,
                             min_doy=min_doy,
                             max_doy=max_doy)
                # is first var
                if dg_x is None:
                    dg_time = d[0]
                    dg_x = d[1]
                    dg_y = d[2]
                    dg_dy = d[3]**2
                else:
                    assert (d[1] == dg_x).all()
                    dg_y += d[2]
                    dg_dy += d[3]**2
            return dg_time, dg_x, dg_y, np.sqrt(dg_dy)

        # set slices first
        if dim not in self._psd_slices:
            raise RuntimeError(
                'You need to set a subdomain using set_psd_slice for this dim first'
            )

        # get subdomain info
        subdomain_slice = self._psd_slices[dim]['subdomain']
        const_dim_dist = self._psd_slices[dim]['const_dim_dist']
        axis = self._psd_slices[dim]['axis']

        list_real_t = []
        list_y = []
        list_d_y = []
        prev_x = None

        # if t is not a list of t_obj/t_idxs, create a list with length 1
        t_list = t
        if not isinstance(t, (list, np.ndarray)):
            t_list = [t]

        # loop over times and average results
        for t_el in t_list:

            data = getattr(self, var_name)

            # get data by t_obj
            if 'time' in data.dims:
                t_obj = self.get_time_obj(t_el)
                doy = t_obj.dayofyr - 1
                if doy < min_doy or doy >= max_doy:
                    continue
                data = data.sel(time=t_obj, method='nearest')
                real_t = self.time.sel(time=t_obj,
                                       method='nearest').values.item()
            # get data by doy
            if 'doy' in data.dims:
                data = data.isel(doy=t_el)
                real_t = t_el

            # get correct depth
            depth_key = get_depth_dim(data)
            if depth_key is not None:
                my_depth_idx = 0 if depth_key == 'depth' else -1
                my_depth_idx = my_depth_idx if depth_idx is None else depth_idx
                data = data.isel(**{depth_key: my_depth_idx})

            # extract the variable values and slice to subdomain
            var = np.squeeze(data.values.copy())
            var = var[subdomain_slice]

            # do PSD
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore",
                                        message="nperseg = 256 is greater")
                warnings.filterwarnings("ignore", message="divide by zero")
                x, y, d_y = do_multidim_welch(var, const_dim_dist, axis,
                                              aggregation_mode, scaling)

            # check that the x values are actually the same
            if prev_x is not None:
                assert (prev_x == x).all()
            else:
                prev_x = x

            # save data
            list_real_t.append(real_t)
            list_y.append(y)
            list_d_y.append(d_y)

        return (list_real_t, prev_x, np.nanmean(list_y, axis=0),
                np.nanmean(list_d_y, axis=0))
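A minimal usage sketch (instance name and argument values are hypothetical; a subdomain must have been registered with `set_psd_slice` for the chosen dim first):

    # PSD of the combined horizontal velocities along eta, averaged
    # over three depth indices and three snapshots
    times, x, y, d_y = sim.psd('u+v',
                               t=[0, 10, 20],
                               depth_idx=[0, 1, 2],
                               dim='eta')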
Example #6
def create_climatology_output(output_path,
                              dataset,
                              variables,
                              days_around,
                              with_std=False):
    """
	Create a writeable netCDF4 object with the climatology structure
	"""

    # check if output exists
    res = check_output_path(output_path)
    if res is not None:
        # check if days_around matches
        ds_days_around = res.getncattr('days_around')
        if ds_days_around != days_around:
            raise RuntimeError(
                'Found different days_around attribute in file: {}'.format(
                    ds_days_around))
        return res

    # get number of days per year
    max_days = get_num_days(dataset)
    # get all dimensions
    dims = {}
    for variable in variables:
        for i, dim in enumerate(dataset[variable].dims):
            if dim != "time" and dim != "doy":
                dims[dim] = dataset[variable].shape[i]

    # get depth key
    depth_key = get_depth_dim(dataset)

    # set up output dataset
    ds_out = nc.Dataset(output_path, 'w')  # pylint: disable=no-member
    ds_out.setncatts(dataset.attrs)

    # save the number of days_around used for this
    ds_out.setncattr('days_around', days_around)

    chunksizes = {'doy': 1}

    # create dimensions
    ds_out.createDimension("doy", max_days)
    for dim in dims:
        ds_out.createDimension(dim, dims[dim])
        chunksizes[dim] = dims[dim] if dim != depth_key else min(10, dims[dim])

    print('Chunking:', chunksizes)

    for variable in variables:
        dimensions = ['doy'] + [
            dim for dim in dataset[variable].dims
            if dim != "time" and dim != "doy"
        ]
        variable_chunking = tuple(chunksizes[dim] for dim in dimensions)
        # only variable or also with standard deviation
        variable_names = [variable + '_b'] if not with_std else [
            variable + '_b', variable + '_std'
        ]
        # create all the variables
        for variable_name in variable_names:
            ds_out.createVariable(variable_name,
                                  float,
                                  zlib=True,
                                  dimensions=tuple(dimensions),
                                  fill_value=np.nan,
                                  chunksizes=variable_chunking)
            for key in dataset[variable].attrs:
                setattr(ds_out.variables[variable_name], key,
                        dataset[variable].attrs[key])
        ds_out.createVariable(variable + '_num',
                              int,
                              zlib=True,
                              dimensions=("doy", ),
                              fill_value=0)

    return ds_out
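A minimal usage sketch (file name, dataset and variable list are hypothetical):

    out = create_climatology_output('climatology.nc', ds, ['u', 'v'],
                                    days_around=30, with_std=False)
    # ... fill out['u_b'], out['u_num'], etc. one doy at a time
    out.close()

The doy chunk size of 1 matches this write pattern: each smoothed day of year can be written to disk independently as soon as it has been computed.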
Example #7
                        "--output",
                        type=str,
                        help="Output path for smoothed climatology",
                        required=True)
    parser.add_argument(
        "--days-around",
        type=int,
        help="Number of days around doy for smoothed climatology",
        default=0)

    args = parser.parse_args()

    # get initial dataset
    dsf = get_dataset(args)

    depth_key = get_depth_dim(dsf)
    depth_idxs = [None] if depth_key is None else list(
        range(dsf.dims[depth_key]))

    print(depth_idxs)

    # get output netCDF file
    out = create_climatology_output(args.output,
                                    dsf,
                                    args.variables,
                                    args.days_around,
                                    with_std=False)

    # loop vars
    for var in args.variables:
        print('Start calculation of', var)
Example #8
def create_eke_output(output_path, dataset):
    """
	Create a writeable netCDF4 object with the eke structure
	"""

    # check if output exists
    res = check_output_path(output_path)
    if res is not None:
        return res


    # get all dimensions
    dims = {
        'eta_rho': dataset['u'].shape[2],
        'xi_rho': dataset['u'].shape[3],
        'time': dataset['u'].shape[0],
        'doy': 365
    }

    # set up output dataset
    ds_out = nc.Dataset(output_path, 'w')  # pylint: disable=no-member
    ds_out.setncatts(dataset.attrs)

    # create dimensions
    chunksizes = {}
    for dim in dims:
        ds_out.createDimension(dim, dims[dim])
        chunksizes[dim] = dims[dim]
    chunksizes['time'] = 30
    chunksizes['doy'] = 30

    print('Chunking:', chunksizes)

    # create time variable
    ds_out.createVariable('time',
                          float,
                          zlib=True,
                          dimensions=('time', ),
                          fill_value=np.nan)
    ds_out.variables['time'].units = dataset.time.attrs['units']
    ds_out.variables['time'].calendar = dataset.time.attrs['calendar']
    # save time values
    conv_fn = partial(date2num,
                      units=dataset.time.attrs['units'],
                      calendar=dataset.time.attrs['calendar'])
    ds_out['time'][:] = np.array(
        [conv_fn(t_obj) for t_obj in dataset.time.values])

    # create variables with correct chunking
    for variable in ['eke', 'eke_b', 'u_p', 'v_p', 'ke']:
        dimensions = ['time', 'eta_rho', 'xi_rho']
        if variable == 'eke_b':
            dimensions[0] = 'doy'
        variable_chunking = tuple([chunksizes[dim] for dim in dimensions])
        ds_out.createVariable(variable,
                              float,
                              zlib=True,
                              dimensions=tuple(dimensions),
                              fill_value=np.nan,
                              chunksizes=variable_chunking)

    return ds_out
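A minimal usage sketch combining this with `eke_at_depth` from Example #2 (file name, climatology object and depth index are hypothetical):

    out = create_eke_output('eke.nc', ds)
    eke, eke_b, u_p, v_p, ke = eke_at_depth(ds, climatology, depth_idx=-1)
    out['eke'][:] = eke
    out['eke_b'][:] = eke_b
    out['u_p'][:] = u_p
    out['v_p'][:] = v_p
    out['ke'][:] = ke
    out.close()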