def __init__(self, time=None, location=None, frequency=None,
                 direction=None, energy=None, spreading=None,
                 time_units='s', location_units='m',
                 frequency_units='Hz', direction_units='deg',
                 energy_units='m^2/Hz', spreading_units='deg',
                 time_var='time', location_var='location',
                 frequency_var='frequency', direction_var='direction',
                 energy_var='energy', spreading_var='spreading',
                 frequency_convention='absolute',
                 direction_convention='nautical',
                 spreading_convention='cosine', spectral=True,
                 directional=True, attrs=None, crs=None, **kwargs):
        '''Initialize class

        Sets dimensions, converts coordinates and fills the dataset,
        if data is provided.

        Parameters
        ----------
        time : iterable, optional
            Time coordinates, each item can be a datetime object or
            float
        location : iterable of 2-tuples, optional
            Location coordinates, each item is a 2-tuple with x- and
            y-coordinates
        frequency : iterable, optional
            Frequency coordinates
        direction : iterable, optional
            Direction coordinates
        energy : matrix, optional
            Wave energy
        spreading : matrix, optional
            Wave directional spreading
        time_units : str, optional
            Units of time coordinates (default: s)
        location_units : str, optional
            Units of location coordinates (default: m)
        frequency_units : str, optional
            Units of frequency coordinates (default: Hz)
        direction_units : str, optional
            Units of direction coordinates (default: deg)
        energy_units : str, optional
            Units of wave energy (default: m^2/Hz)
        spreading_units : str, optional
            Units of directional spreading (default: deg)
        time_var : str, optional
            Name of time variable (default: time)
        location_var : str, optional
            Name of location variable (default: location)
        frequency_var : str, optional
            Name of frequency variable (default: frequency)
        direction_var : str, optional
            Name of direction variable (default: direction)
        energy_var : str, optional
            Name of wave energy variable (default: energy)
        spreading_var : str, optional
            Name of directional spreading variable (default: spreading)
        frequency_convention : str, optional
            Convention of frequency definition (default: absolute)
        direction_convention : str, optional
            Convention of direction definition (default: nautical)
        spreading_convention : str, optional
            Convention of directional spreading definition (default: cosine)
        spectral : bool, optional
            If True, the frequency coordinates span a spectral dimension;
            otherwise frequencies are stored as a parameterized data
            variable (default: True)
        directional : bool, optional
            If True, the direction coordinates span a directional dimension;
            otherwise directions and spreading are stored as parameterized
            data variables (default: True)
        attrs : dict-like, optional
            Global attributes
        crs : str, optional
            Proj4 specification of local coordinate reference system
        kwargs : dict, optional
            Additional options passed to the xarray.Dataset
            initialization method

        See Also
        --------
        oceanwaves.OceanWaves.reinitialize

        '''

        # avoid mutating a shared default or the caller's dict
        attrs = {} if attrs is None else dict(attrs)

        dims = []
        coords = OrderedDict()
        data_vars = OrderedDict()

        # simplify dimensions
        time = np.asarray(time)
        location = np.asarray(location)
        frequency = np.asarray(frequency, dtype=float)
        direction = np.asarray(direction, dtype=float)
        spreading = np.asarray(spreading, dtype=float)
        energy = np.asarray(energy, dtype=float)
        
        # simplify units
        time_units = simplify(time_units)
        location_units = simplify(location_units)
        frequency_units = simplify(frequency_units)
        direction_units = simplify(direction_units)
        energy_units = simplify(energy_units)
        
        # determine object dimensions
        if self._isvalid(time):
            dims.append(time_var)
            coords[time_var] = xr.Variable(
                time_var,
                time
            )

            # only set time units if given. otherwise a datetime
            # object is assumed that is encoded by xarray. setting
            # units manually in that case would raise an exception if
            # the dataset is written to CF-compatible netCDF.
            if time_units is not None and time_units != '':
                coords[time_var].attrs.update(dict(units=time_units))


        if self._isvalid(location):
            dims.append(location_var)
            coords[location_var] = xr.Variable(
                location_var,
                np.arange(len(location))
            )
            
            x, y = list(zip(*location))
            coords['%s_x' % location_var] = xr.Variable(
                location_var,
                np.asarray(x),
                attrs=dict(units=location_units)
            )
            coords['%s_y' % location_var] = xr.Variable(
                location_var,
                np.asarray(y),
                attrs=dict(units=location_units)
            )
                
            coords['%s_lat' % location_var] = xr.Variable(
                location_var,
                np.asarray(x) + np.nan,
                attrs=dict(units='degN')
            )
            coords['%s_lon' % location_var] = xr.Variable(
                location_var,
                np.asarray(y) + np.nan,
                attrs=dict(units='degE')
            )

        if self._isvalid(frequency, mask=frequency>0) and spectral:
            dims.append(frequency_var)
            coords[frequency_var] = xr.Variable(
                frequency_var,
                frequency[frequency>0],
                attrs=dict(units=frequency_units)
            )
            
        if self._isvalid(direction) and directional:
            dims.append(direction_var)
            coords[direction_var] = xr.Variable(
                direction_var,
                direction,
                attrs=dict(units=direction_units)
            )

        # determine object shape
        shp = tuple([len(c) for k, c in coords.items() if k in dims])

        # initialize energy variable
        data_vars[energy_var] = xr.DataArray(
            np.nan + np.zeros(shp),
            dims=dims,
            coords=coords,
            attrs=dict(units=energy_units)
        )

        # store parameterized frequencies
        if not spectral:
            if self._isvalid(frequency):
                data_vars[frequency_var] = xr.DataArray(
                    frequency,
                    dims=dims,
                    coords=coords,
                    attrs=dict(units=frequency_units)
                )
        
        # store parameterized directions
        if not directional:
            if self._isvalid(direction):
                data_vars[direction_var] = xr.DataArray(
                    direction,
                    dims=dims,
                    coords=coords,
                    attrs=dict(units=direction_units)
                )
            if self._isvalid(spreading):
                data_vars[spreading_var] = xr.DataArray(
                    spreading,
                    dims=dims,
                    coords=coords,
                    attrs=dict(units=spreading_units)
                )
        
        # collect global attributes
        attrs.update(dict(
            _init=kwargs.copy(),
            _crs=crs,
            _names=dict(
                time = time_var,
                location = location_var,
                frequency = frequency_var,
                direction = direction_var,
                spreading = spreading_var,
                energy = energy_var
            ),
            _units=dict(
                time = time_units,
                location = location_units,
                frequency = frequency_units,
                direction = direction_units,
                energy = energy_units
            ),
            _conventions=dict(
                frequency = frequency_convention,
                direction = direction_convention,
                spreading = spreading_convention
            )
        ))
        
        # initialize empty object
        super(OceanWaves, self).__init__(
            data_vars=data_vars,
            coords=coords,
            attrs=attrs,
            **kwargs
        )

        # set wave energy
        if self._isvalid(energy):
            self['_energy'] = dims, energy.reshape(shp)

        # convert coordinates
        self.convert_coordinates(crs)
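
A minimal usage sketch for the constructor above (values are illustrative; it assumes the oceanwaves package exposes the OceanWaves class at the top level):

import numpy as np
from oceanwaves import OceanWaves

# non-directional spectra at two locations and three frequencies
ow = OceanWaves(
    location=[(0., 0.), (1000., 0.)],
    frequency=[0.05, 0.1, 0.2],
    energy=np.random.rand(2, 3),
    energy_units='m^2/Hz',
    directional=False,
)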
Example #2
def create_data_array_from_record(
    record: GradsRecordHandler,
    parameter,
    level,
    level_dim_name=None,
    latitude_direction="degree_north",
) -> Optional[xr.DataArray]:
    grads_ctl = record.grads_ctl

    # values
    file_path = grads_ctl.get_data_file_path(record.record_info)
    with open(file_path, "rb") as f:
        values = record.load_data(f)

    # coords
    lons = grads_ctl.xdef["values"]
    lats = grads_ctl.ydef["values"]

    if latitude_direction == "degree_north":
        values = np.flip(values, 0)
        lats = lats[::-1]

    coords = {}
    coords["latitude"] = xr.Variable(
        "latitude",
        lats,
        attrs={
            "units": latitude_direction,
            "standard_name": "latitude",
            "long_name": "latitude"
        },
    )
    coords["longitude"] = xr.Variable("longitude",
                                      lons,
                                      attrs={
                                          "units": "degrees_east",
                                          "standard_name": "longitude",
                                          "long_name": "longitude"
                                      })

    coords[level_dim_name] = level
    coords["valid_time"] = record.record_info["valid_time"]

    if grads_ctl.start_time is not None and grads_ctl.forecast_time is not None:
        coords["start_time"] = grads_ctl.start_time
        coords["forecast_time"] = grads_ctl.forecast_time

    # dims
    dims = ("latitude", "longitude")

    # attrs
    data_attrs = {"description": record.record_info["description"]}

    data = xr.DataArray(
        values,
        dims=dims,
        coords=coords,
        attrs=data_attrs,
        name=parameter,
    )

    return data
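
The record reader above mixes dimension coordinates (latitude, longitude) with scalar coordinates (the level and valid time); a stripped-down sketch of the same pattern with synthetic values:

import numpy as np
import xarray as xr

values = np.zeros((3, 4))
da = xr.DataArray(
    values,
    dims=("latitude", "longitude"),
    coords={
        "latitude": np.linspace(-1.0, 1.0, 3),
        "longitude": np.linspace(0.0, 3.0, 4),
        "level": 850,                                # scalar coordinate
        "valid_time": np.datetime64("2020-01-01"),   # scalar coordinate
    },
    name="t",
)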
Example #3
 def setUp(self):
     self.data = sparse.random((4, 6), random_state=0, density=0.5)
     self.var = xr.Variable(("x", "y"), self.data)
Example #4
    def compute(self, data, selected_indexes):
        observed = data.time.values[selected_indexes] - np.datetime64(
            self._since)
        days_since = observed.astype('timedelta64[D]').astype('int16')

        return self._var_name, xarray.Variable(('y', 'x'), days_since)
Example #5
 def compute(self, data, selected_indexes):
     return self._var_name, xarray.Variable(
         ('y', 'x'), data.source.values[selected_indexes])
Example #6
    def get_dataset(self,
                    varnames=None,
                    iter_start=None,
                    iter_stop=None,
                    iter_step=None,
                    k_levels=None,
                    k_chunksize=1,
                    type='faces'):
        """
        Create an xarray Dataset object for this model.

        Parameters
        ----------
        varnames : list of strings, optional
            The variables to include, e.g. ``['Salt', 'Theta']``. Otherwise
            include all known variables.
        iter_start : int, optional
            Starting iteration number. Otherwise use model default.
            Follows standard `range` conventions. (inclusive)
        iter_stop : int, optional
            Stopping iteration number. Otherwise use model default.
            Follows standard `range` conventions. (exclusive)
        iter_step : int, optional
            Iteration number stepsize. Otherwise use model default.
        k_levels : list of ints, optional
            Vertical levels to extract. Default is to get them all
        k_chunksize : int, optional
            How many vertical levels per Dask chunk.
        type : {'faces', 'latlon'}, optional
            What type of dataset to create

        Returns
        -------
        ds : xarray.Dataset
        """
        def _if_not_none(a, b):
            if a is None:
                return b
            else:
                return a

        iter_start = _if_not_none(iter_start, self.iter_start)
        iter_stop = _if_not_none(iter_stop, self.iter_stop)
        iter_step = _if_not_none(iter_step, self.iter_step)
        iter_params = [iter_start, iter_stop, iter_step]
        if any([a is None for a in iter_params]):
            raise ValueError("The parameters `iter_start`, `iter_stop` "
                             "and `iter_step` must be defined either by the "
                             "model class or as argument. Instead got %r " %
                             iter_params)
        iters = np.arange(*iter_params)

        varnames = varnames or self.varnames

        ds = self._make_coords_faces(iters)
        if type == 'latlon':
            ds = _faces_coords_to_latlon(ds)

        k_levels = k_levels or np.arange(self.nz)
        ds = ds.sel(k=k_levels, k_l=k_levels, k_u=k_levels, k_p1=k_levels)

        # get the data in facet form
        data_facets = {
            vname: self._get_facet_data(vname, iters, k_levels, k_chunksize)
            for vname in varnames
        }

        # transform it into faces or latlon
        data_transformers = {
            'faces': _all_facets_to_faces,
            'latlon': _all_facets_to_latlon
        }

        transformer = data_transformers[type]
        data = transformer(data_facets, _VAR_METADATA)

        variables = {}
        for vname in varnames:
            meta = _VAR_METADATA[vname]
            dims = meta['dims']
            if type == 'faces':
                dims = _add_face_to_dims(dims)
            dims = [
                'time',
            ] + dims
            attrs = meta['attrs']
            variables[vname] = xr.Variable(dims, data[vname], attrs)

        ds = ds.update(variables)
        return ds
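
A hedged usage sketch, assuming model is an instance of a model class that provides get_dataset (variable names and levels are illustrative):

ds = model.get_dataset(
    varnames=['Theta', 'Salt'],
    k_levels=[0, 1, 2],
    k_chunksize=1,
    type='latlon',
)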
Example #7
 def _extend_1d_vertical_to_2d(cls, vertical_variable, reference_var):
     shape = reference_var.shape[-2:]
     var_reshaped = np.resize(vertical_variable, shape[::-1])
     var_reshaped = np.moveaxis(var_reshaped, 0, 1)
     return xr.Variable(reference_var.dims[-2:], var_reshaped)
Example #8
        axes = (0, 3)
        dims = ('f', 'plev', 'lat', 'k')
    ff, kk, spectrum, *powers = climpy.power2d(
        *params[flux],
        dx=dt,
        dy=dlon,
        axes=axes,
        wintype=wintype,
        nperseg=days.size,
        coherence=False,
    )

    # Coordinates
    if out is None:
        f = xr.Variable(
            ('f',), ff, {'long_name': 'frequency', 'units': 'cycles/day'}
        )
        k = xr.Variable(
            ('k',), kk, {'long_name': 'zonal wavenumber', 'units': 'none'}
        )
        out = xr.Dataset(
            {},
            coords={'f': f, 'plev': plev, 'lat': lat, 'k': k},
        )

    # Save to file
    if powers:  # non-empty, i.e. we got a *co*-spectrum
        # Save power spectra, but make sure not to do so twice
        _, P1, P2 = powers
        for i, P in enumerate((P1, P2)):
            if shorts_i[i] not in out:
Example #9
def test_apply_output_core_dimension():

    def stack_negative(obj):
        def func(x):
            return xr.core.npcompat.stack([x, -x], axis=-1)
        result = apply_ufunc(func, obj, output_core_dims=[['sign']])
        if isinstance(result, (xr.Dataset, xr.DataArray)):
            result.coords['sign'] = [1, -1]
        return result

    array = np.array([[1, 2], [3, 4]])
    variable = xr.Variable(['x', 'y'], array)
    data_array = xr.DataArray(variable, {'x': ['a', 'b'], 'y': [-1, -2]})
    dataset = xr.Dataset({'data': data_array})

    stacked_array = np.array([[[1, -1], [2, -2]], [[3, -3], [4, -4]]])
    stacked_variable = xr.Variable(['x', 'y', 'sign'], stacked_array)
    stacked_coords = {'x': ['a', 'b'], 'y': [-1, -2], 'sign': [1, -1]}
    stacked_data_array = xr.DataArray(stacked_variable, stacked_coords)
    stacked_dataset = xr.Dataset({'data': stacked_data_array})

    assert_identical(stacked_array, stack_negative(array))
    assert_identical(stacked_variable, stack_negative(variable))
    assert_identical(stacked_data_array, stack_negative(data_array))
    assert_identical(stacked_dataset, stack_negative(dataset))
    assert_identical(stacked_data_array,
                     stack_negative(data_array.groupby('x')))
    assert_identical(stacked_dataset,
                     stack_negative(dataset.groupby('x')))

    def original_and_stack_negative(obj):
        def func(x):
            return (x, xr.core.npcompat.stack([x, -x], axis=-1))
        result = apply_ufunc(func, obj, output_core_dims=[[], ['sign']])
        if isinstance(result[1], (xr.Dataset, xr.DataArray)):
            result[1].coords['sign'] = [1, -1]
        return result

    out0, out1 = original_and_stack_negative(array)
    assert_identical(array, out0)
    assert_identical(stacked_array, out1)

    out0, out1 = original_and_stack_negative(variable)
    assert_identical(variable, out0)
    assert_identical(stacked_variable, out1)

    out0, out1 = original_and_stack_negative(data_array)
    assert_identical(data_array, out0)
    assert_identical(stacked_data_array, out1)

    out0, out1 = original_and_stack_negative(dataset)
    assert_identical(dataset, out0)
    assert_identical(stacked_dataset, out1)

    out0, out1 = original_and_stack_negative(data_array.groupby('x'))
    assert_identical(data_array, out0)
    assert_identical(stacked_data_array, out1)

    out0, out1 = original_and_stack_negative(dataset.groupby('x'))
    assert_identical(dataset, out0)
    assert_identical(stacked_dataset, out1)
Example #10
    def calc_anom(self,
                  variable,
                  window=1,
                  smooth=1,
                  groupby='dayofyear',
                  clim=None):
        """
        Creates a new variable with name "anom" from variable.
        Anomalies are computed for each grid point and time step as the departure from a climatology.

        Parameters
        ----------
            variable : string
                Input variable.
            window : int, optional
                number of timesteps for running mean. The default is 1.
            smooth : int, optional
                number of timesteps for smoothing anomaly field. The default is 1.
            clim : string, optional
                If None: Calculate (long-term) climatological mean from input variable with groupby operation and running window.
                If string: path + dataname. Will be opened with xr.open_dataarray() 
                If xarray.DataArray: containing the climatology. 
                Will be regridded to resolution of input variable.
            groupby : string
                xarray “group by” operations. The default is dayofyear.
                

        Returns
        -------
            xarray.Dataset: float
                An xarray Dataset object containing the anomaly field.

        """

        # Set up dimensions
        logger.info("Set up dimensions...")
        if hasattr(self, '_time_name'):
            # print names
            logger.info("\n time: '{}'\n"
                        " longitude: '{}'\n"
                        " latitude: '{}'\n".format(self._time_name,
                                                   self._longitude_name,
                                                   self._latitude_name))
            pass
        else:
            self.set_up()

        # step 1: calculate clim
        if clim is None:
            logger.info(
                'Calculating climatological mean from {}...'.format(variable))
            clim_mean = self.calc_clim(variable=variable,
                                       window=window,
                                       groupby=groupby)
            clim = 'from {} with running window time steps {}'.format(
                variable, window)
        else:
            logger.info('Reading climatological mean from {}...'.format(clim))
            # if string, load data
            if isinstance(clim, str):
                clim_mean = xr.open_dataarray(clim)
            else:  # clim is xarray.DataArray
                clim_mean = clim

            # check time dimension
            if groupby not in clim_mean.dims:
                clim_mean = clim_mean.groupby(self._time_name + '.' + groupby)

            # regrid        - grid dimensions in clim must have same name as in input variable
            clim_mean = clim_mean.reindex(**{
                self._latitude_name:
                self.ds[self._latitude_name],
                self._longitude_name:
                self.ds[self._longitude_name]
            },
                                          method='nearest')

        # step 2: calculate and create new variable anomaly
        self.ds['anom'] = xr.Variable(
            self.ds[variable].dims,
            (self.ds[variable].groupby(self._time_name + '.' + groupby) -
             clim_mean).rolling(time=smooth, center=True).mean(
             ),  # [variable] at end if error because of frozen dimensions
            attrs={
                'units':
                self.ds[variable].attrs['units'],
                'long_name':
                self.ds[variable].attrs['long_name'] + ' Anomaly',
                'standard_name':
                self.ds[variable].attrs['long_name'] + ' anomaly',
                'history':
                ' '.join([
                    'Calculated from {} with input attributes:',
                    'smoothing time steps = {},', 'climatology = {}.'
                ]).format(variable, smooth, clim)
            })
        logger.info('Calculating Anomaly... DONE')
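
A brief usage sketch, assuming blocking is an instance of the class defining calc_anom and its dataset contains a variable named 'z500' (both names are illustrative):

blocking.calc_anom('z500', window=31, smooth=1, groupby='dayofyear')
anom = blocking.ds['anom']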
Example #11
    def run_contrack(self,
                     variable,
                     threshold,
                     gorl,
                     overlap,
                     persistence,
                     twosided=True):
        """
        Spatial and temporal tracking of closed contours.
        
        Parameters
        ----------
            variable : string
                input variable.
            threshold : int
                threshold value to detect contours.
            gorl : string
                find contours that are greater or lower than threshold value [>, >=, <, <=, gt, ge, lt, le].
            overlap : int
                overlapping fraction of two contours between two time steps [0-1].
            persistence : int
                temporal persistence (in time steps) of the contour life time
            twosided = True : bool, optional
                if true twosided (forward and backward) overlap test, otherwise just forward (more transient contours)
                

        Returns
        -------
            xarray.Dataset: float
                An xarray Dataset object containing the flag field.
                Each unique feature has a unique label/flag.
        
        """

        logger.info("\nRun ConTrack \n"
                    "########### \n"
                    "    threshold:    {} {} \n"
                    "    overlap:      {} \n"
                    "    persistence:  {} time steps".format(
                        gorl, threshold, overlap, persistence))

        # Set up dimensions
        logger.info("Set up dimensions...")
        if hasattr(self, '_time_name'):
            # print names
            logger.info("\n time: '{}'\n"
                        " longitude: '{}'\n"
                        " latitude: '{}'\n".format(self._time_name,
                                                   self._longitude_name,
                                                   self._latitude_name))
            pass
        else:
            self.set_up()

        # step 1: define closed contours (greater or less than threshold)
        logger.info("Find individual contours...")
        if gorl == '>=' or gorl == 'ge':
            flag = xr.where(self.ds[variable] >= threshold, 1, 0)
        elif gorl == '<=' or gorl == 'le':
            flag = xr.where(self.ds[variable] <= threshold, 1, 0)
        elif gorl == '>' or gorl == 'gt':
            flag = xr.where(self.ds[variable] > threshold, 1, 0)
        elif gorl == '<' or gorl == 'lt':
            flag = xr.where(self.ds[variable] < threshold, 1, 0)
        else:
            errmsg = ' Please select from [>, >=, <, <=] for gorl'
            raise ValueError(errmsg)

        # set order of dimension to (time,lat,lon)
        dims = self.ds[variable].dims
        sort = [
            dims.index(dim) for dim in
            [self._time_name, self._latitude_name, self._longitude_name]
        ]
        flag = flag.transpose(dims[sort[0]], dims[sort[1]], dims[sort[2]])

        # step 2: identify individual contours (only along x and y)
        flag, num_features = ndimage.label(
            flag.data,
            structure=np.array([[[0, 0, 0], [0, 0, 0], [0, 0, 0]],
                                [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
                                [[0, 0, 0], [0, 0, 0], [0, 0, 0]]])
        )  # comment: can lead to memory error... better to loop over each time step?

        # periodic boundary: allow contours to cross date border
        # comment: what if dimension index not in order (time,lat,lon)? --> self.ds[variable].dims.index(self._latitude_name)
        for tt in range(len(self.ds[self._time_name])):
            for yy in range(len(self.ds[self._latitude_name])):
                if flag[tt, yy, 0] > 0 and flag[tt, yy, -1] > 0 and (
                        flag[tt, yy, 0] > flag[tt, yy, -1]):
                    # downstream
                    flag[tt][flag[tt] == flag[tt, yy, 0]] = flag[tt, yy, -1]
                if flag[tt, yy, 0] > 0 and flag[tt, yy, -1] > 0 and (
                        flag[tt, yy, 0] < flag[tt, yy, -1]):
                    # upstream
                    flag[tt][flag[tt] == flag[tt, yy, -1]] = flag[tt, yy, 0]

        #step 3: overlapping
        logger.info("Apply overlap...")

        weight_lat = np.cos(self.ds[self._latitude_name].data * np.pi / 180)
        weight_grid = np.ones(
            (self.ds.dims[self._latitude_name],
             self.ds.dims[self._longitude_name])) * np.array(
                 (111 * self._dlat * 111 * self._dlon * weight_lat)).astype(
                     np.float32)[:, None]

        for tt in range(1, len(self.ds[self._time_name]) - 1):
            # loop over individual contours
            slices = ndimage.find_objects(flag[tt])
            label = 0
            for slice_ in slices:
                label = label + 1
                if slice_ is None:
                    #no feature with this flag/label
                    continue

                # calculate values
                areacon = np.sum(
                    weight_grid[slice_][flag[tt][slice_] == label])
                areaover_forward = np.sum(
                    weight_grid[slice_][(flag[tt][slice_] == label)
                                        & (flag[tt + 1][slice_] >= 1)])
                areaover_backward = np.sum(
                    weight_grid[slice_][(flag[tt][slice_] == label)
                                        & (flag[tt - 1][slice_] >= 1)])

                fraction_backward = (1 / areacon) * areaover_backward
                fraction_forward = (1 / areacon) * areaover_forward

                # apply overlap criterion forward and backward
                if twosided:
                    # middle
                    if fraction_backward != 0 and fraction_forward != 0:
                        if (fraction_backward < overlap) or (fraction_forward <
                                                             overlap):
                            flag[tt][slice_][(flag[tt][slice_] == label)] = 0.
                    # decay
                    if fraction_backward != 0 and fraction_forward == 0:
                        if (fraction_backward < overlap):
                            flag[tt][slice_][(flag[tt][slice_] == label)] = 0.
                    # onset
                    if fraction_backward == 0 and fraction_forward != 0:
                        if (fraction_forward < overlap):
                            flag[tt][slice_][(flag[tt][slice_] == label)] = 0.

                # apply overlap criterion only forward (capture also more transient features)
                else:
                    if (fraction_forward < overlap):
                        flag[tt][slice_][(flag[tt][slice_] == label)] = 0.

        # step 4: persistency
        # find features along time axis
        logger.info("Apply persistence...")
        flag = xr.where(flag >= 1, 1, 0)
        flag, num_features = ndimage.label(
            flag,
            structure=np.array([[[0, 0, 0], [0, 1, 0], [0, 0, 0]],
                                [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
                                [[0, 0, 0], [0, 1, 0], [0, 0, 0]]
                                ]))  # comment: can lead to memory error...
        # periodic boundary: allow features to cross date border
        slices = ndimage.find_objects(flag)
        for tt in range(len(self.ds[self._time_name])):
            for yy in range(len(self.ds[self._latitude_name])):
                if flag[tt, yy, 0] > 0 and flag[tt, yy, -1] > 0 and (
                        flag[tt, yy, 0] > flag[tt, yy, -1]):
                    # downstream
                    slice_ = slices[flag[tt, yy, 0] - 1]
                    flag[slice_][(flag[slice_] == flag[tt, yy,
                                                       0])] = flag[tt, yy, -1]
                if flag[tt, yy, 0] > 0 and flag[tt, yy, -1] > 0 and (
                        flag[tt, yy, 0] < flag[tt, yy, -1]):
                    # upstream
                    slice_ = slices[flag[tt, yy, 0] - 1]
                    flag[slice_][(flag[slice_] == flag[tt, yy,
                                                       -1])] = flag[tt, yy, 0]
        # check for persistence, remove features with lifetime < persistence
        label = 0
        for slice_ in ndimage.find_objects(flag):
            label = label + 1
            if slice_ is None:
                #no feature with this flag
                continue
            if (slice_[0].stop - slice_[0].start) < persistence:
                flag[slice_][(flag[slice_] == label)] = 0.

        # step 5: create new variable flag
        logger.info("Create new variable 'flag'...")
        self.ds['flag'] = xr.Variable(
            self.ds[variable].dims,
            flag.transpose(sort),
            attrs={
                'units':
                'flag',
                'long_name':
                'contrack flag',
                'standard_name':
                'contrack flag',
                'history':
                ' '.join([
                    'Calculated from {} with input attributes:',
                    'threshold = {} {},', 'overlap fraction = {},',
                    'persistence time steps = {}.', 'twosided = {}'
                ]).format(variable, gorl, threshold, overlap, persistence,
                          twosided),
                'reference':
                'https://github.com/steidani/ConTrack'
            })

        num_features = len(np.unique(flag)) - 1  # don't count 0
        logger.info("Running contrack... DONE\n"
                    "{} contours tracked".format(num_features))
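
A brief usage sketch, continuing the illustrative blocking object from the previous example with an anomaly field 'anom' already computed:

blocking.run_contrack('anom',
                      threshold=150,
                      gorl='>=',
                      overlap=0.5,
                      persistence=20,
                      twosided=True)
flags = blocking.ds['flag']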
Example #12
def construct_ds_from_dict(info, expt_labels, vars = ["rld", "rlu"]):
    ''' Construct an xarray Dataset with global-mean up- and down fluxes for each realization and each experiment.
    Mean fluxes are supplemented with net fluxes, absorption, and forcing

    The location of the data is constructed from the keys in argument info, depending on whether the data is remote
    (assumed to be the Earth System) or local

    Parameters:
        info (dict): a dictionary with keys name, location, institution, physics, forcing, and realization
        expt_labels (array of strings): Names of the experiments, length needs to match size of "experiment" dimension in files
        vars = ["rld", "rlu"]: variables to be read
    '''
    #
    # info is a list of dictionaries
    #
    out = xr.concat([open_one_file_set(i, vars) for i in info],
                    dim=pd.Index([i["realization"] for i in info], name="realization"))

    out["forcing_index"] = xr.Variable(dims="realization", data=[i["forcing"] for i in info])
    out["physics_index"] = xr.Variable(dims="realization", data=[i["physics"] for i in info])
    out = out.assign_coords(expt=expt_labels)
    #
    # Weighted mean across profiles - profiles_weights should be the same across all realizations
    #
    x = (out * out.profile_weight/out.profile_weight.sum(dim='site')).sum(dim='site')
    # Profile weight depend on site but we've averaged over all those
    x = x.drop("profile_weight")
    # Variable attributes get lost in that reduction
    for v in x.variables: x[v].attrs = out[v].attrs
    out = x

    toa = out.isel(expt=0).plev.argmin().values
    sfc = out.isel(expt=0).plev.argmax().values

    if "rld" in out:
        band = "l"
        #
        # Net flux; atmospheric absorption
        #
        net = out["r" + band + "d"] - out["r" + band + "u"]
        net.attrs = {"standard_name":"net_downward_longwave_flux_in_air",
                     "variable_name":"rln",
                     "units":out["r" + band + "u"].attrs["units"],
                     "cell_methods":out["r" + band + "u"].attrs["cell_methods"]}
        out["r" + band + "n"] = net
        out["r" + band + "a"] = net.sel(level=toa) - net.sel(level=sfc)
        out["r" + band + "a"].attrs = \
                     {"standard_name":"atmosphere_net_rate_of_absorption_of_longwave_energy",
                     "variable_name":"rla",
                     "units":out["r" + band + "u"].attrs["units"],
                     "cell_methods":out["r" + band + "u"].attrs["cell_methods"]}
        out = compute_forcing(out, band)
    if "rsd" in out:
        band = "s"
        #
        # Net flux; atmospheric absorption
        #
        net = out["r" + band + "d"] - out["r" + band + "u"]
        net.attrs = {"standard_name":"net_downward_shortwave_flux_in_air",
                     "variable_name":"rln",
                     "units":out["r" + band + "u"].attrs["units"],
                     "cell_methods":out["r" + band + "u"].attrs["cell_methods"]}
        out["r" + band + "n"] = net
        out["r" + band + "a"] = net.sel(level=toa) - net.sel(level=sfc)
        out["r" + band + "a"].attrs = \
                     {"standard_name":"atmosphere_net_rate_of_absorption_of_shortwave_energy",
                     "variable_name":"rla",
                     "units":out["r" + band + "u"].attrs["units"],
                     "cell_methods":out["r" + band + "u"].attrs["cell_methods"]}
        out = compute_forcing(out, band)
    return(out)
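
A hedged call sketch; the info dictionaries and experiment labels are illustrative and follow the keys named in the docstring above:

info = [
    {"name": "model-a", "location": "local", "institution": "inst-x",
     "physics": 1, "forcing": 1, "realization": 1},
    {"name": "model-a", "location": "local", "institution": "inst-x",
     "physics": 1, "forcing": 2, "realization": 2},
]
ds = construct_ds_from_dict(info, expt_labels=["present-day", "future"],
                            vars=["rld", "rlu"])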
Example #13
    def quantity(x):
        return x

    has_pint = False


def test_allclose_regression():
    x = xr.DataArray(1.01)
    y = xr.DataArray(1.02)
    xr.testing.assert_allclose(x, y, atol=0.01)


@pytest.mark.parametrize(
    "obj1,obj2",
    (
        pytest.param(xr.Variable("x", [1e-17, 2]),
                     xr.Variable("x", [0, 3]),
                     id="Variable"),
        pytest.param(
            xr.DataArray([1e-17, 2], dims="x"),
            xr.DataArray([0, 3], dims="x"),
            id="DataArray",
        ),
        pytest.param(
            xr.Dataset({
                "a": ("x", [1e-17, 2]),
                "b": ("y", [-2e-18, 2])
            }),
            xr.Dataset({
                "a": ("x", [0, 2]),
                "b": ("y", [0, 1])
Example #14
import pytest

import xarray as xr


def test_allclose_regression():
    x = xr.DataArray(1.01)
    y = xr.DataArray(1.02)
    xr.testing.assert_allclose(x, y, atol=0.01)


@pytest.mark.parametrize(
    "obj1,obj2",
    (
        pytest.param(
            xr.Variable("x", [1e-17, 2]), xr.Variable("x", [0, 3]), id="Variable",
        ),
        pytest.param(
            xr.DataArray([1e-17, 2], dims="x"),
            xr.DataArray([0, 3], dims="x"),
            id="DataArray",
        ),
        pytest.param(
            xr.Dataset({"a": ("x", [1e-17, 2]), "b": ("y", [-2e-18, 2])}),
            xr.Dataset({"a": ("x", [0, 2]), "b": ("y", [0, 1])}),
            id="Dataset",
        ),
    ),
)
def test_assert_allclose(obj1, obj2):
    with pytest.raises(AssertionError):
Example #15
    def __init__(self,
                 filename,
                 fields=[],
                 categories=[],
                 fix_cf=False,
                 mode='r',
                 endian='>',
                 diaginfo_file='',
                 tracerinfo_file='',
                 use_mmap=False,
                 dask_delayed=False):

        # Track the metadata accompanying this dataset.
        dir_path = os.path.abspath(os.path.dirname(filename))
        if not dir_path:
            dir_path = os.getcwd()
        if not tracerinfo_file:
            tracerinfo_file = os.path.join(dir_path, 'tracerinfo.dat')
            if not os.path.exists(tracerinfo_file):
                tracerinfo_file = ''
        self.tracerinfo_file = tracerinfo_file
        if not diaginfo_file:
            diaginfo_file = os.path.join(dir_path, 'diaginfo.dat')
            if not os.path.exists(diaginfo_file):
                diaginfo_file = ''
        self.diaginfo_file = diaginfo_file

        self.filename = filename
        self.fsize = os.path.getsize(self.filename)
        self.mode = mode
        if not mode.startswith('r'):
            raise ValueError(
                "Currently only know how to 'r(b)'ead bpch files.")

        # Check endianness flag
        if endian not in ['>', '<', '=']:
            raise ValueError("Invalid byte order (endian={})".format(endian))
        self.endian = endian

        # Open the raw output file, but don't yet read all the data
        self._mmap = use_mmap
        self._dask = dask_delayed
        self._bpch = BPCHFile(self.filename,
                              self.mode,
                              self.endian,
                              tracerinfo_file=tracerinfo_file,
                              diaginfo_file=diaginfo_file,
                              eager=False,
                              use_mmap=self._mmap,
                              dask_delayed=self._dask)
        self.fields = fields
        self.categories = categories

        # Peek into the raw output file and read the header and metadata
        # so that we can get a head start at building the output grid
        self._bpch._read_metadata()
        self._bpch._read_header()

        # Parse the binary file and prepare to add variables to the DataStore
        self._bpch._read_var_data()

        # Create storage dicts for variables and attributes, to be used later
        # when xarray needs to access the data
        self._variables = OrderedDict()
        self._attributes = OrderedDict()
        self._attributes.update(self._bpch._attributes)
        self._dimensions = [d for d in BASE_DIMENSIONS]

        # Begin constructing the coordinate dimensions shared by the
        # output dataset variables
        dim_coords = {}
        self.ctm_info = CTMGrid.from_model(self._attributes['modelname'],
                                           resolution=self._attributes['res'])

        # Add vertical dimensions
        self._dimensions.append(dict(dims=[
            'lev',
        ], attrs={'axis': 'Z'}))
        self._dimensions.append(dict(dims=[
            'lev_trop',
        ], attrs={'axis': 'Z'}))
        self._dimensions.append(dict(dims=[
            'lev_edge',
        ], attrs={'axis': 'Z'}))
        eta_centers = self.ctm_info.eta_centers
        sigma_centers = self.ctm_info.sigma_centers

        # Add time dimensions
        self._dimensions.append(
            dict(dims=[
                'time',
            ],
                 attrs={
                     'axis': 'T',
                     'long_name': 'time',
                     'standard_name': 'time'
                 }))

        # Add lat/lon dimensions
        self._dimensions.append(
            dict(dims=[
                'lon',
            ],
                 attrs={
                     'axis': 'X',
                     'long_name': 'longitude coordinate',
                     'standard_name': 'longitude'
                 }))
        self._dimensions.append(
            dict(dims=[
                'lat',
            ],
                 attrs={
                     'axis': 'Y',
                     'long_name': 'latitude coordinate',
                     'standard_name': 'latitude'
                 }))

        if eta_centers is not None:
            lev_vals = eta_centers
            lev_attrs = {
                'standard_name': 'atmosphere_hybrid_sigma_pressure_coordinate',
                'axis': 'Z'
            }
        else:
            lev_vals = sigma_centers
            lev_attrs = {
                'standard_name': 'atmosphere_hybrid_sigma_pressure_coordinate',
                'axis': 'Z'
            }
        self._variables['lev'] = xr.Variable([
            'lev',
        ], lev_vals, lev_attrs)

        ## Latitude / Longitude
        # TODO: Add lon/lat bounds

        # Detect if we're on a nested grid; in that case, we'll have a displaced
        # origin set in the variable attributes we previously read
        ref_key = list(self._bpch.var_attrs.keys())[0]
        ref_attrs = self._bpch.var_attrs[ref_key]
        self.is_nested = (ref_attrs['origin'] != (1, 1, 1))

        lon_centers = self.ctm_info.lon_centers
        lat_centers = self.ctm_info.lat_centers

        if self.is_nested:
            ix, iy, _ = ref_attrs['origin']
            nx, ny, *_ = ref_attrs['original_shape']
            # Correct i{x,y} for IDL->Python indexing (1-indexed -> 0-indexed)
            ix -= 1
            iy -= 1
            lon_centers = lon_centers[ix:ix + nx]
            lat_centers = lat_centers[iy:iy + ny]

        self._variables['lon'] = xr.Variable(['lon'], lon_centers, {
            'long_name': 'longitude',
            'units': 'degrees_east'
        })
        self._variables['lat'] = xr.Variable(['lat'], lat_centers, {
            'long_name': 'latitude',
            'units': 'degrees_north'
        })
        # TODO: Fix longitudes if ctm_grid.center180

        # Add variables from the parsed BPCH file to our DataStore
        for vname in list(self._bpch.var_data.keys()):

            var_data = self._bpch.var_data[vname]
            var_attr = self._bpch.var_attrs[vname]

            if fields and (var_attr['name'] not in fields):
                continue
            if categories and (var_attr['category'] not in categories):
                continue

            # Process dimensions
            dims = [
                'time',
                'lon',
                'lat',
            ]
            dshape = var_attr['original_shape']
            if len(dshape) == 3:
                # Process the vertical coordinate. A few things can happen here:
                # 1) We have cell-centered values on the "Nlayer" grid; we can take these variables and map them to 'lev'
                # 2) We have edge value on an "Nlayer" + 1 grid; we can take these and use them with 'lev_edge'
                # 3) We have troposphere values on "Ntrop"; we can take these and use them with 'lev_trop', but we won't have coordinate information yet
                # All other cases we do not handle yet; this includes the aircraft emissions and a few other things. Note that tracer sources do not have a vertical coord to worry about!
                nlev = dshape[-1]
                grid_nlev = self.ctm_info.Nlayers
                grid_ntrop = self.ctm_info.Ntrop
                try:
                    if nlev == grid_nlev:
                        dims.append('lev')
                    elif nlev == grid_nlev + 1:
                        dims.append('lev_edge')
                    elif nlev == grid_ntrop:
                        dims.append('lev_trop')
                    else:
                        continue
                except AttributeError:
                    warnings.warn("Couldn't resolve grid_spec vertical layout")
                    continue

            # xarray Variables are thin wrappers for numpy.ndarrays, or really
            # any object that extends the ndarray interface. A critical part of
            # the original ndarray interface is that the underlying data has to
            # be contiguous in memory. We can enforce this to happen by
            # concatenating each bundle in the variable data bundles we read
            # from the bpch file
            data = self._concat([v.data for v in var_data])

            # Is the variable time-invariant? If it is, kill the time dim.
            # Here, we mean it only as one sample in the dataset.
            if data.shape[0] == 1:
                dims = dims[1:]
                data = data.squeeze()

            # Create a variable containing this data
            var = xr.Variable(dims, data, var_attr)

            # Shuffle dims for CF/COARDS compliance if requested
            # TODO: For this to work, we have to force a load of the data.
            #       Is there a way to re-write BPCHDataProxy so that that's not
            #       necessary?
            #       Actually, we can't even force a load because var.data is a
            #       numpy.ndarray. Weird.
            # if fix_dims:
            #     target_dims = [d for d in DIM_ORDER_PRIORITY if d in dims]
            #     var = var.transpose(*target_dims)

            self._variables[vname] = var

            # Try to add a time dimension
            # TODO: Time units?
            if (len(var_data) > 1) and 'time' not in self._variables:
                time_bnds = np.asarray([v.time for v in var_data])
                times = time_bnds[:, 0]

                self._variables['time'] = xr.Variable(
                    [
                        'time',
                    ], times, {
                        'bounds': 'time_bnds',
                        'units': cf.CTM_TIME_UNIT_STR
                    })
                self._variables['time_bnds'] = xr.Variable(
                    ['time', 'nv'], time_bnds, {'units': cf.CTM_TIME_UNIT_STR})
                self._variables['nv'] = xr.Variable([
                    'nv',
                ], [0, 1])
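
A minimal illustration of the point made in the comments above: xr.Variable wraps an ndarray-like object, so the per-record bundles are concatenated into one contiguous array before wrapping (shapes and names here are illustrative):

import numpy as np
import xarray as xr

bundles = [np.random.rand(1, 72, 46) for _ in range(3)]  # one bundle per output time
data = np.concatenate(bundles, axis=0)                    # contiguous (time, lon, lat) array
var = xr.Variable(('time', 'lon', 'lat'), data,
                  attrs={'long_name': 'illustrative tracer'})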
Example #16
            to_save_ds = xr.Dataset(coords={"time": obs})

            for var in dicts.nc_meta.keys():
                # v = var
                f2.create_variable(to_save_ds, var, variables[var])

            ### ---------- adding the sonde_id var to the dataset --------- #####
            sonde_id = (status_ds.swap_dims({
                "sonde_id": "launch_time"
            }).sel(launch_time=sonde_ds[i].launch_time.values).sonde_id.values)
            attrs = {
                "descripion": "unique sonde ID",
                "long_name": "sonde identifier",
                "cf_role": "trajectory_id",
            }
            sonde_id_var = xr.Variable([], sonde_id, attrs=attrs)
            to_save_ds["sonde_id"] = sonde_id_var

            # file name
            file_name = (
                "EUREC4A_JOANNE"
                # + str(Platform)
                + "_Dropsonde-RD41_" + str(sonde_id) + "_Level_2" + "_v" +
                str(joanne.__version__) + ".nc")
            save_directory = "/Users/geet/Documents/JOANNE/Data/Level_2/"

            comp = dict(
                zlib=True,
                complevel=4,
                fletcher32=True,
                _FillValue=np.finfo("float32").max,
Example #17
def faces_dataset_to_latlon(ds,
                            metric_vector_pairs=[('dxC', 'dyC'),
                                                 ('dyG', 'dxG')]):
    """Transform a 13-face LLC xarray Dataset into a rectancular grid,
    discarding the Arctic.

    Parameters
    ----------
    ds : xarray.Dataset
        A 13-face LLC dataset
    metric_vector_pairs : list, optional
        Pairs of variables that are positive-definite metrics located at grid
        edges.

    Returns
    -------
    out : xarray.Dataset
        Transformed rectangular dataset
    """

    coord_vars = list(ds.coords)
    ds_new = _faces_coords_to_latlon(ds)

    vector_pairs = []
    scalars = []
    vnames = list(ds.reset_coords().variables)
    for vname in vnames:
        try:
            mate = ds[vname].attrs['mate']
            vector_pairs.append((vname, mate))
            vnames.remove(mate)
        except KeyError:
            pass

    all_vector_components = [
        inner for outer in (vector_pairs + metric_vector_pairs)
        for inner in outer
    ]
    scalars = [vname for vname in vnames if vname not in all_vector_components]
    data_vars = {}

    for vname in scalars:
        if vname == 'face' or vname in ds_new:
            continue
        if 'face' in ds[vname].dims:
            data = _faces_to_latlon_scalar(ds[vname].data)
            dims = _drop_facedim(ds[vname].dims)
        else:
            data = ds[vname].data
            dims = ds[vname].dims
        data_vars[vname] = xr.Variable(dims, data, ds[vname].attrs)

    for vname_u, vname_v in vector_pairs:
        data_u, data_v = _faces_to_latlon_vector(ds[vname_u].data,
                                                 ds[vname_v].data)
        data_vars[vname_u] = xr.Variable(_drop_facedim(ds[vname_u].dims),
                                         data_u, ds[vname_u].attrs)
        data_vars[vname_v] = xr.Variable(_drop_facedim(ds[vname_v].dims),
                                         data_v, ds[vname_v].attrs)
    for vname_u, vname_v in metric_vector_pairs:
        data_u, data_v = _faces_to_latlon_vector(ds[vname_u].data,
                                                 ds[vname_v].data,
                                                 metric=True)
        data_vars[vname_u] = xr.Variable(_drop_facedim(ds[vname_u].dims),
                                         data_u, ds[vname_u].attrs)
        data_vars[vname_v] = xr.Variable(_drop_facedim(ds[vname_v].dims),
                                         data_v, ds[vname_v].attrs)

    ds_new = ds_new.update(data_vars)
    ds_new = ds_new.set_coords([c for c in coord_vars if c in ds_new])
    return ds_new
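
A brief call sketch, assuming ds is a 13-face LLC dataset (for example one produced with get_dataset(type='faces') above):

ds_latlon = faces_dataset_to_latlon(ds)
# the metric pairs can also be passed explicitly (these are the defaults)
ds_latlon = faces_dataset_to_latlon(
    ds, metric_vector_pairs=[('dxC', 'dyC'), ('dyG', 'dxG')])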
Example #18
    def test_diff_array_repr(self):
        da_a = xr.DataArray(
            np.array([[1, 2, 3], [4, 5, 6]], dtype="int64"),
            dims=("x", "y"),
            coords={
                "x": np.array(["a", "b"], dtype="U1"),
                "y": np.array([1, 2, 3], dtype="int64"),
            },
            attrs={"units": "m", "description": "desc"},
        )

        da_b = xr.DataArray(
            np.array([1, 2], dtype="int64"),
            dims="x",
            coords={
                "x": np.array(["a", "c"], dtype="U1"),
                "label": ("x", np.array([1, 2], dtype="int64")),
            },
            attrs={"units": "kg"},
        )

        byteorder = "<" if sys.byteorder == "little" else ">"
        expected = dedent(
            """\
        Left and right DataArray objects are not identical
        Differing dimensions:
            (x: 2, y: 3) != (x: 2)
        Differing values:
        L
            array([[1, 2, 3],
                   [4, 5, 6]], dtype=int64)
        R
            array([1, 2], dtype=int64)
        Differing coordinates:
        L * x        (x) %cU1 'a' 'b'
        R * x        (x) %cU1 'a' 'c'
        Coordinates only on the left object:
          * y        (y) int64 1 2 3
        Coordinates only on the right object:
            label    (x) int64 1 2
        Differing attributes:
        L   units: m
        R   units: kg
        Attributes only on the left object:
            description: desc"""
            % (byteorder, byteorder)
        )

        actual = formatting.diff_array_repr(da_a, da_b, "identical")
        try:
            assert actual == expected
        except AssertionError:
            # depending on platform, dtype may not be shown in numpy array repr
            assert actual == expected.replace(", dtype=int64", "")

        va = xr.Variable(
            "x", np.array([1, 2, 3], dtype="int64"), {"title": "test Variable"}
        )
        vb = xr.Variable(("x", "y"), np.array([[1, 2, 3], [4, 5, 6]], dtype="int64"))

        expected = dedent(
            """\
        Left and right Variable objects are not equal
        Differing dimensions:
            (x: 3) != (x: 2, y: 3)
        Differing values:
        L
            array([1, 2, 3], dtype=int64)
        R
            array([[1, 2, 3],
                   [4, 5, 6]], dtype=int64)"""
        )

        actual = formatting.diff_array_repr(va, vb, "equals")
        try:
            assert actual == expected
        except AssertionError:
            assert actual == expected.replace(", dtype=int64", "")
Example #19
    def write_crs(self, input_crs=None, grid_mapping_name=None, inplace=False):
        """
        Write the CRS to the dataset in a CF compliant manner.

        Parameters
        ----------
        input_crs: object
            Anything accepted by `rasterio.crs.CRS.from_user_input`.
        grid_mapping_name: str, optional
            Name of the grid_mapping coordinate to store the CRS information in.
            Default is the grid_mapping name of the dataset.
        inplace: bool, optional
            If True, it will write to the existing dataset. Default is False.

        Returns
        -------
        :obj:`xarray.Dataset` | :obj:`xarray.DataArray`:
            Modified dataset with CF compliant CRS information.

        Examples
        --------
        Write the CRS of the current `xarray` object:

        >>> raster.rio.write_crs("epsg:4326", inplace=True)

        Write the CRS on a copy:

        >>> raster = raster.rio.write_crs("epsg:4326")
        """
        if input_crs is not None:
            data_obj = self.set_crs(input_crs, inplace=inplace)
        else:
            data_obj = self._get_obj(inplace=inplace)

        # get original transform
        transform = self._cached_transform()
        # remove old grid mapping coordinate if it exists
        grid_mapping_name = (self.grid_mapping if grid_mapping_name is None
                             else grid_mapping_name)
        try:
            del data_obj.coords[grid_mapping_name]
        except KeyError:
            pass

        if data_obj.rio.crs is None:
            raise MissingCRS(
                "CRS not found. Please set the CRS with 'rio.write_crs()'.")
        # add grid mapping coordinate
        data_obj.coords[grid_mapping_name] = xarray.Variable((), 0)
        if get_option(EXPORT_GRID_MAPPING):
            grid_map_attrs = pyproj.CRS.from_user_input(
                data_obj.rio.crs).to_cf()
        else:
            grid_map_attrs = {}
        # spatial_ref is for compatibility with GDAL
        crs_wkt = data_obj.rio.crs.to_wkt()
        grid_map_attrs["spatial_ref"] = crs_wkt
        grid_map_attrs["crs_wkt"] = crs_wkt
        if transform is not None:
            grid_map_attrs["GeoTransform"] = " ".join(
                [str(item) for item in transform.to_gdal()])
        data_obj.coords[grid_mapping_name].rio.set_attrs(grid_map_attrs,
                                                         inplace=True)

        return data_obj.rio.write_grid_mapping(
            grid_mapping_name=grid_mapping_name, inplace=True)
Example #20
def test_CFMaskCoder_decode():
    original = xr.Variable(("x",), [0, -1, 1], {"_FillValue": -1})
    expected = xr.Variable(("x",), [0, np.nan, 1])
    coder = variables.CFMaskCoder()
    encoded = coder.decode(original)
    assert_identical(expected, encoded)
Example #21
 def _interpolate_to_raster(cls, variable, biggest_variable):
     shape = biggest_variable.shape
     full_size_array = resample_2d(variable.values, shape[1], shape[0])
      # dims must be dimension names, not the shape tuple
      return xr.Variable(biggest_variable.dims[-2:], full_size_array)
Example #22
def test_coder_roundtrip():
    original = xr.Variable(("x",), [0.0, np.nan, 1.0])
    coder = variables.CFMaskCoder()
    roundtripped = coder.decode(coder.encode(original))
    assert_identical(original, roundtripped)
Example #23
 def compute(self, data, selected_indexes):
     observed = data.time.values[selected_indexes]
     observed_date = xarray.Variable(('y', 'x'),
                                     datetime64_to_inttime(observed))
     return self._var_name, observed_date
Example #24
def load_single(file,
                drop_ghost=True,
                use_dask=True,
                var_list="all",
                ini_file=None):
    """Load a single step file and generate an xarray Dataset

    Parameters
    ----------
    file : str or Path
        Location of the file to load
    drop_ghost : bool, optional
        Drop all of the ghost cells, by default True
    use_dask : bool, optional
        Load the arrays lazily as dask arrays, by default True
    var_list : List, optional
        Load only a specific set of variables, by default 'all'
    ini_file : str or Path, optional
        Input .ini file whose settings are added to the dataset
        attributes, by default None

    Returns
    -------
    xarray Dataset
    """

    if var_list == "all":
        var_list = [
            "density",
            "pressure",
            "sound_speed",
            "x_velocity",
            "y_velocity",
            "ghost_cell",
            "deposited_energy",
            "deposited_power",
        ]

    data_vars = {}
    space_dims = ("i", "j")

    # `file` may be a str or a Path (see docstring), so normalize before checking
    if not str(file).endswith(".h5"):
        raise ValueError("Step files must be .h5 files")

    h5 = h5py.File(file, "r")

    for v in var_list:
        try:
            h5[f"/{v}"].shape
        except KeyError:
            continue

        if use_dask:
            chunk_size = h5[f"/{v}"].shape
            array = da.from_array(h5[f"/{v}"], chunks=chunk_size)
            array = da.transpose(array)
        else:
            array = h5[f"/{v}"][()].T.astype(np.float32)

        try:
            long_name = var_dict[v]["long_name"]
        except Exception:
            long_name = ""

        try:
            description = h5[f"/{v}"].attrs["description"].decode("utf-8")
        except Exception:
            description = ""

        try:
            standard_name = var_dict[v]["standard_name"]
        except Exception:
            standard_name = ""

        try:
            units = h5[f"/{v}"].attrs["units"].decode("utf-8")
        except Exception:
            units = ""

        data_vars[f"{v}"] = xr.Variable(
            space_dims,
            array,
            attrs={
                "units": units,
                "description": description,
                "long_name": long_name,
                "standard_name": standard_name,
            },
        )

    x = h5[f"/x"][()].T.astype(np.float32)
    x_units = h5[f"/x"].attrs["units"].decode("utf-8")
    y = h5[f"/y"][()].T.astype(np.float32)

    # Get the cell centers
    dy = (np.diff(x[0, :]) / 2.0)[0]
    dx = (np.diff(y[:, 0]) / 2.0)[0]

    # cell center locations
    xc = x[:-1, 0] + dx
    yc = y[0, :-1] + dy

    coords = {
        "time": h5[f"/time"][()].astype(np.float32),
        "x": (["i"], xc),
        "y": (["j"], yc),
    }

    time_units = h5[f"/time"].attrs["units"].decode("utf-8")

    # Get the details about the CATO build
    info_attr = {}
    info = [
        "build_type",
        "compile_hostname",
        "compile_os",
        "compiler_flags",
        "compiler_version",
        "git_changes",
        "git_hash",
        "git_ref",
        "version",
    ]
    for v in info:
        try:
            info_attr[v] = h5["/cato_info"].attrs[f"{v}"].decode("utf-8")
        except Exception:
            pass

    attr_dict = info_attr
    attr_dict["time_units"] = time_units
    attr_dict["space_units"] = x_units

    if ini_file:
        input_dict = read_ini(ini_file)
        attr_dict.update(input_dict)

    ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attr_dict)
    if ini_file:
        try:
            ds.attrs["title"] = ds.attrs["general_title"]
        except Exception:
            pass

    if drop_ghost:
        try:
            ds = ds.where(ds["ghost_cell"] == 0, drop=True)
            return ds.drop("ghost_cell")
        except KeyError:
            return ds
    else:
        return ds
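A minimal usage sketch for load_single (the step-file name below is hypothetical; only a CATO-style .h5 file is assumed):

# Load one step file, dropping ghost cells and reading the arrays lazily via dask
ds = load_single("step_0000100.h5", drop_ghost=True, use_dask=True)

# Units recovered from the file attributes
print(ds.attrs["time_units"], ds.attrs["space_units"])

# Pick out a single variable (only present if it exists in the file)
density = ds["density"]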
Example #25
    for f in filenames_albedo:
        tif_file = rasterio.open(tif_dir + f)
        out_name = tmp_dir + "/" + Path(f).stem + ".tif"
        modis_functions.tif_clip(tif_file, tile_shp, out_name)
    for f in filenames_qc:
        tif_file = rasterio.open(tif_dir + f)
        out_name = tmp_dir + "/" + Path(f).stem + ".tif"
        modis_functions.tif_clip(tif_file, tile_shp, out_name)

    # Definition of the flags and criteria for choosing them is presented in:
    # https://daac.ornl.gov/ABOVE/guides/Albedo_Boreal_North_America.html
    # https://doi.org/10.1111/gcb.14888

    summer_flag = [0, 1, 2, 4, 5, 6, 16, 17, 18, 20, 21, 22]
    winter_flag = [0, 1, 2, 3, 4, 5, 6, 7, 15, 16, 17, 18, 19, 20, 21, 22, 23]
    date_xr = xr.Variable("time", date)
    a = xr.open_rasterio(tmp_dir + "/" + filenames_albedo[0])
    chunks = {"x": int(a.sizes["x"]), "y": int(a.sizes["x"]), "band": 1}
    da_albedo_init = xr.open_rasterio(tmp_dir + "/" + filenames_albedo[0],
                                      chunks=chunks)
    da_qc_init = xr.open_rasterio(tmp_dir + "/" + filenames_qc[0],
                                  chunks=chunks)

    if 5 <= date[0].month <= 9:
        # Summer months
        da_albedo_init = da_albedo_init.where(da_qc_init.isin(summer_flag))
    else:
        # Winter months
        da_albedo_init = da_albedo_init.where(da_qc_init.isin(winter_flag))
    ds_init = da_albedo_init.to_dataset(name="Albedo")
    ds_init = ds_init.assign_coords({"time": date_xr[0]})
Example #26
    def __init__(self,
                 data_dir,
                 grid_dir=None,
                 iternum=None,
                 delta_t=1,
                 read_grid=True,
                 file_prefixes=None,
                 ref_date=None,
                 calendar=None,
                 geometry='sphericalpolar',
                 endian='>',
                 ignore_unknown_vars=False,
                 default_dtype=np.dtype('f4'),
                 nx=None,
                 ny=None,
                 nz=None,
                 llc_method="smallchunks"):
        """
        This is not a user-facing class. See open_mdsdataset for argument
        documentation. The only arguments that differ are listed below.

        Parameters
        ----------
        iternum : int, optional
            The iteration timestep number to read.
        file_prefixes : list
            The prefixes of the data files to be read.
        """

        self.geometry = geometry.lower()
        allowed_geometries = [
            'cartesian', 'sphericalpolar', 'llc', 'curvilinear'
        ]
        if self.geometry not in allowed_geometries:
            raise ValueError('Unexpected value for parameter `geometry`. '
                             'It must be one of the following: %s' %
                             allowed_geometries)

        # the directory where the files live
        self.data_dir = data_dir
        self.grid_dir = grid_dir if (grid_dir is not None) else data_dir
        self._ignore_unknown_vars = ignore_unknown_vars

        # The endianness of the files
        # By default, MITgcm does big endian
        if endian not in ['>', '<', '=']:
            raise ValueError("Invalid byte order (endian=%s)" % endian)
        self.endian = endian
        if default_dtype is not None:
            self.default_dtype = np.dtype(default_dtype).newbyteorder(endian)
        else:
            self.default_dtype = default_dtype

        # storage dicts for variables and attributes
        self._variables = xr.core.pycompat.OrderedDict()
        self._attributes = xr.core.pycompat.OrderedDict()
        self._dimensions = []

        # the dimensions are theoretically the same for all datasets
        for k in dimensions:
            self._dimensions.append(k)
        self.llc = (self.geometry == 'llc')

        # TODO: and maybe here a check for the presence of layers?

        # we don't need to know ny if using llc
        if self.llc and (nx is not None):
            ny = nx

        # Now we need to figure out the dimensions of the numerical domain,
        # nx, ny, nz
        # nface is the number of llc faces
        if (nx is not None) and (ny is not None) and (nz is not None):
            # we have been passed enough information to determine the
            # dimensions without reading any files
            self.nz, self.ny, self.nx = nz, ny, nx
            self.nface = LLC_NUM_FACES if self.llc else None
        else:
            # have to peek at the grid file metadata
            self.nz, self.nface, self.ny, self.nx = (_guess_model_dimensions(
                self.grid_dir, self.llc))
        self.layers = _guess_layers(data_dir)

        if self.llc:
            nyraw = self.nx * self.nface
        else:
            nyraw = self.ny
        self.default_shape_3D = (self.nz, nyraw, self.nx)
        self.default_shape_2D = (nyraw, self.nx)
        self.llc_method = llc_method

        # Now set up the corresponding coordinates.
        # Rather than assuming the dimension names, we use Comodo conventions
        # to parse the dimension metadata.
        # http://pycomodo.forge.imag.fr/norm.html
        irange = np.arange(self.nx)
        jrange = np.arange(self.ny)
        krange = np.arange(self.nz)
        krange_p1 = np.arange(self.nz + 1)
        # the keys are `standard_name` attribute
        dimension_data = {
            "x_grid_index": irange,
            "x_grid_index_at_u_location": irange,
            "x_grid_index_at_f_location": irange,
            "y_grid_index": jrange,
            "y_grid_index_at_v_location": jrange,
            "y_grid_index_at_f_location": jrange,
            "z_grid_index": krange,
            "z_grid_index_at_lower_w_location": krange,
            "z_grid_index_at_upper_w_location": krange,
            "z_grid_index_at_w_location": krange_p1,
        }

        for dim in self._dimensions:
            dim_meta = dimensions[dim]
            dims = dim_meta['dims']
            attrs = dim_meta['attrs']
            data = dimension_data[attrs['standard_name']]
            dim_variable = xr.Variable(dims, data, attrs)
            self._variables[dim] = dim_variable

        # possibly add the llc dimension
        # seems sloppy to hard code this here
        # TODO: move this metadata to variables.py
        if self.llc:
            self._dimensions.append(LLC_FACE_DIMNAME)
            data = np.arange(self.nface)
            attrs = {'standard_name': 'face_index'}
            dims = [LLC_FACE_DIMNAME]
            self._variables[LLC_FACE_DIMNAME] = xr.Variable(dims, data, attrs)

        # do the same for layers
        for layer_name, n_layer in self.layers.items():
            for suffix, offset in zip(['bounds', 'center', 'interface'],
                                      [0, -1, -2]):
                # e.g. "layer_1RHO_bounds"
                # dimname = 'layer_' + layer_name + '_' + suffix
                # e.g. "l1_b"
                dimname = 'l' + layer_name[0] + '_' + suffix[0]
                self._dimensions.append(dimname)
                data = np.arange(n_layer + offset)
                # we should figure out a way to properly populate the layers
                # attributes
                attrs = {
                    'standard_name':
                    layer_name + '_layer_grid_index_at_layer_' + suffix,
                    'swap_dim': 'layer_' + layer_name + '_' + suffix
                }
                dim_variable = xr.Variable([dimname], data, attrs)
                self._variables[dimname] = dim_variable

        # maybe add a time dimension
        if iternum is not None:
            self.time_dim_name = 'time'
            self._dimensions.append(self.time_dim_name)
            # a variable for iteration number
            self._variables['iter'] = xr.Variable(
                (self.time_dim_name, ), [iternum], {
                    'standard_name': 'timestep',
                    'long_name': 'model timestep number'
                })
            self._variables[
                self.time_dim_name] = _iternum_to_datetime_variable(
                    iternum, delta_t, ref_date, calendar, self.time_dim_name)

        # build lookup tables for variable metadata
        self._all_grid_variables = _get_all_grid_variables(
            self.geometry, self.layers)
        self._all_data_variables = _get_all_data_variables(
            self.data_dir, self.layers)

        # The rest of the data has to be read from disk.
        # The list `prefixes` specifies file prefixes from which to infer
        # The problem with this is that some prefixes are single variables
        # while some are multi-variable diagnostics files.
        prefixes = []
        if read_grid:
            prefixes = prefixes + list(self._all_grid_variables.keys())

        # add data files
        prefixes = (
            prefixes +
            _get_all_matching_prefixes(data_dir, iternum, file_prefixes))

        for p in prefixes:
            # use a generator to loop through the variables in each file
            for (vname, dims, data,
                 attrs) in self.load_from_prefix(p, iternum):
                # print(vname, dims, data.shape)
                # Sizes of grid variables can vary between MITgcm versions.
                # Check for such inconsistency and correct if so.
                (vname, dims, data, attrs) = self.fix_inconsistent_variables(
                    vname, dims, data, attrs)

                thisvar = xr.Variable(dims, data, attrs)
                self._variables[vname] = thisvar
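Since this class is not user-facing, a hedged sketch of the corresponding user-level call (the path, iteration number and prefixes are illustrative only):

# _MDSDataStore above is constructed internally by open_mdsdataset
from xmitgcm import open_mdsdataset

ds = open_mdsdataset('./run', iters=[72], prefix=['T', 'S'],
                     delta_t=900, geometry='sphericalpolar')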
Example #27
def driver(args):

  gridpath = './script_files/' #args.gridpath #'/glade/work/gmarques/cesm/mom6_input_files/tx0.66v1/salinity_restoring'
  if not os.path.isdir(gridpath):
    print('Creating a directory to place SCRIP files: {} ... \n'.format(gridpath))
    os.system('mkdir '+gridpath)

  esmlab.config.set({'regrid.gridfile-directory': gridpath})
  # src and dst grids
  if args.src_grid_name == 'WOA_01':
    src_grid_name = 'WOA_01_SCRIP'
    os.system('ln -s  /glade/work/gmarques/cesm/datasets/WOA18/WOA_01_SCRIP.nc '+gridpath)
  elif args.src_grid_name == 'WOA_04':
    src_grid_name = 'WOA_04_SCRIP'
    os.system('ln -s  /glade/work/gmarques/cesm/datasets/WOA18/WOA_04_SCRIP.nc '+gridpath)
  else:
    raise ValueError('The source grid name provided, {}, is not supported. Please use WOA_01 or WOA_04.'.format(args.src_grid_name))

  if args.dst_grid_name == 'tx0.66v1':
    dst_grid_name = 'tx0.66v1_SCRIP_190314'
    os.system('ln -s  /glade/work/altuntas/mom.input/tx0.66v1/gen_grid_190314/tx0.66v1_SCRIP_190314.nc '+gridpath)
    # prototype for the restoring file for the tx0.66v1 grid
    ds_out = xr.open_dataset('/glade/p/cesmdata/cseg/inputdata/ocn/mom/tx0.66v1/salt_restore_tx0.66v1_180828.nc',
             decode_times=False)
    ds_out['theta0'] = xr.Variable(dims=('TIME','LAT','LON'), data = np.zeros(ds_out.salt.shape))
  elif args.dst_grid_name == 'tx0.1v3':
    dst_grid_name = 'tx0.1v3_SCRIP_200721'
    os.system('ln -s  /glade/work/gmarques/Projects/MOM_tx0.1_v3/tx0.1v3_SCRIP_200721.nc '+gridpath)
    # prototype for the restoring file for the tx0.1v3 grid
    ds_out = xr.open_dataset('/glade/work/gmarques/Projects/MOM_tx0.1_v3/salt_restore_tx0.1v3_200807.nc',
             decode_times=False)
    ds_out['theta0'] = xr.Variable(dims=('TIME','LAT','LON'), data = np.zeros(ds_out.salt.shape))
  else:
    raise ValueError('The destination grid name provided, {}, is not supported. Please use tx0.66v1 or tx0.1v3.'.format(args.dst_grid_name))

  # generate weights
  R_bilinear = esmlab_regrid.regridder(name_grid_src=src_grid_name, name_grid_dst=dst_grid_name,
                            method='bilinear', overwrite_existing=True)

  ###########################################################################
  # WOA salinity file with land fill, created using create_filled_sfc.py
  woa = xr.open_dataset(args.infile, decode_times=False)

  # average between two-layers (depth = 0 and depth = 10, depth indices 0 and 2)
  woa_s_an_surface_ave = woa.s_an.isel(depth=[0,2]).mean('depth')
  woa_theta0_surface_ave = woa.theta0.isel(depth=[0,2]).mean('depth')

  # regrid and compare against original
  for m in range(len(woa.time)):
    ds_out.salt[m,:] = R_bilinear(woa_s_an_surface_ave[m,:]).rename({'lat':'LAT', 'lon':'LON'})
    ds_out.theta0[m,:] = R_bilinear(woa_theta0_surface_ave[m,:]).rename({'lat':'LAT', 'lon':'LON'})

  ###########################################################################
  # Global attrs
  ds_out.attrs['title'] = 'surface salinity and potential temperature from WOA filled over continents'
  ds_out.attrs['src_file'] = args.infile
  ds_out.attrs['src_grid_name'] = args.src_grid_name
  ds_out.attrs['dst_grid_name'] = args.dst_grid_name
  ds_out.attrs['author'] = args.author
  ds_out.attrs['date'] = datetime.now().isoformat()
  ds_out.attrs['created_using'] = os.path.basename(__file__) + ' -path_out ' + args.path_out + ' -author ' + \
    args.author + ' -infile ' + args.infile + ' -src_grid_name ' + args.src_grid_name + \
    ' -dst_grid_name ' + args.dst_grid_name
  ds_out.attrs['url'] = os.path.basename(__file__) + ' can be found at https://github.com/NCAR/WOA_MOM6'
  ds_out.attrs['git_hash'] = str(subprocess.check_output(["git", "describe","--always"]).strip())
  # save
  fname = 'state_restore_{}_{}{}{}.nc'.format(args.dst_grid_name, datetime.now().isoformat()[0:4],datetime.now().isoformat()[5:7],
           datetime.now().isoformat()[8:10])
  ds_out.to_netcdf(args.path_out+fname)
  print('Done!')
  return
Example #28
plev_xr = data['plev']
lat_xr = data['lat']
k_xr = data['k']
plev = plev_xr.values
lat = lat_xr.values
k = k_xr.values
f = data['f'].values

# Transpose
# NOTE: This is to speed things up when looping over the array
data = data.transpose('lat', 'plev', 'k', 'f')

# Output dataset
cph_q = np.arange(-50, 50.01, 1.0)  # try to reduce the resolution!
cph_xr = xr.Variable(('c', ), cph_q, {
    'long_name': 'phase speed',
    'units': 'm/s'
})
data_c = xr.Dataset({},
                    coords={
                        'lat': lat_xr,
                        'plev': plev_xr,
                        'k': k_xr,
                        'c': cph_xr
                    })

# Iterate over names, latitudes, longitudes
# Problem is that 'phase speed' is actually a line through the wavenumber,
# frequency, and latitude dimensions!
# As suggested by Randel and Held, *interpolate* from omega by k
# to phase speed by k (basically interpolates to diagonal lines in
# an omega by k plot, since cph == omega/k). See:
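A minimal sketch (not from the original script) of the interpolation those comments describe; power is a hypothetical (lat, plev, k, f) array standing in for the spectral data, unit conversions between f, omega, and k are omitted, and f is assumed increasing with k >= 0:

power = np.zeros((lat.size, plev.size, k.size, f.size))  # hypothetical spectrum
power_c = np.full((lat.size, plev.size, k.size, cph_q.size), np.nan)

for ik, kk in enumerate(k):
    if kk == 0:
        continue  # phase speed c = omega / k is undefined at k == 0
    f_of_c = cph_q * kk  # frequencies that land on the fixed phase-speed grid
    for ilat in range(lat.size):
        for ip in range(plev.size):
            power_c[ilat, ip, ik, :] = np.interp(
                f_of_c, f, power[ilat, ip, ik, :],
                left=np.nan, right=np.nan)

# A full Randel-and-Held style version would also multiply by k so that the
# variance integrated over c matches the variance integrated over f.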
Example #29
def make_xrvar(dim_lengths):
    return xr.Variable(tuple(dim_lengths.keys()),
                       make_sparray(shape=tuple(dim_lengths.values())))
Example #30
    def get_dataset(self,
                    varnames=None,
                    iter_start=None,
                    iter_stop=None,
                    iter_step=None,
                    iters=None,
                    k_levels=None,
                    k_chunksize=1,
                    type='faces',
                    read_grid=True,
                    grid_vars_to_coords=True):
        """
        Create an xarray Dataset object for this model.

        Parameters
        ----------
        varnames : list of strings, optional
            The variables to include, e.g. ``['Salt', 'Theta']``. Otherwise
            include all known variables.
        iter_start : int, optional
            Starting iteration number. Otherwise use model default.
            Follows standard `range` conventions. (inclusive)
        iter_stop : int, optional
            Stopping iteration number. Otherwise use model default.
            Follows standard `range` conventions. (exclusive)
        iter_step : int, optional
            Iteration number stepsize. Otherwise use model default.
        iters : list of ints, optional
            Specific iteration numbers in a list, possibly with nonuniform spacing.
            Either provide this or the iter parameters above.
        k_levels : list of ints, optional
            Vertical levels to extract. Default is to get them all
        k_chunksize : int, optional
            How many vertical levels per Dask chunk.
        type : {'faces', 'latlon'}, optional
            What type of dataset to create
        read_grid : bool, optional
            Whether to read the grid info
        grid_vars_to_coords : bool, optional
            Whether to promote grid variables to coordinate status

        Returns
        -------
        ds : xarray.Dataset
        """
        def _if_not_none(a, b):
            if a is None:
                return b
            else:
                return a

        user_iter_params = [iter_start, iter_stop, iter_step]
        attribute_iter_params = [
            self.iter_start, self.iter_stop, self.iter_step
        ]

        # If the user has specified some iter params:
        if any([a is not None for a in user_iter_params]):
            # If iters is also set we have a problem
            if iters is not None:
                raise ValueError(
                    "Only `iters` or the parameters `iter_start`, `iters_stop`, "
                    "and `iter_step` can be provided. Both were provided")

            # Otherwise we can override any missing values
            iter_start = _if_not_none(iter_start, self.iter_start)
            iter_stop = _if_not_none(iter_stop, self.iter_stop)
            iter_step = _if_not_none(iter_step, self.iter_step)
            iter_params = [iter_start, iter_stop, iter_step]
            if any([a is None for a in iter_params]):
                raise ValueError(
                    "The parameters `iter_start`, `iter_stop`, "
                    "and `iter_step` must be defined either by the "
                    "model class or as argument. Instead got %r " %
                    iter_params)

        # Otherwise try loading from the user set iters
        elif iters is not None:
            pass

        # Now have a go at using the attribute derived iteration parameters
        elif all([a is not None for a in attribute_iter_params]):
            iter_params = attribute_iter_params

        # Now try using the attribute derived iters
        elif self.iters is not None:
            iters = self.iters

        # Now give up
        else:
            raise ValueError(
                "The parameters `iter_start`, `iter_stop`, "
                "and `iter_step`, or `iters` must be defined either by the "
                "model class or as argument")

        # Check the iter_start and iter_step
        if iters is None:
            self._check_iter_start(iter_params[0])
            self._check_iter_step(iter_params[2])
            iters = np.arange(*iter_params)
        else:
            self._check_iters(iters)
            iters = np.array(iters)

        varnames = varnames or self.varnames

        # grid stuff
        read_grid = read_grid and len(self.grid_varnames) != 0
        if read_grid and self.store.grid_path is None:
            raise TypeError(
                'Cannot read grid if grid_path is not specified in filestore (e.g. llcreader.known_models)'
            )
        grid_vars_to_coords = grid_vars_to_coords and read_grid
        grid_varnames = self.grid_varnames if read_grid else []

        ds = self._make_coords_faces(iters)
        if type == 'latlon':
            if self.domain == 'aste':
                raise TypeError(
                    'Swapping to lat/lon not available for ASTE. Must regrid or interpolate.'
                )
            ds = _faces_coords_to_latlon(ds)

        k_levels = k_levels or list(range(self.nz))
        kp1_levels = self._get_kp1_levels(k_levels)

        ds = ds.sel(k=k_levels, k_l=k_levels, k_u=k_levels, k_p1=kp1_levels)

        # get the data in facet form
        data_facets = {
            vname: self._get_facet_data(vname, iters, k_levels, k_chunksize)
            for vname in varnames
        }

        # get the grid in facet form
        # do separately for vertical coords on kp1_levels
        grid_facets = {}
        for vname in grid_varnames:
            my_k_levels = k_levels if _VAR_METADATA[vname]['dims'] != [
                'k_p1'
            ] else kp1_levels
            grid_facets[vname] = self._get_facet_data(vname, None, my_k_levels,
                                                      k_chunksize)

        # transform it into faces or latlon
        data_transformers = {
            'faces': _all_facets_to_faces,
            'latlon': _all_facets_to_latlon
        }

        transformer = data_transformers[type]
        data = transformer(data_facets, _VAR_METADATA, self.nface)

        # separate horizontal and vertical grid variables
        hgrid_facets = {
            key: grid_facets[key]
            for key in grid_varnames if not _is_vgrid(key)
        }
        vgrid_facets = {
            key: grid_facets[key]
            for key in grid_varnames if _is_vgrid(key)
        }

        # do not transform vertical grid variables
        data.update(transformer(hgrid_facets, _VAR_METADATA, self.nface))
        data.update(vgrid_facets)

        variables = {}
        gridlist = ['Zl', 'Zu'] if read_grid else []
        for vname in varnames + grid_varnames:
            meta = _VAR_METADATA[vname]
            dims = meta['dims']
            if type == 'faces':
                dims = _add_face_to_dims(dims)
            dims = [
                'time',
            ] + dims if vname not in grid_varnames else dims
            attrs = meta['attrs']

            # Handle grid names different from filenames
            fname = vname
            vname = meta['real_name'] if 'real_name' in meta else vname
            if fname in grid_varnames:
                gridlist.append(vname)

            variables[vname] = xr.Variable(dims, data[fname], attrs)

        # handle vertical coordinate after the fact
        if read_grid and 'RF' in grid_varnames:
            ki = np.array([list(kp1_levels).index(x) for x in k_levels])
            for zv, sl in zip(['Zl', 'Zu'], [ki, ki + 1]):
                variables[zv] = xr.Variable(_VAR_METADATA[zv]['dims'],
                                            data['RF'][sl],
                                            _VAR_METADATA[zv]['attrs'])

        ds = ds.update(variables)

        if grid_vars_to_coords:
            ds = ds.set_coords(gridlist)

        return ds
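A hedged usage sketch for get_dataset (the model class and variable name are illustrative; any llcreader known model exposing get_dataset would work the same way):

# Hypothetical call through xmitgcm.llcreader; requires access to the ECCO
# data portal, and the variable/level choices are illustrative only.
from xmitgcm import llcreader

model = llcreader.ECCOPortalLLC2160Model()
ds = model.get_dataset(varnames=['Theta'], k_levels=[0],
                       type='latlon', read_grid=True)
print(ds)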