Example #1
0
    def northTest(self, neigs=None, vfscaled=False):
        """Typical sampling errors of the eigenvalues.

        The typical error of each eigenvalue is estimated with the
        method of North et al. (1982). The estimate assumes that the
        number of times in the input data set equals the number of
        independent realizations; when that assumption does not hold
        the result may be inappropriate.

        **Optional arguments:**

        *neigs*
            How many eigenvalues to return typical errors for. By
            default typical errors are returned for all eigenvalues.

        *vfscaled*
            If *True* the errors are scaled by the sum of the
            eigenvalues, which puts them on the same scale as the
            values returned by `Eof.varianceFraction`. If *False* no
            scaling is done. Defaults to *False* (no scaling).

        **Returns:**

        *errors*
            A `~xarray.DataArray` holding the typical error of each
            eigenvalue. The eigenvalues are numbered from 0 to
            *neigs* - 1.

        **References**

        North G.R., T.L. Bell, R.F. Cahalan, and F.J. Moeng (1982)
        Sampling errors in the estimation of empirical orthogonal
        functions. *Mon. Weather. Rev.*, **110**, pp 669-706.

        **Examples:**

        Typical errors for all eigenvalues::

            errors = solver.northTest()

        Typical errors for the first 3 eigenvalues scaled by the sum of
        the eigenvalues::

            errors = solver.northTest(neigs=3, vfscaled=True)

        """
        errors = self._solver.northTest(neigs=neigs, vfscaled=vfscaled)
        # One integer label per returned eigenvalue.
        mode_attrs = {'long_name': 'eof_mode_number'}
        mode_coord = xr.Coordinate('mode',
                                   range(errors.shape[0]),
                                   attrs=mode_attrs)
        return xr.DataArray(errors,
                            coords=[mode_coord],
                            name='typical_errors',
                            attrs={'long_name': 'typical_errors'})
Example #2
0
def _wrap_xarray(reference, lats, lons):
    """Wrap every entry of *reference* in a latitude/longitude DataArray.

    The mapping is modified in place: each value is replaced by a
    `DataArray` whose coordinates are built from *lats* and *lons* (in
    that order) and whose ``long_name`` attribute is the entry's key.
    Nothing is returned.

    The ``xarray`` module is imported lazily, falling back to the legacy
    ``xray`` name; a `ValueError` is raised if neither can be imported.
    """
    try:
        import xarray as xr
    except ImportError:
        try:
            import xray as xr
        except ImportError:
            raise ValueError("cannot use container 'xarray' without xarray")
    lon_attrs = {'standard_name': 'longitude', 'units': 'degrees_east'}
    lat_attrs = {'standard_name': 'latitude', 'units': 'degrees_north'}
    lon_coord = xr.Coordinate('longitude', lons, attrs=lon_attrs)
    lat_coord = xr.Coordinate('latitude', lats, attrs=lat_attrs)
    grid = [lat_coord, lon_coord]
    # Only existing keys are reassigned, so iterating while assigning is safe.
    for key in reference.keys():
        raw = reference[key]
        reference[key] = xr.DataArray(raw,
                                      coords=grid,
                                      attrs={'long_name': key})
Example #3
0
def _wrap_xarray(solution, neofs, time_units):
    """Replace the arrays in *solution* with coordinate-aware DataArrays.

    The ``'sst'``, ``'eigenvalues'`` and ``'eofs'`` entries of *solution*
    are rewrapped in place using coordinates built from its ``'time'``,
    ``'latitude'`` and ``'longitude'`` entries plus an ``'eof'``
    coordinate numbered from 1 to *neofs*. Nothing is returned.

    *time_units* is accepted but not used by this function.

    The ``xarray`` module is imported lazily, falling back to the legacy
    ``xray`` name; a `ValueError` is raised if neither can be imported.
    """
    try:
        import xarray as xr
    except ImportError:
        try:
            import xray as xr
        except ImportError:
            raise ValueError("cannot use container 'xarray' without "
                             "the xarray/xray module")
    time_axis = xr.Coordinate('time', solution['time'])
    lat_axis = xr.Coordinate('latitude', solution['latitude'])
    lon_axis = xr.Coordinate('longitude', solution['longitude'])
    eof_axis = xr.Coordinate('eof', np.arange(1, neofs + 1))
    # Entry name -> ordered coordinates describing that entry's axes.
    layout = (
        ('sst', [time_axis, lat_axis, lon_axis]),
        ('eigenvalues', [eof_axis]),
        ('eofs', [eof_axis, lat_axis, lon_axis]),
    )
    for key, axes in layout:
        solution[key] = xr.DataArray(solution[key], coords=axes)
Example #4
0
    def eofs(self, eofscaling=0, neofs=None):
        """Empirical orthogonal functions (EOFs).

        **Optional arguments:**

        *eofscaling*
            Selects how the EOFs are scaled. Accepted values:

            * *0* : Un-scaled EOFs (default).
            * *1* : EOFs are divided by the square-root of their
              eigenvalues.
            * *2* : EOFs are multiplied by the square-root of their
              eigenvalues.

        *neofs*
            How many EOFs to return. By default all EOFs are returned.
            Requesting more EOFs than are available returns all
            available EOFs.

        **Returns:**

        *eofs*
           A `~xarray.DataArray` holding the ordered EOFs, numbered
           from 0 to *neofs* - 1.

        **Examples:**

        All EOFs with no scaling::

            eofs = solver.eofs()

        First 3 EOFs with scaling applied::

            eofs = solver.eofs(neofs=3, eofscaling=1)

        """
        raw_eofs = self._solver.eofs(eofscaling, neofs)
        mode_coord = xr.Coordinate('mode',
                                   range(raw_eofs.shape[0]),
                                   attrs={'long_name': 'eof_mode_number'})
        wrapped = xr.DataArray(
            raw_eofs,
            coords=[mode_coord] + self._coords,
            name='eofs',
            attrs={'long_name': 'empirical_orthogonal_functions'})
        # Re-attach the spatial non-dimension coordinates.
        extra_coords = {}
        for coord in self._space_ndcoords:
            extra_coords[coord.name] = (coord.dims, coord)
        wrapped.coords.update(extra_coords)
        return wrapped
Example #5
0
    def eofsAsCorrelation(self, neofs=None):
        """
        EOFs expressed as the correlation between the principal
        component time series (PCs) and the time series of the `Eof`
        input *dataset* at each grid point.

        .. note::

            These are not related to the EOFs computed from the
            correlation matrix.

        **Optional argument:**

        *neofs*
            How many EOFs to return. By default all EOFs are returned.
            Requesting more EOFs than are available returns all
            available EOFs.

        **Returns:**

        *eofs*
           A `~xarray.DataArray` holding the ordered EOFs, numbered
           from 0 to *neofs* - 1.

        **Examples:**

        All EOFs::

            eofs = solver.eofsAsCorrelation()

        The leading EOF::

            eof1 = solver.eofsAsCorrelation(neofs=1)

        """
        correlation_maps = self._solver.eofsAsCorrelation(neofs)
        mode_coord = xr.Coordinate('mode',
                                   range(correlation_maps.shape[0]),
                                   attrs={'long_name': 'eof_mode_number'})
        description = 'correlation_between_pcs_and_{!s}'.format(self._name)
        wrapped = xr.DataArray(correlation_maps,
                               coords=[mode_coord] + self._coords,
                               name='eofs',
                               attrs={'long_name': description})
        # Re-attach the spatial non-dimension coordinates.
        extra_coords = {}
        for coord in self._space_ndcoords:
            extra_coords[coord.name] = (coord.dims, coord)
        wrapped.coords.update(extra_coords)
        return wrapped
Example #6
0
    def pcs(self, pcscaling=0, npcs=None):
        """Principal component time series (PCs).

        **Optional arguments:**

        *pcscaling*
            Set the scaling of the retrieved PCs. The following
            values are accepted:

            * *0* : Un-scaled principal components (default).
            * *1* : Principal components are scaled to unit variance
              (divided by the square-root of their eigenvalue).
            * *2* : Principal components are multiplied by the
              square-root of their eigenvalue.

        *npcs*
            Number of PCs to retrieve. Defaults to all the PCs. If the
            number of requested PCs is more than the number that are
            available, then all available PCs will be returned.

        **Returns:**

        *pcs*
            A `~xarray.DataArray` containing the ordered PCs. The PCs
            are numbered from 0 to *npcs* - 1. The first dimension is
            the solver's time coordinate and the second is ``'mode'``.

        **Examples:**

        All un-scaled PCs::

            pcs = solver.pcs()

        First 3 PCs scaled to unit variance::

            pcs = solver.pcs(npcs=3, pcscaling=1)

        """
        pcs = self._solver.pcs(pcscaling, npcs)
        pcdim = xr.Coordinate('mode',
                              range(pcs.shape[1]),
                              attrs={'long_name': 'eof_mode_number'})
        coords = [self._time, pcdim]
        pcs = xr.DataArray(pcs, coords=coords, name='pcs')
        # Attach each non-dimension time coordinate along its own
        # dimension(s) instead of a hard-coded 'time', so that inputs
        # whose time dimension has a different name are handled too.
        pcs.coords.update(
            {coord.name: (coord.dims, coord)
             for coord in self._time_ndcoords})
        return pcs
Example #7
0
    def varianceFraction(self, neigs=None):
        """Fractional EOF mode variances.

        The fraction of the total variance explained by each EOF mode;
        each value lies between 0 and 1 inclusive.

        **Optional argument:**

        *neigs*
            How many eigenvalues to return the fractional variance for.
            By default all eigenvalues are used. Requesting more
            eigenvalues than are available returns the fractional
            variances for all available eigenvalues.

        **Returns:**

        *variance_fractions*
            A `~xarray.DataArray` holding the fractional variance of
            each eigenvalue. The eigenvalues are numbered from 0 to
            *neigs* - 1.

        **Examples:**

        The fractional variance represented by each eigenvalue::

            variance_fractions = solver.varianceFraction()

        The fractional variance represented by the first 3 eigenvalues::

            variance_fractions = solver.varianceFraction(neigs=3)

        """
        fractions = self._solver.varianceFraction(neigs=neigs)
        # One integer label per returned eigenvalue.
        mode_coord = xr.Coordinate('mode',
                                   range(fractions.shape[0]),
                                   attrs={'long_name': 'eof_mode_number'})
        return xr.DataArray(fractions,
                            coords=[mode_coord],
                            name='variance_fractions',
                            attrs={'long_name': 'variance_fractions'})
Example #8
0
    def eigenvalues(self, neigs=None):
        """Eigenvalues (decreasing variances) associated with each EOF.

        **Optional argument:**

        *neigs*
            How many eigenvalues to return. By default all eigenvalues
            are returned. Requesting more eigenvalues than are
            available returns all available eigenvalues.

        **Returns:**

        *eigenvalues*
            A `~xarray.DataArray` holding the eigenvalues arranged
            largest to smallest. The eigenvalues are numbered from 0 to
            *neigs* - 1.

        **Examples:**

        All eigenvalues::

            eigenvalues = solver.eigenvalues()

        The first eigenvalue::

            eigenvalue1 = solver.eigenvalues(neigs=1)

        """
        values = self._solver.eigenvalues(neigs=neigs)
        # One integer label per returned eigenvalue.
        mode_coord = xr.Coordinate('mode',
                                   range(values.shape[0]),
                                   attrs={'long_name': 'eof_mode_number'})
        return xr.DataArray(values,
                            coords=[mode_coord],
                            name='eigenvalues',
                            attrs={'long_name': 'eigenvalues'})
Example #9
0
    def projectField(self, array, neofs=None, eofscaling=0, weighted=True):
        """Project a field onto the EOFs.

        Given a data set, projects it onto the EOFs to generate a
        corresponding set of pseudo-PCs.

        **Argument:**

        *array*
            An `xarray.DataArray` containing the field to project onto
            the EOFs. It must have the same corresponding spatial
            dimensions (including missing values in the same places) as
            the `Eof` input *dataset*. It may have a different length
            time dimension to the `Eof` input *dataset* or no time
            dimension at all. If a time dimension exists it must be the
            first dimension.

        **Optional arguments:**

        *neofs*
            Number of EOFs to project onto. Defaults to all EOFs. If the
            number of EOFs requested is more than the number that are
            available, then the field will be projected onto all
            available EOFs.

        *eofscaling*
            Set the scaling of the EOFs that are projected
            onto. The following values are accepted:

            * *0* : Un-scaled EOFs (default).
            * *1* : EOFs are divided by the square-root of their eigenvalue.
            * *2* : EOFs are multiplied by the square-root of their
              eigenvalue.

        *weighted*
            If *True* then the field is weighted using the same weights
            used for the EOF analysis prior to projection. If *False*
            then no weighting is applied. Defaults to *True* (weighting
            is applied). Generally only the default setting should be
            used.

        **Returns:**

        *pseudo_pcs*
            A `~xarray.DataArray` containing the pseudo-PCs. The PCs are
            numbered from 0 to *neofs* - 1. When *array* has a time
            dimension, its time coordinate and its time-only
            non-dimension coordinates are carried over to the result.

        **Raises:**

        *TypeError*
            If *array* is not an `xarray.DataArray`.

        *ValueError*
            If *array* has more than one time dimension, or if its time
            dimension is not the first dimension.

        **Examples:**

        Project a field onto all EOFs::

            pseudo_pcs = solver.projectField(field)

        Project fields onto the three leading EOFs::

            pseudo_pcs = solver.projectField(field, neofs=3)

        """
        if not isinstance(array, xr.DataArray):
            raise TypeError('the input must be an xarray DataArray')
        array_name = array.name
        time_coords = find_time_coordinates(array)
        if len(time_coords) > 1:
            raise ValueError('multiple time dimensions are not allowed')
        time_ndcoords = []
        if time_coords:
            has_time = True
            time_coord = time_coords[0]
            if array.dims[0] != time_coord.name:
                raise ValueError('time must be the first dimension, '
                                 'consider using the transpose() method')
            time_ndcoords, _, _ = categorise_ndcoords(array, time_coord.name)
        else:
            has_time = False
        pcs = self._solver.projectField(array.values,
                                        neofs=neofs,
                                        eofscaling=eofscaling,
                                        weighted=weighted)
        # Create the PCs DataArray. A 2-D result is (time, mode); otherwise
        # the mode axis is the first (only) axis.
        with_time_axis = pcs.ndim == 2
        mode_axis = 1 if with_time_axis else 0
        pcdim = xr.Coordinate('mode',
                              range(pcs.shape[mode_axis]),
                              attrs={'long_name': 'eof_mode_number'})
        coords = [time_coord, pcdim] if with_time_axis else [pcdim]
        pcs = xr.DataArray(
            pcs,
            coords=coords,
            name='pseudo_pcs',
            attrs={'long_name': '{}_pseudo_pcs'.format(array_name)})
        if has_time:
            # Add the non-dimension coordinates of the *projected* field.
            # The solver's own time coordinates describe the original input
            # dataset, whose time axis may differ from this field's.
            pcs.coords.update({
                coord.name: (coord.dims, coord)
                for coord in time_ndcoords
            })
        return pcs
Example #10
0
    def eofsAsCovariance(self, neofs=None, pcscaling=1):
        """
        EOFs expressed as the covariance between the principal
        component time series (PCs) and the time series of the `Eof`
        input *dataset* at each grid point.

        **Optional arguments:**

        *neofs*
            How many EOFs to return. By default all EOFs are returned.
            Requesting more EOFs than are available returns all
            available EOFs.

        *pcscaling*
            Selects the scaling of the PCs used to compute the
            covariance. Accepted values:

            * *0* : Un-scaled PCs.
            * *1* : PCs are scaled to unit variance (divided by the
              square-root of their eigenvalue) (default).
            * *2* : PCs are multiplied by the square-root of their
              eigenvalue.

            By default the PCs are divided by the square-root of their
            eigenvalue so that they have unit variance (option 1).

        **Returns:**

        *eofs*
           A `~xarray.DataArray` holding the ordered EOFs, numbered
           from 0 to *neofs* - 1.

        **Examples:**

        All EOFs::

            eofs = solver.eofsAsCovariance()

        The leading EOF::

            eof1 = solver.eofsAsCovariance(neofs=1)

        The leading EOF using un-scaled PCs::

            eof1 = solver.eofsAsCovariance(neofs=1, pcscaling=0)

        """
        covariance_maps = self._solver.eofsAsCovariance(neofs, pcscaling)
        mode_coord = xr.Coordinate('mode',
                                   range(covariance_maps.shape[0]),
                                   attrs={'long_name': 'eof_mode_number'})
        description = 'covariance_between_pcs_and_{!s}'.format(self._name)
        wrapped = xr.DataArray(covariance_maps,
                               coords=[mode_coord] + self._coords,
                               name='eofs',
                               attrs={'long_name': description})
        # Re-attach the spatial non-dimension coordinates.
        extra_coords = {}
        for coord in self._space_ndcoords:
            extra_coords[coord.name] = (coord.dims, coord)
        wrapped.coords.update(extra_coords)
        return wrapped
Example #11
0
def add_wcs_coords(hdu, zarr_group=None, dataset=None, dtype="float32"):
    """Using FITS WCS, create materialised coordinate arrays

    This may triple the data footprint of the data, as the coordinates can easily
    be as big as the data itself.

    Must provide zarr_group or dataset

    Parameters
    ----------
    hdu: astropy.io.fits.HDU or dict
        Input with WCS header information. If a dict, it is {key: attribute} of the data.
    zarr_group: zarr.Group
        To write the new arrays into
    dataset: xr.Dataset
        To create new coordinate arrays in; this is not necessarily written anywhere
    dtype: str
        Output numpy dtype

    Returns
    -------
    If dataset is given, returns the modified dataset; otherwise None.

    Raises
    ------
    ValueError
        If neither zarr_group nor dataset is given.
    TypeError
        If hdu is neither a dict nor a FITS HDU.
    """
    from astropy.wcs import WCS
    from astropy.io import fits

    if zarr_group is None and dataset is None:
        raise ValueError("please provide a zarr group or xarray dataset")

    if isinstance(hdu, dict):
        # assume dict-like
        head = fits.Header()
        hdu2 = hdu.copy()
        hdu2.pop("COMMENT", None)  # comment fields can be non-standard
        head.update(hdu2)
        hdu = fits.PrimaryHDU(header=head)
    elif not isinstance(hdu, fits.hdu.base._BaseHDU):
        raise TypeError("`hdu` must be a FITS HDU or dict")
    nax = hdu.header["NAXIS"]
    # Array shape in numpy order, i.e. NAXISn ... NAXIS1.
    shape = tuple(int(hdu.header[f'NAXIS{i}']) for i in range(nax, 0, -1))
    # Dimension names are the same for every coordinate array.
    # NOTE(review): only 1 to 3 axes get a sensible set of names here.
    dims = ['z', 'y', 'x'][3 - len(shape):]

    wcs = WCS(hdu)
    # NOTE(review): the original author was unsure whether these pixel
    # grids need reversing ("?[::-1]"); confirm the axis order expected by
    # pixel_to_world before relying on non-square or >2-D inputs.
    coords = [
        coo.ravel() for coo in np.meshgrid(*(np.arange(sh) for sh in shape))
    ]
    world_coords = wcs.pixel_to_world(*coords)
    for i, (name,
            world_coord) in enumerate(zip(wcs.axis_type_names, world_coords)):
        attrs = {
            "unit": world_coord.unit.name,
            "type": hdu.header[f"CTYPE{i + 1}"],
            "_ARRAY_DIMENSIONS": list(dims)
        }
        values = world_coord.value.reshape(shape)
        if zarr_group is not None:
            arr = zarr_group.empty(name,
                                   shape=shape,
                                   chunks=shape,
                                   overwrite=True,
                                   dtype=dtype)
            arr.attrs.update(attrs)
            arr[:] = values
        if dataset is not None:
            import xarray as xr
            # These coordinates are N-dimensional, so they must be plain
            # Variables (index coordinates are 1-D only). The coordinate is
            # keyed by the WCS axis name via ** expansion; a literal
            # ``name=`` keyword would create a coordinate called "name".
            coo = xr.Variable(dims, values.astype(dtype), attrs=attrs)
            dataset = dataset.assign_coords(**{name: coo})
    if dataset is not None:
        return dataset