Ejemplo n.º 1
0
    def __init__(self, dataset, weights="none", center=True, ddof=1):
        """Create an Eof object.

        **Argument:**

        *dataset*
            A :py:mod:`cdms2` variable containing the data to be
            analyzed. Time must be the first dimension. Missing values
            are allowed provided that they are constant with time (e.g.,
            values of an oceanographic field over land).

        **Optional arguments:**

        *weights*
            Sets the weighting method. The following values are
            accepted:

            * *"area"* : Square-root of grid cell area normalized by
              total area. Requires a latitude-longitude grid to be
              present in the input :py:mod:`cdms2` variable *dataset*.
              This is a fairly standard weighting strategy. If you are
              unsure which method to use and you have gridded data then
              this should be your first choice.

            * *"coslat"* : Square-root of cosine of latitude
              (*"cos_lat"* is also accepted). Requires a latitude
              dimension to be present in the input :py:mod:`cdms2`
              variable *dataset*.

            * *"none"* : Equal weights for all grid points (default).

            * *None* : Same as *"none"*.

            An array of weights may also be supplied instead of
            specifying a weighting method.

        *center*
            If *True*, the mean along the first axis of the input data
            set (the time-mean) will be removed prior to analysis. If
            *False*, the mean along the first axis will not be removed.
            Defaults to *True* (mean is removed). Generally this option
            should be set to *True* as the covariance interpretation
            relies on input data being anomalies with a time-mean of 0.
            A valid reason for turning this off would be if you have
            already generated an anomaly data set. Setting to *True* has
            the useful side-effect of propagating missing values along
            the time-dimension, ensuring the solver will work even if
            missing values occur at different locations at different
            times.

        *ddof*
            'Delta degrees of freedom'. The divisor used to normalize
            the covariance matrix is *N - ddof* where *N* is the
            number of samples. Defaults to *1*.

        **Raises:**

        *EofError*
            If *dataset* is not a :py:mod:`cdms2` variable, has no time
            axis, does not have time as its first dimension, has no
            spatial dimension, or if the requested weights cannot be
            generated for it.

        **Examples:**

        EOF analysis with area-weighting for the input field:

        >>> from eof2 import Eof
        >>> eofobj = Eof(field, weights="area")

        """
        # Check that dataset is recognised by cdms2 as a variable.
        if not cdms2.isVariable(dataset):
            raise EofError("the input data must be a cdms2 variable")
        # Store the time axis as an instance variable.
        self._timeax = dataset.getTime()
        # Verify that a time axis was found, getTime returns None when a
        # time axis is not found.
        if self._timeax is None:
            raise EofError("time axis not found")
        # Check the dimension order of the input, time must be the first
        # dimension.
        order = dataset.getOrder()
        if order[0] != "t":
            raise EofError("time must be the first dimension")
        # Verify the presence of at least one spatial dimension. The
        # instance variable channels will also be used as a partial axis
        # list when constructing meta-data. It contains the spatial
        # dimensions.
        self._channels = dataset.getAxisList()
        self._channels.remove(self._timeax)
        if len(self._channels) < 1:
            raise EofError("one or more spatial dimensions are required")
        # Store the missing value attribute of the data set in an
        # instance variable so that it is recoverable later.
        self._missing_value = dataset.getMissing()
        # Generate an appropriate set of weights for the input dataset.
        # There are several weighting schemes. The "area" weighting
        # scheme requires a latitude-longitude grid to be present, the
        # "cos_lat" scheme only requires a latitude dimension.
        #
        # Only string-like objects (anything with a lower() method) can
        # name a scheme, so establish that before comparing with "none".
        # A plain membership test, weights in ("none", None), would also
        # compare an array of weights using ==; numpy evaluates that
        # element-wise and the result has no single truth value.
        if weights is None or (hasattr(weights, "lower")
                               and weights == "none"):
            # No weights requested, set the weight array to None.
            wtarray = None
        else:
            try:
                # Generate a weights array of the appropriate kind, with a
                # shape compatible with the data set.
                scheme = weights.lower()
                wtarray = weights_array(dataset, scheme=scheme)
            except AttributeError:
                # Weights is not a string, assume it is an array.
                wtarray = weights
            except EofToolError as err:
                # Weights is not recognized, raise an error.
                raise EofError(err)
Ejemplo n.º 2
0
    def __init__(self, *datasets, **kwargs):
        r"""Create a MultipleEof object.

        The EOF solution is computed at initialization time. Method
        calls are used to retrieve computed quantities.

        **Arguments:**

        *\*datasets*
            One or more :py:mod:`cdms2` variables containing the data to
            be analyzed. Time must be the first dimension of each
            variable. Missing values are allowed provided that they are
            constant with time (e.g., values of an oceanographic field
            over land).

        **Optional arguments:**

        *weights*
            Sets the weighting method. The following values are
            accepted (scheme names are not case-sensitive):

            * *"area"* : Square-root of grid cell area normalized by
              total area. Requires a latitude-longitude grid to be
              present in the corresponding :py:mod:`cdms2` variable
              in *\*datasets*. This is a fairly standard weighting
              strategy. If you are unsure which method to use and you
              have gridded data then this should be your first choice.

            * *"coslat"* : Square-root of cosine of latitude
              (*"cos_lat"* is also accepted). Requires a latitude
              dimension to be present in the corresponding
              :py:mod:`cdms2` variable in *\*datasets*.

            * *"none"* : Equal weights for all grid points (default).

            * *None* : Same as *"none"*.

            A single value is applied to every data set. A sequence of
            values, one per data set, may be passed to use different
            weighting for each data set. Arrays of weights may also
            be supplied instead of specifying a weighting method.

        *center*
            If *True*, the mean along the first axis of the input data
            sets (the time-mean) will be removed prior to analysis. If
            *False*, the mean along the first axis will not be removed.
            Defaults to *True* (mean is removed). Generally this option
            should be set to *True* as the covariance interpretation
            relies on input data being anomalies with a time-mean of 0.
            A valid reason for turning this off would be if you have
            already generated an anomaly data set. Setting to *True* has
            the useful side-effect of propagating missing values along
            the time-dimension, ensuring the solver will work even if
            missing values occur at different locations at different
            times.

        *ddof*
            'Delta degrees of freedom'. The divisor used to normalize
            the covariance matrix is *N - ddof* where *N* is the
            number of samples. Defaults to *1*.

        **Raises:**

        *EofError*
            If an unknown keyword argument is given, if the number of
            weights differs from the number of data sets, if a data set
            is not a :py:mod:`cdms2` variable, has no time axis, does
            not have time as its first dimension or has no spatial
            dimension, or if the requested weights cannot be generated.

        **Examples:**

        EOF analysis with area-weighting using two input fields:

        >>> from eof2 import MultipleEof
        >>> eofobj = MultipleEof(field_a, field_b, weights="area")

        """
        # Handle keyword arguments manually: the data sets occupy the
        # variable-length positional arguments, so the optional arguments
        # arrive in kwargs and their names must be validated here.
        keywords = {"weights": "none", "center": True, "ddof": 1}
        for kwarg in kwargs:
            if kwarg not in keywords:
                raise EofError("invalid argument: %s." % kwarg)
        weights = kwargs.get("weights", keywords["weights"])
        center = kwargs.get("center", keywords["center"])
        ddof = kwargs.get("ddof", keywords["ddof"])
        # Record the number of datasets.
        self._numdsets = len(datasets)
        # Ensure the weights are specified one per dataset. None or a
        # single scheme name (any string-like object, recognized by its
        # lower() method) applies to every dataset. Anything else must be
        # a sequence with one entry per dataset. The type is tested rather
        # than using a membership test against the scheme names because
        # comparing an array with a string or None using == is evaluated
        # element-wise by numpy and has no single truth value.
        if weights is None or hasattr(weights, "lower"):
            weights = [weights] * self._numdsets
        elif len(weights) != self._numdsets:
            raise EofError("number of weights and data sets differs")
        # Record dimension information, missing values and compute weights.
        self._multitimeaxes = list()
        self._multichannels = list()
        self._multimissing = list()
        passweights = list()
        for dataset, weight in zip(datasets, weights):
            if not cdms2.isVariable(dataset):
                raise EofError("the input data set must be a cdms2 variable")
            # Ensure a time dimension exists.
            timeaxis = dataset.getTime()
            if timeaxis is None:
                raise EofError("time axis not found")
            self._multitimeaxes.append(timeaxis)
            # Ensure the time dimension is the first dimension.
            order = dataset.getOrder()
            if order[0] != "t":
                raise EofError("time must be the first dimension")
            # Record the other dimensions.
            channels = dataset.getAxisList()
            channels.remove(timeaxis)
            if len(channels) < 1:
                raise EofError("one or more spatial dimensions are required")
            self._multichannels.append(channels)
            # Record the missing values.
            self._multimissing.append(dataset.getMissing())
            # Compute weights as required. As above, only compare with
            # "none" once the weight is known to be string-like so that an
            # array of weights is never compared using ==.
            if weight is None or (hasattr(weight, "lower")
                                  and weight == "none"):
                passweights.append(None)
            else:
                try:
                    wtarray = weights_array(dataset, scheme=weight.lower())
                    passweights.append(wtarray)
                except AttributeError:
                    # Weight specification is not a string. Assume it is
                    # an array of weights.
                    passweights.append(weight)
                except EofToolError as err:
                    # Another error occurred, raise it as an EOF error.
                    raise EofError(err)
Ejemplo n.º 3
0
    def __init__(self, *datasets, **kwargs):
        r"""Create a MultipleEof object.

        The EOF solution is computed at initialization time. Method
        calls are used to retrieve computed quantities.

        **Arguments:**

        *\*datasets*
            One or more :py:mod:`cdms2` variables containing the data to
            be analyzed. Time must be the first dimension of each
            variable. Missing values are allowed provided that they are
            constant with time (e.g., values of an oceanographic field
            over land).

        **Optional arguments:**

        *weights*
            Sets the weighting method. The following values are
            accepted (scheme names are not case-sensitive):

            * *"area"* : Square-root of grid cell area normalized by
              total area. Requires a latitude-longitude grid to be
              present in the corresponding :py:mod:`cdms2` variable
              in *\*datasets*. This is a fairly standard weighting
              strategy. If you are unsure which method to use and you
              have gridded data then this should be your first choice.

            * *"coslat"* : Square-root of cosine of latitude
              (*"cos_lat"* is also accepted). Requires a latitude
              dimension to be present in the corresponding
              :py:mod:`cdms2` variable in *\*datasets*.

            * *"none"* : Equal weights for all grid points (default).

            * *None* : Same as *"none"*.

            A single value is applied to every data set. A sequence of
            values, one per data set, may be passed to use different
            weighting for each data set. Arrays of weights may also
            be supplied instead of specifying a weighting method.

        *center*
            If *True*, the mean along the first axis of the input data
            sets (the time-mean) will be removed prior to analysis. If
            *False*, the mean along the first axis will not be removed.
            Defaults to *True* (mean is removed). Generally this option
            should be set to *True* as the covariance interpretation
            relies on input data being anomalies with a time-mean of 0.
            A valid reason for turning this off would be if you have
            already generated an anomaly data set. Setting to *True* has
            the useful side-effect of propagating missing values along
            the time-dimension, ensuring the solver will work even if
            missing values occur at different locations at different
            times.

        *ddof*
            'Delta degrees of freedom'. The divisor used to normalize
            the covariance matrix is *N - ddof* where *N* is the
            number of samples. Defaults to *1*.

        **Raises:**

        *EofError*
            If an unknown keyword argument is given, if the number of
            weights differs from the number of data sets, if a data set
            is not a :py:mod:`cdms2` variable, has no time axis, does
            not have time as its first dimension or has no spatial
            dimension, or if the requested weights cannot be generated.

        **Examples:**

        EOF analysis with area-weighting using two input fields:

        >>> from eof2 import MultipleEof
        >>> eofobj = MultipleEof(field_a, field_b, weights="area")

        """
        # Handle keyword arguments manually: the data sets occupy the
        # variable-length positional arguments, so the optional arguments
        # arrive in kwargs and their names must be validated here.
        keywords = {"weights": "none", "center": True, "ddof": 1}
        for kwarg in kwargs:
            if kwarg not in keywords:
                raise EofError("invalid argument: %s." % kwarg)
        weights = kwargs.get("weights", keywords["weights"])
        center = kwargs.get("center", keywords["center"])
        ddof = kwargs.get("ddof", keywords["ddof"])
        # Record the number of datasets.
        self._numdsets = len(datasets)
        # Ensure the weights are specified one per dataset. None or a
        # single scheme name (any string-like object, recognized by its
        # lower() method) applies to every dataset. Anything else must be
        # a sequence with one entry per dataset. The type is tested rather
        # than using a membership test against the scheme names because
        # comparing an array with a string or None using == is evaluated
        # element-wise by numpy and has no single truth value.
        if weights is None or hasattr(weights, "lower"):
            weights = [weights] * self._numdsets
        elif len(weights) != self._numdsets:
            raise EofError("number of weights and data sets differs")
        # Record dimension information, missing values and compute weights.
        self._multitimeaxes = list()
        self._multichannels = list()
        self._multimissing = list()
        passweights = list()
        for dataset, weight in zip(datasets, weights):
            if not cdms2.isVariable(dataset):
                raise EofError("the input data set must be a cdms2 variable")
            # Ensure a time dimension exists.
            timeaxis = dataset.getTime()
            if timeaxis is None:
                raise EofError("time axis not found")
            self._multitimeaxes.append(timeaxis)
            # Ensure the time dimension is the first dimension.
            order = dataset.getOrder()
            if order[0] != "t":
                raise EofError("time must be the first dimension")
            # Record the other dimensions.
            channels = dataset.getAxisList()
            channels.remove(timeaxis)
            if len(channels) < 1:
                raise EofError("one or more spatial dimensions are required")
            self._multichannels.append(channels)
            # Record the missing values.
            self._multimissing.append(dataset.getMissing())
            # Compute weights as required. As above, only compare with
            # "none" once the weight is known to be string-like so that an
            # array of weights is never compared using ==.
            if weight is None or (hasattr(weight, "lower")
                                  and weight == "none"):
                passweights.append(None)
            else:
                try:
                    wtarray = weights_array(dataset, scheme=weight.lower())
                    passweights.append(wtarray)
                except AttributeError:
                    # Weight specification is not a string. Assume it is
                    # an array of weights.
                    passweights.append(weight)
                except EofToolError as err:
                    # Another error occurred, raise it as an EOF error.
                    raise EofError(err)
Ejemplo n.º 4
0
    def __init__(self, dataset, weights="none", center=True, ddof=1):
        """Create an Eof object.

        **Argument:**

        *dataset*
            A :py:mod:`cdms2` variable containing the data to be
            analyzed. Time must be the first dimension. Missing values
            are allowed provided that they are constant with time (e.g.,
            values of an oceanographic field over land).

        **Optional arguments:**

        *weights*
            Sets the weighting method. The following values are
            accepted:

            * *"area"* : Square-root of grid cell area normalized by
              total area. Requires a latitude-longitude grid to be
              present in the input :py:mod:`cdms2` variable *dataset*.
              This is a fairly standard weighting strategy. If you are
              unsure which method to use and you have gridded data then
              this should be your first choice.

            * *"coslat"* : Square-root of cosine of latitude
              (*"cos_lat"* is also accepted). Requires a latitude
              dimension to be present in the input :py:mod:`cdms2`
              variable *dataset*.

            * *"none"* : Equal weights for all grid points (default).

            * *None* : Same as *"none"*.

            An array of weights may also be supplied instead of
            specifying a weighting method.

        *center*
            If *True*, the mean along the first axis of the input data
            set (the time-mean) will be removed prior to analysis. If
            *False*, the mean along the first axis will not be removed.
            Defaults to *True* (mean is removed). Generally this option
            should be set to *True* as the covariance interpretation
            relies on input data being anomalies with a time-mean of 0.
            A valid reason for turning this off would be if you have
            already generated an anomaly data set. Setting to *True* has
            the useful side-effect of propagating missing values along
            the time-dimension, ensuring the solver will work even if
            missing values occur at different locations at different
            times.

        *ddof*
            'Delta degrees of freedom'. The divisor used to normalize
            the covariance matrix is *N - ddof* where *N* is the
            number of samples. Defaults to *1*.

        **Raises:**

        *EofError*
            If *dataset* is not a :py:mod:`cdms2` variable, has no time
            axis, does not have time as its first dimension, has no
            spatial dimension, or if the requested weights cannot be
            generated for it.

        **Examples:**

        EOF analysis with area-weighting for the input field:

        >>> from eof2 import Eof
        >>> eofobj = Eof(field, weights="area")

        """
        # Check that dataset is recognised by cdms2 as a variable.
        if not cdms2.isVariable(dataset):
            raise EofError("the input data must be a cdms2 variable")
        # Store the time axis as an instance variable.
        self._timeax = dataset.getTime()
        # Verify that a time axis was found, getTime returns None when a
        # time axis is not found.
        if self._timeax is None:
            raise EofError("time axis not found")
        # Check the dimension order of the input, time must be the first
        # dimension.
        order = dataset.getOrder()
        if order[0] != "t":
            raise EofError("time must be the first dimension")
        # Verify the presence of at least one spatial dimension. The
        # instance variable channels will also be used as a partial axis
        # list when constructing meta-data. It contains the spatial
        # dimensions.
        self._channels = dataset.getAxisList()
        self._channels.remove(self._timeax)
        if len(self._channels) < 1:
            raise EofError("one or more spatial dimensions are required")
        # Store the missing value attribute of the data set in an
        # instance variable so that it is recoverable later.
        self._missing_value = dataset.getMissing()
        # Generate an appropriate set of weights for the input dataset.
        # There are several weighting schemes. The "area" weighting
        # scheme requires a latitude-longitude grid to be present, the
        # "cos_lat" scheme only requires a latitude dimension.
        #
        # Only string-like objects (anything with a lower() method) can
        # name a scheme, so establish that before comparing with "none".
        # A plain membership test, weights in ("none", None), would also
        # compare an array of weights using ==; numpy evaluates that
        # element-wise and the result has no single truth value.
        if weights is None or (hasattr(weights, "lower")
                               and weights == "none"):
            # No weights requested, set the weight array to None.
            wtarray = None
        else:
            try:
                # Generate a weights array of the appropriate kind, with a
                # shape compatible with the data set.
                scheme = weights.lower()
                wtarray = weights_array(dataset, scheme=scheme)
            except AttributeError:
                # Weights is not a string, assume it is an array.
                wtarray = weights
            except EofToolError as err:
                # Weights is not recognized, raise an error.
                raise EofError(err)