def __init__(self, dataset, weights="none", center=True, ddof=1):
    """Create an Eof object.

    **Argument:**

    *dataset*
        A :py:mod:`cdms2` variable containing the data to be
        analyzed. Time must be the first dimension. Missing values
        are allowed provided that they are constant with time (e.g.,
        values of an oceanographic field over land).

    **Optional arguments:**

    *weights*
        Sets the weighting method. The following values are
        accepted:

        * *"area"* : Square-root of grid cell area normalized by
          total area. Requires a latitude-longitude grid to be
          present in the input :py:mod:`cdms2` variable *dataset*.
          This is a fairly standard weighting strategy. If you are
          unsure which method to use and you have gridded data then
          this should be your first choice.

        * *"coslat"* : Square-root of cosine of latitude
          (*"cos_lat"* is also accepted). Requires a latitude
          dimension to be present in the input :py:mod:`cdms2`
          variable *dataset*.

        * *"none"* : Equal weights for all grid points (default).

        * *None* : Same as *"none"*.

        An array of weights may also be supplied instead of
        specifying a weighting method.

    *center*
        If *True*, the mean along the first axis of the input data
        set (the time-mean) will be removed prior to analysis. If
        *False*, the mean along the first axis will not be removed.
        Defaults to *True* (mean is removed). Generally this option
        should be set to *True* as the covariance interpretation
        relies on input data being anomalies with a time-mean of 0.
        A valid reason for turning this off would be if you have
        already generated an anomaly data set. Setting to *True* has
        the useful side-effect of propagating missing values along
        the time-dimension, ensuring the solver will work even if
        missing values occur at different locations at different
        times.

    *ddof*
        'Delta degrees of freedom'. The divisor used to normalize
        the covariance matrix is *N - ddof* where *N* is the
        number of samples. Defaults to *1*.

    **Raises:**

    *EofError*
        If *dataset* is not a :py:mod:`cdms2` variable, has no time
        axis, does not have time as its first dimension, has no
        spatial dimension, or if the weights for the requested
        scheme cannot be generated.

    **Examples:**

    EOF analysis with area-weighting for the input field:

    >>> from eof2 import Eof
    >>> eofobj = Eof(field, weights="area")

    """
    # Check that dataset is recognised by cdms2 as a variable.
    if not cdms2.isVariable(dataset):
        raise EofError("the input data must be a cdms2 variable")
    # Store the time axis as an instance variable. getTime returns None
    # when a time axis is not found.
    self._timeax = dataset.getTime()
    if self._timeax is None:
        raise EofError("time axis not found")
    # Check the dimension order of the input, time must be the first
    # dimension.
    order = dataset.getOrder()
    if order[0] != "t":
        raise EofError("time must be the first dimension")
    # Verify the presence of at least one spatial dimension. The
    # instance variable channels will also be used as a partial axis
    # list when constructing meta-data. It contains the spatial
    # dimensions.
    self._channels = dataset.getAxisList()
    self._channels.remove(self._timeax)
    if len(self._channels) < 1:
        raise EofError("one or more spatial dimensions are required")
    # Store the missing value attribute of the data set in an
    # instance variable so that it is recoverable later.
    self._missing_value = dataset.getMissing()
    # Generate an appropriate set of weights for the input dataset. There
    # are several weighting schemes. The "area" weighting scheme requires
    # a latitude-longitude grid to be present, the "cos_lat" scheme only
    # requires a latitude dimension.
    #
    # The string test is done by duck-typing (presence of a 'lower'
    # method) *before* any equality comparison, so that an array of
    # weights is never compared against a string (an elementwise
    # comparison whose truth value is ambiguous).
    if weights is None:
        # No weights requested, set the weight array to None.
        wtarray = None
    elif not hasattr(weights, "lower"):
        # Weights is not a string, assume it is an array.
        wtarray = weights
    elif weights == "none":
        # No weights requested, set the weight array to None.
        wtarray = None
    else:
        # Generate a weights array of the appropriate kind, with a
        # shape compatible with the data set. Only the helper call is
        # guarded so that unrelated errors are not misinterpreted.
        scheme = weights.lower()
        try:
            wtarray = weights_array(dataset, scheme=scheme)
        except EofToolError as err:
            # Weights is not recognized, raise an error.
            raise EofError(err)
def __init__(self, *datasets, **kwargs):
    r"""Create a MultipleEof object.

    The EOF solution is computed at initialization time. Method
    calls are used to retrieve computed quantities.

    **Arguments:**

    *\*datasets*
        One or more :py:mod:`cdms2` variables containing the data to
        be analyzed. Time must be the first dimension of each
        variable. Missing values are allowed provided that they are
        constant with time (e.g., values of an oceanographic field
        over land).

    **Optional arguments:**

    *weights*
        Sets the weighting method. The following values are
        accepted:

        * *"area"* : Square-root of grid cell area normalized by
          total area. Requires a latitude-longitude grid to be
          present in the corresponding :py:mod:`cdms2` variable in
          *\*datasets*. This is a fairly standard weighting
          strategy. If you are unsure which method to use and you
          have gridded data then this should be your first choice.

        * *"coslat"* : Square-root of cosine of latitude
          (*"cos_lat"* is also accepted). Requires a latitude
          dimension to be present in the corresponding
          :py:mod:`cdms2` variable in *\*datasets*.

        * *"none"* : Equal weights for all grid points (default).

        * *None* : Same as *"none"*.

        A sequence of values may be passed to use different
        weighting for each data set. Arrays of weights may also be
        supplied instead of specifying a weighting method.

    *center*
        If *True*, the mean along the first axis of the input data
        sets (the time-mean) will be removed prior to analysis. If
        *False*, the mean along the first axis will not be removed.
        Defaults to *True* (mean is removed). Generally this option
        should be set to *True* as the covariance interpretation
        relies on input data being anomalies with a time-mean of 0.
        A valid reason for turning this off would be if you have
        already generated an anomaly data set. Setting to *True* has
        the useful side-effect of propagating missing values along
        the time-dimension, ensuring the solver will work even if
        missing values occur at different locations at different
        times.

    *ddof*
        'Delta degrees of freedom'. The divisor used to normalize
        the covariance matrix is *N - ddof* where *N* is the
        number of samples. Defaults to *1*.

    **Raises:**

    *EofError*
        If an unknown keyword argument is given, if the number of
        weights differs from the number of data sets, if a data set
        is not a :py:mod:`cdms2` variable, has no time axis, does
        not have time as its first dimension, has no spatial
        dimension, or if the weights for a requested scheme cannot
        be generated.

    **Examples:**

    EOF analysis with area-weighting using two input fields:

    >>> from eof2 import MultipleEof
    >>> eofobj = MultipleEof(field_a, field_b, weights="area")

    """
    # Handle keyword arguments manually.
    keywords = {"weights": "none", "center": True, "ddof": 1}
    for kwarg in kwargs:
        if kwarg not in keywords:
            raise EofError("invalid argument: %s." % kwarg)
    weights = kwargs.get("weights", keywords["weights"])
    center = kwargs.get("center", keywords["center"])
    ddof = kwargs.get("ddof", keywords["ddof"])
    # Record the number of datasets.
    self._numdsets = len(datasets)
    # Ensure the weights are specified one per dataset. A single
    # specification (None, or any string naming a scheme) applies to
    # every data set. Strings are detected by duck-typing so that a
    # scheme name in any letter case is never mistaken for a sequence
    # of per-dataset weights, and so that arrays are never compared
    # against strings.
    if weights is None or hasattr(weights, "lower"):
        weights = [weights] * self._numdsets
    elif len(weights) != self._numdsets:
        raise EofError("number of weights and data sets differs")
    # Record dimension information, missing values and compute weights.
    self._multitimeaxes = list()
    self._multichannels = list()
    self._multimissing = list()
    passweights = list()
    for dataset, weight in zip(datasets, weights):
        if not cdms2.isVariable(dataset):
            raise EofError("the input data set must be a cdms2 variable")
        # Ensure a time dimension exists.
        timeaxis = dataset.getTime()
        if timeaxis is None:
            raise EofError("time axis not found")
        self._multitimeaxes.append(timeaxis)
        # Ensure the time dimension is the first dimension.
        order = dataset.getOrder()
        if order[0] != "t":
            raise EofError("time must be the first dimension")
        # Record the other dimensions.
        channels = dataset.getAxisList()
        channels.remove(timeaxis)
        if len(channels) < 1:
            raise EofError("one or more spatial dimensions are required")
        self._multichannels.append(channels)
        # Record the missing values.
        self._multimissing.append(dataset.getMissing())
        # Compute weights as required.
        if weight is None:
            wtarray = None
        elif not hasattr(weight, "lower"):
            # Weight specification is not a string. Assume it is an array
            # of weights.
            wtarray = weight
        elif weight == "none":
            wtarray = None
        else:
            # Only the helper call is guarded so that unrelated errors
            # are not misinterpreted as "weight is an array".
            try:
                wtarray = weights_array(dataset, scheme=weight.lower())
            except EofToolError as err:
                # Another error occurred, raise it as an EOF error.
                raise EofError(err)
        passweights.append(wtarray)
def __init__(self, *datasets, **kwargs):
    r"""Create a MultipleEof object.

    The EOF solution is computed at initialization time. Method
    calls are used to retrieve computed quantities.

    **Arguments:**

    *\*datasets*
        One or more :py:mod:`cdms2` variables containing the data to
        be analyzed. Time must be the first dimension of each
        variable. Missing values are allowed provided that they are
        constant with time (e.g., values of an oceanographic field
        over land).

    **Optional arguments:**

    *weights*
        Sets the weighting method. The following values are
        accepted:

        * *"area"* : Square-root of grid cell area normalized by
          total area. Requires a latitude-longitude grid to be
          present in the corresponding :py:mod:`cdms2` variable in
          *\*datasets*. This is a fairly standard weighting
          strategy. If you are unsure which method to use and you
          have gridded data then this should be your first choice.

        * *"coslat"* : Square-root of cosine of latitude
          (*"cos_lat"* is also accepted). Requires a latitude
          dimension to be present in the corresponding
          :py:mod:`cdms2` variable in *\*datasets*.

        * *"none"* : Equal weights for all grid points (default).

        * *None* : Same as *"none"*.

        A sequence of values may be passed to use different
        weighting for each data set. Arrays of weights may also be
        supplied instead of specifying a weighting method.

    *center*
        If *True*, the mean along the first axis of the input data
        sets (the time-mean) will be removed prior to analysis. If
        *False*, the mean along the first axis will not be removed.
        Defaults to *True* (mean is removed). Generally this option
        should be set to *True* as the covariance interpretation
        relies on input data being anomalies with a time-mean of 0.
        A valid reason for turning this off would be if you have
        already generated an anomaly data set. Setting to *True* has
        the useful side-effect of propagating missing values along
        the time-dimension, ensuring the solver will work even if
        missing values occur at different locations at different
        times.

    *ddof*
        'Delta degrees of freedom'. The divisor used to normalize
        the covariance matrix is *N - ddof* where *N* is the
        number of samples. Defaults to *1*.

    **Raises:**

    *EofError*
        If an unknown keyword argument is given, if the number of
        weights differs from the number of data sets, if a data set
        is not a :py:mod:`cdms2` variable, has no time axis, does
        not have time as its first dimension, has no spatial
        dimension, or if the weights for a requested scheme cannot
        be generated.

    **Examples:**

    EOF analysis with area-weighting using two input fields:

    >>> from eof2 import MultipleEof
    >>> eofobj = MultipleEof(field_a, field_b, weights="area")

    """
    # Handle keyword arguments manually.
    keywords = {"weights": "none", "center": True, "ddof": 1}
    for kwarg in kwargs:
        if kwarg not in keywords:
            raise EofError("invalid argument: %s." % kwarg)
    weights = kwargs.get("weights", keywords["weights"])
    center = kwargs.get("center", keywords["center"])
    ddof = kwargs.get("ddof", keywords["ddof"])
    # Record the number of datasets.
    self._numdsets = len(datasets)
    # Ensure the weights are specified one per dataset. A single
    # specification (None, or any string naming a scheme) applies to
    # every data set. Strings are detected by duck-typing so that a
    # scheme name in any letter case is never mistaken for a sequence
    # of per-dataset weights, and so that arrays are never compared
    # against strings.
    if weights is None or hasattr(weights, "lower"):
        weights = [weights] * self._numdsets
    elif len(weights) != self._numdsets:
        raise EofError("number of weights and data sets differs")
    # Record dimension information, missing values and compute weights.
    self._multitimeaxes = list()
    self._multichannels = list()
    self._multimissing = list()
    passweights = list()
    for dataset, weight in zip(datasets, weights):
        if not cdms2.isVariable(dataset):
            raise EofError("the input data set must be a cdms2 variable")
        # Ensure a time dimension exists.
        timeaxis = dataset.getTime()
        if timeaxis is None:
            raise EofError("time axis not found")
        self._multitimeaxes.append(timeaxis)
        # Ensure the time dimension is the first dimension.
        order = dataset.getOrder()
        if order[0] != "t":
            raise EofError("time must be the first dimension")
        # Record the other dimensions.
        channels = dataset.getAxisList()
        channels.remove(timeaxis)
        if len(channels) < 1:
            raise EofError("one or more spatial dimensions are required")
        self._multichannels.append(channels)
        # Record the missing values.
        self._multimissing.append(dataset.getMissing())
        # Compute weights as required.
        if weight is None:
            wtarray = None
        elif not hasattr(weight, "lower"):
            # Weight specification is not a string. Assume it is an array
            # of weights.
            wtarray = weight
        elif weight == "none":
            wtarray = None
        else:
            # Only the helper call is guarded so that unrelated errors
            # are not misinterpreted as "weight is an array".
            try:
                wtarray = weights_array(dataset, scheme=weight.lower())
            except EofToolError as err:
                # Another error occurred, raise it as an EOF error.
                raise EofError(err)
        passweights.append(wtarray)