def pearsonr(x, y, axis=None):
    '''
    Compute the Pearson correlation coefficient of two datasets and the p-value
    of the test for non-correlation.

    The coefficient measures the linear relationship between the two datasets.
    Strictly speaking it requires each dataset to be normally distributed (not
    necessarily zero-mean). It ranges from -1 to +1: 0 implies no correlation
    and -1 or +1 imply an exact linear relationship. A positive value means y
    increases with x, a negative value means y decreases as x increases.

    The p-value roughly indicates the probability of an uncorrelated system
    producing datasets with a correlation at least as extreme as the computed
    one. It is not entirely reliable, but is probably reasonable for datasets
    larger than 500 or so.

    :param x: (*array_like*) x data array.
    :param y: (*array_like*) y data array.
    :param axis: (*int*) By default, the index is into the flattened array,
        otherwise along the specified axis.

    :returns: Pearson's correlation coefficient and 2-tailed p-value. When
        ``axis`` is given both results are wrapped as ``MIArray``.
    '''
    if isinstance(x, list):
        x = MIArray(ArrayUtil.array(x))
    if isinstance(y, list):
        y = MIArray(ArrayUtil.array(y))

    # Per-axis computation returns arrays rather than scalars.
    if axis is not None:
        result = StatsUtil.pearsonr(x.array, y.array, axis)
        return MIArray(result[0]), MIArray(result[1])

    result = StatsUtil.pearsonr(x.asarray(), y.asarray())
    return result[0], result[1]
def cov(m, y=None, rowvar=True, bias=False):
    '''
    Estimate a covariance matrix.

    :param m: (*array_like*) A 1-D or 2-D array containing multiple variables
        and observations.
    :param y: (*array_like*) Optional. An additional set of variables and
        observations. y has the same form as that of m.
    :param rowvar: (*boolean*) If ``rowvar`` is True (default), then each row
        represents a variable, with observations in the columns. Otherwise,
        the relationship is transposed: each column represents a variable,
        while the rows contain observations.
    :param bias: (*boolean*) Default normalization (False) is by (N - 1), where
        N is the number of observations given (unbiased estimate). If bias is
        True, then normalization is by N.

    :returns: Covariance.
    '''
    if isinstance(m, list):
        m = MIArray(ArrayUtil.array(m))
    if rowvar == True and m.ndim == 2:
        m = m.T

    # The backend takes a "bias corrected" flag, i.e. the inverse of ``bias``.
    corrected = not bias

    if y is not None:
        if isinstance(y, list):
            y = MIArray(ArrayUtil.array(y))
        if rowvar == True and y.ndim == 2:
            y = y.T
        return MIArray(StatsUtil.cov(m.asarray(), y.asarray(), corrected))

    result = StatsUtil.cov(m.asarray(), corrected)
    # The single-input form may yield either an array or a plain value.
    return MIArray(result) if isinstance(result, Array) else result
def __call__(self, x):
    '''
    Evaluate the interpolated values.

    :param x: (*array_like*) Points to evaluate the interpolant at.

    :returns: A float when the evaluation yields a float, otherwise the
        result wrapped as ``MIArray``.
    '''
    if isinstance(x, list):
        x = MIArray(ArrayUtil.array(x))
    if isinstance(x, (MIArray, DimArray)):
        # The evaluator works on the underlying array object.
        x = x.asarray()

    value = InterpUtil.evaluate(self._func, x)
    return value if isinstance(value, float) else MIArray(value)
def ttest_1samp(a, popmean):
    '''
    Calculate the T-test for the mean of ONE group of scores.

    This is a two-sided test for the null hypothesis that the expected value
    (mean) of a sample of independent observations a is equal to the given
    population mean, popmean.

    :param a: (*array_like*) Sample observation.
    :param popmean: (*float*) Expected value in null hypothesis.

    :returns: t-statistic and p-value
    '''
    if isinstance(a, list):
        # Convert the sample ``a`` itself; there is no ``x`` in this scope.
        a = MIArray(ArrayUtil.array(a))
    r = StatsUtil.tTest(a.asarray(), popmean)
    return r[0], r[1]
def mlinregress(y, x):
    '''
    Fit a multiple linear regression model by ordinary least squares (OLS).

    :param y: (*array_like*) Y sample data - one dimension array.
    :param x: (*array_like*) X sample data - two dimension array.

    :returns: Estimated regression parameters and residuals.
    '''
    x = MIArray(ArrayUtil.array(x)) if isinstance(x, list) else x
    y = MIArray(ArrayUtil.array(y)) if isinstance(y, list) else y

    fit = StatsUtil.multipleLineRegress_OLS(y.asarray(), x.asarray())
    params = MIArray(fit[0])
    residuals = MIArray(fit[1])
    return params, residuals
def __init__(self, data=None, index=None, name=None, series=None):
    '''
    One-dimensional array with axis labels (including time series).

    :param data: (*array_like*) One-dimensional array data.
    :param index: (*list*) Data index list. Values must be unique and hashable,
        same length as data.
    :param name: (*string*) Series name.
    :param series: Existing backend series object to wrap; when given,
        ``data``, ``index`` and ``name`` are not used.
    '''
    # Wrapping an existing backend series: derive data and index from it.
    if series is not None:
        self._series = series
        self._data = MIArray(self._series.getData())
        self._index = Index.factory(index=self._series.getIndex())
        return

    if isinstance(data, (list, tuple)):
        data = minum.array(data)

    if index is None:
        # Default to positional labels 0..len(data)-1.
        index = range(0, len(data))
    elif len(data) != len(index):
        raise ValueError('Wrong length of index!')

    if isinstance(index, (MIArray, DimArray)):
        index = index.tolist()

    self._index = index if isinstance(index, Index) else Index.factory(index)
    self._data = data
    self._series = MISeries(data.array, self._index._index, name)
def chi2_contingency(observed):
    '''
    Chi-square test of independence of variables in a contingency table.

    Computes the chi-square statistic and p-value for the hypothesis test of
    independence of the observed frequencies in the contingency table
    ``observed``.

    :param observed: (*array_like*) The contingency table. The table contains
        the observed frequencies (i.e. number of occurrences) in each category.
        In the two-dimensional case, the table is often described as an
        `R x C table`.

    :returns: Chi-square statistic and p-value
    '''
    table = MIArray(ArrayUtil.array(observed)) if isinstance(observed, list) else observed
    result = StatsUtil.chiSquareTest(table.asarray())
    return result[0], result[1]
def ttest_rel(a, b):
    '''
    Calculate the T-test on TWO RELATED samples of scores, a and b.

    This is a two-sided test for the null hypothesis that 2 related or
    repeated samples have identical average (expected) values.

    :param a: (*array_like*) Sample data a.
    :param b: (*array_like*) Sample data b.

    :returns: t-statistic and p-value
    '''
    a = MIArray(ArrayUtil.array(a)) if isinstance(a, list) else a
    b = MIArray(ArrayUtil.array(b)) if isinstance(b, list) else b

    result = StatsUtil.pairedTTest(a.asarray(), b.asarray())
    return result[0], result[1]
def covariance(x, y, bias=False):
    '''
    Calculate the covariance of two arrays.

    :param x: (*array_like*) A 1-D array containing multiple variables and
        observations.
    :param y: (*array_like*) An additional set of variables and observations.
        y has the same form as that of x.
    :param bias: (*boolean*) Default normalization (False) is by (N - 1), where
        N is the number of observations given (unbiased estimate). If bias is
        True, then normalization is by N.

    :returns: Covariance
    '''
    sequence_types = (list, tuple)
    if isinstance(x, sequence_types):
        x = MIArray(ArrayUtil.array(x))
    if isinstance(y, sequence_types):
        y = MIArray(ArrayUtil.array(y))
    return StatsUtil.covariance(x.asarray(), y.asarray(), bias)
def ttest_ind(a, b):
    '''
    Calculate the T-test for the means of TWO INDEPENDENT samples of scores.

    This is a two-sided test for the null hypothesis that 2 independent samples
    have identical average (expected) values. This test assumes that the
    populations have identical variances.

    :param a: (*array_like*) Sample data a.
    :param b: (*array_like*) Sample data b.

    :returns: t-statistic and p-value
    '''
    a = MIArray(ArrayUtil.array(a)) if isinstance(a, list) else a
    b = MIArray(ArrayUtil.array(b)) if isinstance(b, list) else b

    result = StatsUtil.tTest(a.asarray(), b.asarray())
    return result[0], result[1]
def percentile(a, q, axis=None):
    '''
    Compute the qth percentile of the data along the specified axis.

    :param a: (*array_like*) Input array.
    :param q: (*float*) float in range of [0,100]. Percentile to compute,
        which must be between 0 and 100 inclusive.
    :param axis: (*int*) Axis or axes along which the percentiles are computed.
        The default is to compute the percentile along a flattened version of
        the array.

    :returns: (*float*) qth percentile value. When ``axis`` is given the result
        is wrapped as ``MIArray``.
    '''
    if isinstance(a, list):
        # Convert the input ``a`` itself; there is no ``x`` in this scope.
        a = MIArray(ArrayUtil.array(a))
    if axis is None:
        r = StatsUtil.percentile(a.asarray(), q)
    else:
        r = StatsUtil.percentile(a.asarray(), q, axis)
        r = MIArray(r)
    return r
def linregress(x, y, outvdn=False):
    '''
    Calculate a linear least-squares regression for two sets of measurements.

    :param x, y: (*array_like*) Two sets of measurements. Both arrays should
        have the same length.
    :param outvdn: (*boolean*) Output validate data number or not. Default is
        False.

    :returns: Result slope, intercept, relative coefficient, two-sided p-value
        for a hypothesis test whose null hypothesis is that the slope is zero,
        standard error of the estimated gradient, validate data number (remove
        NaN values). The last item is only included when ``outvdn`` is True.
    '''
    if isinstance(x, list):
        x = MIArray(ArrayUtil.array(x))
    if isinstance(y, list):
        y = MIArray(ArrayUtil.array(y))

    fit = ArrayMath.lineRegress(x.asarray(), y.asarray())
    stats = (fit[0], fit[1], fit[2], fit[3], fit[4])
    if outvdn:
        # Append the number of valid data points used in the fit.
        return stats + (fit[5],)
    return stats
def kendalltau(x, y):
    '''
    Calculate Kendall's tau, a correlation measure for ordinal data.

    Kendall's tau is a measure of the correspondence between two rankings.
    Values close to 1 indicate strong agreement, values close to -1 indicate
    strong disagreement. This is the 1945 "tau-b" version of Kendall's
    tau [2]_, which can account for ties and which reduces to the 1938 "tau-a"
    version [1]_ in absence of ties.

    :param x: (*array_like*) x data array.
    :param y: (*array_like*) y data array.

    :returns: Correlation.

    Notes
    -----
    The definition of Kendall's tau that is used is [2]_::

        tau = (P - Q) / sqrt((P + Q + T) * (P + Q + U))

    where P is the number of concordant pairs, Q the number of discordant
    pairs, T the number of ties only in `x`, and U the number of ties only in
    `y`. If a tie occurs for the same pair in both `x` and `y`, it is not
    added to either T or U.

    References
    ----------
    .. [1] Maurice G. Kendall, "A New Measure of Rank Correlation", Biometrika
           Vol. 30, No. 1/2, pp. 81-93, 1938.
    .. [2] Maurice G. Kendall, "The treatment of ties in ranking problems",
           Biometrika Vol. 33, No. 3, pp. 239-251. 1945.
    .. [3] Gottfried E. Noether, "Elements of Nonparametric Statistics", John
           Wiley & Sons, 1967.
    .. [4] Peter M. Fenwick, "A new data structure for cumulative frequency
           tables", Software: Practice and Experience, Vol. 24, No. 3,
           pp. 327-336, 1994.
    '''
    x = MIArray(ArrayUtil.array(x)) if isinstance(x, list) else x
    y = MIArray(ArrayUtil.array(y)) if isinstance(y, list) else y
    return StatsUtil.kendalltau(x.asarray(), y.asarray())
def chisquare(f_obs, f_exp=None):
    '''
    Calculate a one-way chi square test.

    The chi square test tests the null hypothesis that the categorical data
    has the given frequencies.

    :param f_obs: (*array_like*) Observed frequencies in each category.
    :param f_exp: (*array_like*) Expected frequencies in each category. By
        default the categories are assumed to be equally likely.

    :returns: Chi-square statistic and p-value
    '''
    if isinstance(f_obs, list):
        f_obs = MIArray(ArrayUtil.array(f_obs))

    if f_exp is None:
        # Equal likelihood: spread the observed total evenly over categories.
        ncat = len(f_obs)
        f_exp = minum.ones(ncat) / ncat * f_obs.sum()
    elif isinstance(f_exp, list):
        f_exp = MIArray(ArrayUtil.array(f_exp))

    # The backend takes the expected frequencies first, then the observed.
    result = StatsUtil.chiSquareTest(f_exp.asarray(), f_obs.asarray())
    return result[0], result[1]
def __init__(self, x, y, z, kind='linear'):
    '''
    Build a two-dimensional interpolation function from sample data.

    :param x: (*array_like*) x coordinates of the samples.
    :param y: (*array_like*) y coordinates of the samples.
    :param z: (*array_like*) Sample values.
    :param kind: (*string*) Accepted for interface compatibility; it is not
        forwarded to the backend by this constructor.
    '''
    x = MIArray(ArrayUtil.array(x)) if isinstance(x, list) else x
    y = MIArray(ArrayUtil.array(y)) if isinstance(y, list) else y
    z = MIArray(ArrayUtil.array(z)) if isinstance(z, list) else z

    xa = x.asarray()
    ya = y.asarray()
    za = z.asarray()
    self._func = InterpUtil.getBiInterpFunc(xa, ya, za)
def spearmanr(m, y=None, axis=0):
    '''
    Calculate a Spearman rank-order correlation coefficient.

    The Spearman correlation is a nonparametric measure of the monotonicity of
    the relationship between two datasets. Unlike the Pearson correlation, it
    does not assume that both datasets are normally distributed. Like other
    correlation coefficients, it varies between -1 and +1 with 0 implying no
    correlation. Correlations of -1 or +1 imply an exact monotonic
    relationship. Positive correlations imply that as x increases, so does y.
    Negative correlations imply that as x increases, y decreases.

    :param m: (*array_like*) A 1-D or 2-D array containing multiple variables
        and observations.
    :param y: (*array_like*) Optional. An additional set of variables and
        observations. y has the same form as that of m.
    :param axis: (*int*) If axis=0 (default), then each column represents a
        variable, with observations in the rows. If axis=1, the relationship
        is transposed: each row represents a variable, while the columns
        contain observations.

    :returns: Spearman correlation matrix.
    '''
    if isinstance(m, list):
        m = MIArray(ArrayUtil.array(m))
    if axis == 1 and m.ndim == 2:
        m = m.T

    if y is not None:
        if isinstance(y, list):
            y = MIArray(ArrayUtil.array(y))
        if axis == 1 and y.ndim == 2:
            y = y.T
        return MIArray(StatsUtil.spearmanr(m.asarray(), y.asarray()))

    result = StatsUtil.spearmanr(m.asarray())
    # The single-input form may yield either an array or a plain value.
    return MIArray(result) if isinstance(result, Array) else result
class Series(object):
    '''
    One-dimensional labelled array that wraps a backend ``MISeries`` object.
    '''

    def __init__(self, data=None, index=None, name=None, series=None):
        '''
        One-dimensional array with axis labels (including time series).

        :param data: (*array_like*) One-dimensional array data.
        :param index: (*list*) Data index list. Values must be unique and hashable,
            same length as data.
        :param name: (*string*) Series name.
        :param series: Existing backend series object to wrap; when given,
            ``data``, ``index`` and ``name`` are not used.
        '''
        if series is None:
            if isinstance(data, (list, tuple)):
                data = minum.array(data)
            if index is None:
                index = range(0, len(data))
            else:
                if len(data) != len(index):
                    raise ValueError('Wrong length of index!')
            if isinstance(index, (MIArray, DimArray)):
                index = index.tolist()
            if isinstance(index, Index):
                self._index = index
            else:
                self._index = Index.factory(index)
            self._data = data
            self._series = MISeries(data.array, self._index._index, name)
        else:
            self._series = series
            self._data = MIArray(self._series.getData())
            self._index = Index.factory(index=self._series.getIndex())

    #---- index property
    def get_index(self):
        '''Return the index object of the series.'''
        return self._index

    def set_index(self, value):
        '''Replace the index and push it to the backend series.'''
        self._index = Index(value)
        self._series.setIndex(self._index.data)

    index = property(get_index, set_index)

    #---- values property
    def get_values(self):
        '''
        Return the series values; when the first element is a ``Date`` the
        values are converted with ``miutil.pydate``.
        '''
        if isinstance(self._data[0], Date):
            return miutil.pydate(self._data.aslist())
        else:
            return self._data

    def set_values(self, value):
        '''Replace the values and push them to the backend series.'''
        self._data = minum.array(value)
        self._series.setData(self._data.array)

    values = property(get_values, set_values)

    #---- name property
    def get_name(self):
        '''Return the series name held by the backend series.'''
        return self._series.getName()

    def set_name(self, value):
        '''Set the series name on the backend series.'''
        self._series.setName(value)

    name = property(get_name, set_name)

    #---- dtype property
    def get_dtype(self):
        '''Return the data type of the values.'''
        return self.values.dtype

    dtype = property(get_dtype)

    def __getitem__(self, key):
        '''
        Select by position, label, list of positions/labels, or slice.

        :param key: An int position, an index label, an ``Index``, a datetime,
            a list/tuple/``MIArray`` of positions or labels, or a slice whose
            bounds are positions or string labels.

        :returns: A single value or a new ``Series``.
        :raises KeyError: If an int position is negative or past the end.
        '''
        if isinstance(key, Index):
            key = key.data
        elif isinstance(key, datetime.datetime):
            key = miutil.jdatetime(key)

        if isinstance(key, int):
            if key < 0 or key >= self.__len__():
                raise KeyError(key)
            return self._series.getValue(key)
        elif isinstance(key, (list, tuple, MIArray)):
            if isinstance(key, MIArray):
                key = key.aslist()
            if isinstance(key[0], datetime.datetime):
                key = miutil.jdatetime(key)
            # The first element decides between positional and label lookup.
            if isinstance(key[0], int):
                r = self._series.getValues(key)
            else:
                r = self._series.getValueByIndex(key)
            return Series(series=r)
        elif isinstance(key, slice):
            if isinstance(key.start, basestring):
                sidx = self._index.index(key.start)
                if sidx < 0:
                    sidx = 0
            else:
                sidx = 0 if key.start is None else key.start
                if sidx < 0:
                    sidx = self.__len__() + sidx
            if isinstance(key.stop, basestring):
                eidx = self._index.index(key.stop)
                if eidx < 0:
                    eidx = self.__len__()
            else:
                # Positional stop is exclusive, so the last position is stop - 1.
                eidx = self.__len__() - 1 if key.stop is None else key.stop - 1
                if eidx < 0:
                    eidx = self.__len__() + eidx
            step = 1 if key.step is None else key.step
            rowkey = Range(sidx, eidx, step)
            r = self._series.getValues(rowkey)
            return Series(series=r)
        else:
            r = self._series.getValueByIndex(key)
            if isinstance(r, MISeries):
                return Series(series=r)
            else:
                return r

    def __setitem__(self, key, value):
        '''
        Assign ``value`` at ``key``; a ``Series`` key is handed to the backend
        series, anything else is resolved to positions first.
        '''
        if isinstance(key, Series):
            self._series.setValue(key._series, value)
            return None

        ikey = self.__getkey(key)
        self.values.__setitem__(ikey, value)

    def __getkey(self, key):
        '''
        Translate string labels into positions; other keys pass through.

        :raises KeyError: If no position matches the given label(s).
        '''
        if isinstance(key, basestring):
            ikey = self.index.get_loc(key)
            if len(ikey) == 1:
                ikey = ikey[0]
            elif len(ikey) > 1:
                ikey = list(ikey)
            else:
                raise KeyError(key)
            return ikey
        elif isinstance(key, (list, tuple, MIArray, DimArray)) and isinstance(key[0], basestring):
            if isinstance(key, (MIArray, DimArray)):
                key = key.asarray()
            ikey = self.index.get_indices(key)
            if len(ikey) == 0:
                # Report the labels that could not be found.
                raise KeyError(key)
            else:
                ikey = list(ikey)
            return ikey
        else:
            return key

    def __iter__(self):
        """
        Iterate over the index labels of the Series.
        """
        return iter(self.index)

    def iteritems(self):
        """
        Iterate over (index, value) tuples
        """
        # Pair labels with the values; iter(self) would yield labels again.
        return zip(iter(self.index), iter(self.values))

    def __len__(self):
        return self.values.__len__()

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return self._series.toString()

    def __eq__(self, other):
        return Series(series=self._series.equal(other))

    def __lt__(self, other):
        return Series(series=self._series.lessThan(other))

    def __le__(self, other):
        return Series(series=self._series.lessThanOrEqual(other))

    def __gt__(self, other):
        return Series(series=self._series.greaterThan(other))

    def __ge__(self, other):
        return Series(series=self._series.greaterThanOrEqual(other))

    def head(self, n=5):
        '''
        Print the top rows.

        :param n: (*int*) row number.

        :returns: None; the rows are printed.
        '''
        print(self._series.head(n))

    def tail(self, n=5):
        '''
        Print the bottom rows.

        :param n: (*int*) row number.

        :returns: None; the rows are printed.
        '''
        print(self._series.tail(n))

    def mean(self):
        '''
        Return the mean of the values

        :returns: Mean value
        '''
        r = self._series.mean()
        if isinstance(r, MISeries):
            return Series(series=r)
        else:
            return r

    def max(self):
        '''
        Return the maximum of the values

        :returns: Maximum value
        '''
        r = self._series.max()
        if isinstance(r, MISeries):
            return Series(series=r)
        else:
            return r

    def min(self):
        '''
        Return the minimum of the values

        :returns: Minimum value
        '''
        r = self._series.min()
        if isinstance(r, MISeries):
            return Series(series=r)
        else:
            return r

    def groupby(self, by=None):
        '''
        Group Series.

        :param by: Used to determine the groups for the groupby.

        :returns: GroupBy object.
        '''
        gb = self._series.groupBy(by)
        return groupby.GroupBy(gb)

    def resample(self, by):
        '''
        Group series by date time index.

        :param by: Used to determine the groups for the groupby.

        :returns: GroupBy object.
        '''
        gb = self._series.resample(by)
        return groupby.GroupBy(gb)

#################################################################
def __init__(self, x, y, kind='linear'):
    '''
    Build a one-dimensional interpolation function from sample data.

    :param x: (*array_like*) x coordinates of the samples.
    :param y: (*array_like*) Sample values.
    :param kind: (*string*) Interpolation kind passed to the backend.
        Default is ``'linear'``.
    '''
    x = MIArray(ArrayUtil.array(x)) if isinstance(x, list) else x
    y = MIArray(ArrayUtil.array(y)) if isinstance(y, list) else y

    xa = x.asarray()
    ya = y.asarray()
    self._func = InterpUtil.getInterpFunc(xa, ya, kind)
def __getitem__(self, key):
    '''
    Select a column, a single value, or a row/column subset.

    :param key: One of:

        - a column name (string): that column is returned as a Series;
        - a ``(row, column)`` tuple: two ints, or an int row with a string
          column name, return a single value; other combinations select a
          subset;
        - a row selector alone (int, label, slice, list/tuple/array of
          positions or labels, or ``Index``): all columns are kept.

    :returns: A single value, a Series, a DataFrame, or None when a row label
        is not found, the column selector type is unsupported, or the backend
        selection yields nothing.
    '''
    # Single column by name.
    if isinstance(key, basestring):
        data = self._dataframe.getColumnData(key)
        if data is None:
            return data
        idx = self._index[:]
        r = series.Series(MIArray(data), idx, key)
        return r

    hascolkey = True
    if isinstance(key, tuple):
        ridx = key[0]
        cidx = key[1]
        # Scalar fast paths: a single cell addressed by (int, int) or (int, name).
        if isinstance(ridx, int) and isinstance(cidx, int):
            if ridx < 0:
                ridx = self.shape[0] + ridx
            if cidx < 0:
                cidx = self.shape[1] + cidx
            return self._dataframe.getValue(ridx, cidx)
        elif isinstance(ridx, int) and isinstance(cidx, basestring):
            if ridx < 0:
                ridx = self.shape[0] + ridx
            return self._dataframe.getValue(ridx, cidx)
    else:
        key = (key, slice(None))
        hascolkey = False

    # ---- row selector
    k = key[0]
    if isinstance(k, Index):
        k = k.data
    if isinstance(k, int):
        if k < 0:
            k = self.shape[0] + k
        rowkey = k
    elif isinstance(k, basestring):
        sidx = self._index.index(k)
        if sidx < 0:
            return None
        eidx = sidx
        step = 1
        rowkey = Range(sidx, eidx, step)
    elif isinstance(k, slice):
        if isinstance(k.start, basestring):
            sidx = self._index.index(k.start)
            if sidx < 0:
                sidx = 0
        else:
            sidx = 0 if k.start is None else k.start
            if sidx < 0:
                sidx = self.shape[0] + sidx
        if isinstance(k.stop, basestring):
            eidx = self._index.index(k.stop)
            if eidx < 0:
                eidx = self.shape[0] + eidx
        else:
            # Positional stop is exclusive, so the last row is stop - 1.
            eidx = self.shape[0] - 1 if k.stop is None else k.stop - 1
            if eidx < 0:
                eidx = self.shape[0] + eidx
        step = 1 if k.step is None else k.step
        rowkey = Range(sidx, eidx, step)
    elif isinstance(k, (list, tuple, MIArray)):
        if isinstance(k[0], int):
            rowkey = k
        else:
            # Labels: keep only those present in the index.
            tlist = []
            for tstr in k:
                idx = self._index.index(tstr)
                if idx >= 0:
                    tlist.append(idx)
            rowkey = tlist
    else:
        rowkey = self._index.get_loc(k)

    # ---- column selector
    if not hascolkey:
        colkey = Range(0, self.shape[1] - 1, 1)
    else:
        k = key[1]
        if isinstance(k, int):
            sidx = k
            if sidx < 0:
                sidx = self.shape[1] + sidx
            eidx = sidx
            step = 1
            colkey = Range(sidx, eidx, step)
        elif isinstance(k, slice):
            sidx = 0 if k.start is None else k.start
            if sidx < 0:
                sidx = self.shape[1] + sidx
            eidx = self.shape[1] - 1 if k.stop is None else k.stop - 1
            if eidx < 0:
                eidx = self.shape[1] + eidx
            step = 1 if k.step is None else k.step
            colkey = Range(sidx, eidx, step)
        elif isinstance(k, list):
            if isinstance(k[0], int):
                colkey = k
            else:
                colkey = self.columns.indexOfName(k)
        elif isinstance(k, basestring):
            col = self.columns.indexOf(k)
            # Ranges in this method use an inclusive end (see the int and
            # slice branches), so a single named column is Range(col, col).
            colkey = Range(col, col, 1)
        else:
            return None

    r = self._dataframe.select(rowkey, colkey)
    if r is None:
        return None
    if isinstance(r, MISeries):
        r = series.Series(series=r)
    else:
        r = DataFrame(dataframe=r)
    return r
class Series(object):
    '''
    One-dimensional labelled array that wraps a backend ``MISeries`` object.
    '''

    def __init__(self, data=None, index=None, name=None, series=None):
        '''
        One-dimensional array with axis labels (including time series).

        :param data: (*array_like*) One-dimensional array data.
        :param index: (*list*) Data index list. Values must be unique and hashable,
            same length as data.
        :param name: (*string*) Series name.
        :param series: Existing backend series object to wrap; when given,
            ``data``, ``index`` and ``name`` are not used.
        '''
        if series is None:
            if isinstance(data, (list, tuple)):
                data = minum.array(data)
            if index is None:
                index = range(0, len(data))
            else:
                if len(data) != len(index):
                    raise ValueError('Wrong length of index!')
            if isinstance(index, (MIArray, DimArray)):
                index = index.tolist()
            if isinstance(index, Index):
                self._index = index
            else:
                self._index = Index.factory(index)
            self._data = data
            self._series = MISeries(data.array, self._index._index, name)
        else:
            self._series = series
            self._data = MIArray(self._series.getData())
            self._index = Index.factory(index=self._series.getIndex())

    #---- index property
    def get_index(self):
        '''Return the index object of the series.'''
        return self._index

    def set_index(self, value):
        '''Replace the index and push it to the backend series.'''
        self._index = Index(value)
        self._series.setIndex(self._index.data)

    index = property(get_index, set_index)

    #---- values property
    def get_values(self):
        '''
        Return the series values; when the first element is a ``Date`` the
        values are converted with ``miutil.pydate``.
        '''
        if isinstance(self._data[0], Date):
            return miutil.pydate(self._data.aslist())
        else:
            return self._data

    def set_values(self, value):
        '''Replace the values and push them to the backend series.'''
        self._data = minum.array(value)
        self._series.setData(self._data.array)

    values = property(get_values, set_values)

    #---- name property
    def get_name(self):
        '''Return the series name held by the backend series.'''
        return self._series.getName()

    def set_name(self, value):
        '''Set the series name on the backend series.'''
        self._series.setName(value)

    name = property(get_name, set_name)

    #---- dtype property
    def get_dtype(self):
        '''Return the data type of the values.'''
        return self.values.dtype

    dtype = property(get_dtype)

    def __getitem__(self, key):
        '''
        Select by position, label, list of positions/labels, or slice.

        :param key: An int position, an index label, an ``Index``, a datetime,
            a list/tuple/``MIArray`` of positions or labels, or a slice whose
            bounds are positions or string labels.

        :returns: A single value or a new ``Series``.
        :raises KeyError: If an int position is negative or past the end.
        '''
        if isinstance(key, Index):
            key = key.data
        elif isinstance(key, datetime.datetime):
            key = miutil.jdatetime(key)

        if isinstance(key, int):
            if key < 0 or key >= self.__len__():
                raise KeyError(key)
            return self._series.getValue(key)
        elif isinstance(key, (list, tuple, MIArray)):
            if isinstance(key, MIArray):
                key = key.aslist()
            if isinstance(key[0], datetime.datetime):
                key = miutil.jdatetime(key)
            # The first element decides between positional and label lookup.
            if isinstance(key[0], int):
                r = self._series.getValues(key)
            else:
                r = self._series.getValueByIndex(key)
            return Series(series=r)
        elif isinstance(key, slice):
            if isinstance(key.start, basestring):
                sidx = self._index.index(key.start)
                if sidx < 0:
                    sidx = 0
            else:
                sidx = 0 if key.start is None else key.start
                if sidx < 0:
                    sidx = self.__len__() + sidx
            if isinstance(key.stop, basestring):
                eidx = self._index.index(key.stop)
                if eidx < 0:
                    eidx = self.__len__()
            else:
                # Positional stop is exclusive, so the last position is stop - 1.
                eidx = self.__len__() - 1 if key.stop is None else key.stop - 1
                if eidx < 0:
                    eidx = self.__len__() + eidx
            step = 1 if key.step is None else key.step
            rowkey = Range(sidx, eidx, step)
            r = self._series.getValues(rowkey)
            return Series(series=r)
        else:
            r = self._series.getValueByIndex(key)
            if isinstance(r, MISeries):
                return Series(series=r)
            else:
                return r

    def __setitem__(self, key, value):
        '''
        Assign ``value`` at ``key``; a ``Series`` key is handed to the backend
        series, anything else is resolved to positions first.
        '''
        if isinstance(key, Series):
            self._series.setValue(key._series, value)
            return None

        ikey = self.__getkey(key)
        self.values.__setitem__(ikey, value)

    def __getkey(self, key):
        '''
        Translate string labels into positions; other keys pass through.

        :raises KeyError: If no position matches the given label(s).
        '''
        if isinstance(key, basestring):
            ikey = self.index.get_loc(key)
            if len(ikey) == 1:
                ikey = ikey[0]
            elif len(ikey) > 1:
                ikey = list(ikey)
            else:
                raise KeyError(key)
            return ikey
        elif isinstance(key, (list, tuple, MIArray, DimArray)) and isinstance(key[0], basestring):
            if isinstance(key, (MIArray, DimArray)):
                key = key.asarray()
            ikey = self.index.get_indices(key)
            if len(ikey) == 0:
                # Report the labels that could not be found.
                raise KeyError(key)
            else:
                ikey = list(ikey)
            return ikey
        else:
            return key

    def __iter__(self):
        """
        Iterate over the index labels of the Series.
        """
        return iter(self.index)

    def iteritems(self):
        """
        Iterate over (index, value) tuples
        """
        # Pair labels with the values; iter(self) would yield labels again.
        return zip(iter(self.index), iter(self.values))

    def __len__(self):
        return self.values.__len__()

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return self._series.toString()

    def __eq__(self, other):
        return Series(series=self._series.equal(other))

    def __lt__(self, other):
        return Series(series=self._series.lessThan(other))

    def __le__(self, other):
        return Series(series=self._series.lessThanOrEqual(other))

    def __gt__(self, other):
        return Series(series=self._series.greaterThan(other))

    def __ge__(self, other):
        return Series(series=self._series.greaterThanOrEqual(other))

    def head(self, n=5):
        '''
        Print the top rows.

        :param n: (*int*) row number.

        :returns: None; the rows are printed.
        '''
        print(self._series.head(n))

    def tail(self, n=5):
        '''
        Print the bottom rows.

        :param n: (*int*) row number.

        :returns: None; the rows are printed.
        '''
        print(self._series.tail(n))

    def mean(self):
        '''
        Return the mean of the values

        :returns: Mean value
        '''
        r = self._series.mean()
        if isinstance(r, MISeries):
            return Series(series=r)
        else:
            return r

    def max(self):
        '''
        Return the maximum of the values

        :returns: Maximum value
        '''
        r = self._series.max()
        if isinstance(r, MISeries):
            return Series(series=r)
        else:
            return r

    def min(self):
        '''
        Return the minimum of the values

        :returns: Minimum value
        '''
        r = self._series.min()
        if isinstance(r, MISeries):
            return Series(series=r)
        else:
            return r

    def groupby(self, by=None):
        '''
        Group Series.

        :param by: Used to determine the groups for the groupby.

        :returns: GroupBy object.
        '''
        gb = self._series.groupBy(by)
        return groupby.GroupBy(gb)

    def resample(self, by):
        '''
        Group series by date time index.

        :param by: Used to determine the groups for the groupby.

        :returns: GroupBy object.
        '''
        gb = self._series.resample(by)
        return groupby.GroupBy(gb)

#################################################################
def attrvalue(self, key):
    '''
    Get the values of one attribute of this variable.

    :param key: (*string*) Attribute name.

    :returns: Attribute values wrapped as ``MIArray``, or None when the
        variable has no such attribute.
    '''
    attribute = self.variable.findAttribute(key)
    return None if attribute is None else MIArray(attribute.getValues())
def read(self):
    '''
    Read this variable from its dataset by name.

    :returns: The data returned by the dataset, wrapped as ``MIArray``.
    '''
    raw = self.dataset.read(self.name)
    return MIArray(raw)
def __getitem__(self, indices):
    # Read (a subset of) this variable from its dataset.
    #
    # ``indices`` may be:
    #   - None: read the whole variable;
    #   - a ``str``: treated as a member name looked up on the read result;
    #   - a single index or a tuple with one entry per dimension, where each
    #     entry is an int, a slice, a list, or a 'start:end[:step]' string of
    #     dimension values.
    # Returns a single object when exactly one element is selected, otherwise
    # a DimArray; returns None (after printing a message) on invalid input.
    if indices is None:
        rr = self.dataset.read(self.name)
        if rr.getDataType().isNumeric():
            # Numeric data: missing values are replaced using fill_value.
            ArrayMath.missingToNaN(rr, self.fill_value)
            array = MIArray(rr)
            data = DimArray(array, self.dims, self.fill_value, self.dataset.proj)
            return data
        else:
            # Non-numeric data is returned as read, without wrapping.
            return rr

    if isinstance(indices, str):    #metadata
        # NOTE(review): presumably a structure member lookup - confirm
        # against the dataset API.
        rr = self.dataset.read(self.name)
        m = rr.findMember(indices)
        data = rr.getArray(0, m)
        return MIArray(data)

    # A single (non-tuple) index is treated as a one-element index list.
    if not isinstance(indices, tuple):
        inds = []
        inds.append(indices)
        indices = inds

    if len(indices) != self.ndim:
        print 'indices must be ' + str(self.ndim) + ' dimensions!'
        return None

    # Projected (non lon/lat) data: when both the X and Y entries are
    # 'start:end' strings, the two corner points are reprojected from WGS1984
    # into the variable's projection and the strings are rebuilt from the
    # reprojected coordinates.
    if not self.proj is None and not self.proj.isLonLat():
        xlim = None
        ylim = None
        xidx = -1
        yidx = -1
        for i in range(0, self.ndim):
            dim = self.dims[i]
            if dim.getDimType() == DimensionType.X:
                k = indices[i]
                if isinstance(k, basestring):
                    xlims = k.split(':')
                    xlim = [float(xlims[0]), float(xlims[1])]
                    xidx = i
            elif dim.getDimType() == DimensionType.Y:
                k = indices[i]
                if isinstance(k, basestring):
                    ylims = k.split(':')
                    ylim = [float(ylims[0]), float(ylims[1])]
                    yidx = i
        if not xlim is None and not ylim is None:
            fromproj=KnownCoordinateSystems.geographic.world.WGS1984
            inpt = PointD(xlim[0], ylim[0])
            outpt1 = Reproject.reprojectPoint(inpt, fromproj, self.proj)
            inpt = PointD(xlim[1], ylim[1])
            outpt2 = Reproject.reprojectPoint(inpt, fromproj, self.proj)
            xlim = [outpt1.X, outpt2.X]
            ylim = [outpt1.Y, outpt2.Y]
            indices1 = []
            for i in range(0, self.ndim):
                if i == xidx:
                    indices1.append(str(xlim[0]) + ':' + str(xlim[1]))
                elif i == yidx:
                    indices1.append(str(ylim[0]) + ':' + str(ylim[1]))
                else:
                    indices1.append(indices[i])
            indices = indices1

    # NOTE(review): origin, size and stride are filled below but only the
    # commented-out read call further down would use them.
    origin = []
    size = []
    stride = []
    ranges = []
    dims = []
    flips = []
    onlyrange = True
    for i in range(0, self.ndim):
        isrange = True
        dimlen = self.dimlen(i)
        k = indices[i]
        if isinstance(k, int):
            # Single position; negative values count from the end.
            if k < 0:
                k = self.dims[i].getLength() + k
            sidx = k
            eidx = k
            step = 1
        elif isinstance(k, slice):
            sidx = 0 if k.start is None else k.start
            if sidx < 0:
                sidx = self.dimlen(i) + sidx
            # The slice stop is used as-is, i.e. as an inclusive end index
            # (n = eidx - sidx + 1 below).
            eidx = self.dimlen(i)-1 if k.stop is None else k.stop
            if eidx < 0:
                eidx = self.dimlen(i) + eidx
            step = 1 if k.step is None else k.step
        elif isinstance(k, list):
            if not isinstance(k[0], datetime.datetime):
                # A plain list is passed through untouched and switches the
                # final read to ``take``.
                onlyrange = False
                isrange = False
                ranges.append(k)
            else:
                # Datetime list [start, end, step]: converted to numeric
                # values and looked up on the dimension.
                dim = self.variable.getDimension(i)
                sv = k[0]
                sv = miutil.date2num(sv)
                dim = self.variable.getDimension(i)
                sidx = dim.getValueIndex(sv)
                if len(k) == 1:
                    eidx = sidx
                    step = 1
                else:
                    ev = k[1]
                    ev = miutil.date2num(ev)
                    eidx = dim.getValueIndex(ev)
                    if len(k) == 2:
                        step = 1
                    else:
                        # The first assignment is immediately overwritten;
                        # the step is derived from start + k[2].
                        nv = k[2]
                        nv = miutil.date2num(k[0] + k[2]) - sv
                        step = int(nv / dim.getDeltaValue())
                    # Keep sidx <= eidx.
                    if sidx > eidx:
                        iidx = eidx
                        eidx = sidx
                        sidx = iidx
        elif isinstance(k, basestring):
            # 'start[:end[:step]]' given in dimension values.
            dim = self.variable.getDimension(i)
            kvalues = k.split(':')
            sv = float(kvalues[0])
            # NOTE(review): sv is a float here, so this check cannot be true.
            if isinstance(sv, datetime.datetime):
                sv = miutil.date2num(sv)
            sidx = dim.getValueIndex(sv)
            if len(kvalues) == 1:
                eidx = sidx
                step = 1
            else:
                ev = float(kvalues[1])
                # NOTE(review): ev is a float here, so this check cannot be true.
                if isinstance(ev, datetime.datetime):
                    ev = miutil.date2num(ev)
                eidx = dim.getValueIndex(ev)
                if len(kvalues) == 2:
                    step = 1
                else:
                    step = int(float(kvalues[2]) / dim.getDeltaValue())
                # Keep sidx <= eidx.
                if sidx > eidx:
                    iidx = eidx
                    eidx = sidx
                    sidx = iidx
        else:
            # Unsupported index type: print it and give up.
            print k
            return None

        if isrange:
            if eidx >= dimlen:
                print 'Index out of range!'
                return None
            origin.append(sidx)
            n = eidx - sidx + 1
            size.append(n)
            if n > 1:
                # Only dimensions with more than one selected element are
                # kept in the result's dimension list.
                dim = self.variable.getDimension(i)
                if dim.isReverse():
                    step = -step
                dims.append(dim.extract(sidx, eidx, step))
            stride.append(step)
            if step < 0:
                # Read with a positive step and flip this axis afterwards.
                step = abs(step)
                flips.append(i)
            rr = Range(sidx, eidx, step)
            ranges.append(rr)
        else:
            if len(k) > 1:
                dim = self.variable.getDimension(i)
                dims.append(dim.extract(k))

    #rr = self.dataset.read(self.name, origin, size, stride).reduce()
    if onlyrange:
        rr = self.dataset.dataset.read(self.name, ranges)
    else:
        rr = self.dataset.dataset.take(self.name, ranges)
    if rr.getSize() == 1:
        # A single selected element is returned as a plain object.
        return rr.getObject(0)
    else:
        for i in flips:
            rr = rr.flip(i)
        rr = rr.reduce()
        ArrayMath.missingToNaN(rr, self.fill_value)
        # The result is copied into a freshly created array before wrapping.
        rrr = Array.factory(rr.getDataType(), rr.getShape());
        MAMath.copy(rrr, rr);
        array = MIArray(rrr)
        data = DimArray(array, dims, self.fill_value, self.dataset.proj)
        return data
def attrvalue(self, key):
    '''
    Get the values of one global attribute of the dataset.

    :param key: (*string*) Global attribute name.

    :returns: Attribute values wrapped as ``MIArray``, or None when no such
        global attribute is found.
    '''
    attribute = self.dataset.getDataInfo().findGlobalAttribute(key)
    return None if attribute is None else MIArray(attribute.getValues())