def __init__(self, data=None, index=None, name=None, series=None):
    '''
    One-dimensional array with axis labels (including time series).

    :param data: (*array_like*) One-dimensional array data. Lists and tuples
        are converted with ``np.array``.
    :param index: (*list*) Data index list. Values must be unique and hashable,
        same length as data. Default is ``None``, which uses ``0 .. len(data) - 1``.
    :param name: (*string*) Series name.
    :param series: Existing underlying series object to wrap. When given,
        ``data``, ``index`` and ``name`` are not used.

    :raises ValueError: If ``index`` is given and its length differs from ``data``.
    '''
    # Wrapping an existing series: everything is derived from it.
    if series is not None:
        self._series = series
        self._data = NDArray(self._series.getData())
        self._index = Index.factory(index=self._series.getIndex())
        return

    if isinstance(data, (list, tuple)):
        data = np.array(data)

    if index is None:
        index = range(0, len(data))
    elif len(data) != len(index):
        raise ValueError('Wrong length of index!')

    # Array-typed indexes are flattened to a plain list before building the Index.
    if isinstance(index, (NDArray, DimArray)):
        index = index.tolist()

    self._index = index if isinstance(index, Index) else Index.factory(index)
    self._data = data
    self._series = MISeries(data._array, self._index._index, name)
def get_data(self):
    '''
    Get the data held by the underlying data frame.

    :returns: An ``NDArray`` when ``getData()`` yields a single ``Array``;
        otherwise a list with every element wrapped in an ``NDArray``.
    '''
    raw = self._dataframe.getData()
    if isinstance(raw, Array):
        return NDArray(raw)
    return [NDArray(item) for item in raw]
def attrvalue(self, key):
    '''
    Get a global attribute value by key.

    :param key: Attribute key passed to ``findGlobalAttribute``.

    :returns: (*NDArray*) The attribute values, or ``None`` when no attribute
        is found for ``key``.
    '''
    found = self.dataset.getDataInfo().findGlobalAttribute(key)
    return None if found is None else NDArray(found.getValues())
def dimvalue(self, idx, convert=False):
    '''
    Get dimension values.

    :param idx: (*int*) Dimension index.
    :param convert: (*boolean*) If convert to real values (i.e. datetime).
        Only a dimension of type ``DimensionType.T`` is converted.
        Default is ``False``.

    :returns: (*array_like*) Dimension values: the result of
        ``miutil.nums2dates`` for a converted time dimension, otherwise
        an ``NDArray``.
    '''
    dim = self.dims[idx]
    if convert and dim.getDimType() == DimensionType.T:
        return miutil.nums2dates(dim.getDimValue())
    return NDArray(ArrayUtil.array(dim.getDimValue()))
def reproject(a, x=None, y=None, toproj=None, method='bilinear'):
    """
    Project array

    :param a: (*array*) Input array. Its last two dimensions are used as
        the Y and X coordinates.
    :param x: To x coordinates. A list, or an array object providing
        ``asarray()`` (and ``aslist()`` for one-dimensional ``NDArray``).
    :param y: To y coordinates. Same kind of object as ``x``.
    :param toproj: To projection. Default is ``None``, which uses the
        projection of ``a``.
    :param method: Interpolation method: ``bilinear`` or ``nearest``. Any value
        other than ``bilinear`` selects nearest neighbor.

    :returns: (*NDArray*) Projected array. When ``x`` or ``y`` is ``None`` a
        ``DimArray`` carrying the computed X/Y dimensions is returned instead.
    """
    yy = a.dims[a.ndim - 2].getDimValue()
    xx = a.dims[a.ndim - 1].getDimValue()
    fromproj = ProjectionInfo.factory(a.proj)
    if toproj is None:
        toproj = fromproj

    if x is None or y is None:
        # No target coordinates given: the reprojection also yields them.
        pr = Reproject.reproject(a._array, xx, yy, fromproj, toproj)
        r = pr[0]
        x = pr[1]
        y = pr[2]
        # Work on a copy so the input array's own dimension list is not
        # overwritten with the reprojected X/Y dimensions.
        dims = list(a.dims)
        ydim = Dimension(DimensionType.Y)
        ydim.setDimValues(np.NDArray(y).aslist())
        dims[-2] = ydim
        xdim = Dimension(DimensionType.X)
        xdim.setDimValues(np.NDArray(x).aslist())
        dims[-1] = xdim
        return DimArray(np.NDArray(r), dims, a.fill_value, toproj)

    if method == 'bilinear':
        method = ResampleMethods.Bilinear
    else:
        method = ResampleMethods.NearestNeighbor

    if isinstance(x, list):
        r = Reproject.reproject(a._array, xx, yy, x, y, fromproj, toproj,
                                a.fill_value, method)
    elif isinstance(x, NDArray):
        if x.ndim == 1:
            r = Reproject.reproject(a._array, xx, yy, x.aslist(), y.aslist(),
                                    fromproj, toproj, a.fill_value, method)
        else:
            r = Reproject.reproject(a._array, xx, yy, x.asarray(), y.asarray(),
                                    fromproj, toproj, a.fill_value, method)
    else:
        r = Reproject.reproject(a._array, xx, yy, x.asarray(), y.asarray(),
                                fromproj, toproj, a.fill_value, method)
    return NDArray(r)
def binread(fn, dim, datatype=None, skip=0, byteorder='little_endian'):
    """
    Read data array from a binary file.

    :param fn: (*string*) The binary file name for data reading.
    :param dim: (*list*) Dimensions.
    :param datatype: (*string*) Data type string [byte | short | int | float | double].
    :param skip: (*int*) Skip bytes number.
    :param byteorder: (*string*) Byte order. ``little_endian`` or ``big_endian``.

    :returns: (*NDArray*) Data array

    :raises IOError: If ``fn`` does not exist.
    """
    if os.path.exists(fn):
        raw = ArrayUtil.readBinFile(fn, dim, datatype, skip, byteorder)
        return NDArray(raw)
    raise IOError('No such file: ' + fn)
def asciiread(filename, **kwargs):
    '''
    Read data from an ASCII file.

    :param filename: (*string*) The ASCII file name.
    :param delimiter: (*string*) Field delimiter character. Default is ``None``, means space or tab
        delimiter.
    :param datatype: Data type handed to the reader. Default is ``None``.
    :param headerlines: (*int*) Lines to skip at beginning of the file. Default is ``0``.
    :param shape: (*string*) Data array dimension shape. Default is ``None``, the file content will
        be read as one dimension array.
    :param readfirstcol: (*boolean*) Read first column data or not. Default is ``True``.

    :returns: (*NDArray*) The data array.

    :raises IOError: If ``filename`` does not exist.
    '''
    if not os.path.exists(filename):
        raise IOError('No such file: ' + filename)

    # Options are popped in the same order as before; the call below reorders
    # them into the reader's positional signature.
    field_sep = kwargs.pop('delimiter', None)
    dtype = kwargs.pop('datatype', None)
    skip_lines = kwargs.pop('headerlines', 0)
    dims = kwargs.pop('shape', None)
    first_col = kwargs.pop('readfirstcol', True)
    raw = ArrayUtil.readASCIIFile(filename, field_sep, skip_lines, dtype, dims, first_col)
    return NDArray(raw)
class Series(object):
    '''
    One-dimensional labelled array backed by an ``MISeries`` object.

    The instance keeps three views of the same data: ``_series`` (the backing
    series), ``_data`` (the values array) and ``_index`` (the ``Index``).
    '''

    def __init__(self, data=None, index=None, name=None, series=None):
        '''
        One-dimensional array with axis labels (including time series).

        :param data: (*array_like*) One-dimensional array data.
        :param index: (*list*) Data index list. Values must be unique and hashable, same length as data.
        :param name: (*string*) Series name.
        :param series: Existing backing series to wrap. When given, ``data``,
            ``index`` and ``name`` are not used.

        :raises ValueError: If ``index`` is given and its length differs from ``data``.
        '''
        if series is None:
            if isinstance(data, (list, tuple)):
                data = np.array(data)
            if index is None:
                index = range(0, len(data))
            else:
                if len(data) != len(index):
                    raise ValueError('Wrong length of index!')
            if isinstance(index, (NDArray, DimArray)):
                index = index.tolist()
            if isinstance(index, Index):
                self._index = index
            else:
                self._index = Index.factory(index)
            self._data = data
            self._series = MISeries(data._array, self._index._index, name)
        else:
            self._series = series
            self._data = NDArray(self._series.getData())
            self._index = Index.factory(index=self._series.getIndex())

    @staticmethod
    def _wrap(r):
        '''Wrap an ``MISeries`` result in a ``Series``; return anything else unchanged.'''
        if isinstance(r, MISeries):
            return Series(series=r)
        return r

    #---- index property
    def get_index(self):
        return self._index

    def set_index(self, value):
        self._index = Index(value)
        self._series.setIndex(self._index.data)

    index = property(get_index, set_index)

    #---- values property
    def get_values(self):
        # Guard the first-element probe so an empty series still has values
        # (and therefore a length) instead of raising IndexError.
        if len(self._data) > 0 and isinstance(self._data[0], Date):
            return miutil.pydate(self._data.aslist())
        else:
            return self._data

    def set_values(self, value):
        self._data = np.array(value)
        self._series.setData(self._data._array)

    values = property(get_values, set_values)

    #---- name property
    def get_name(self):
        return self._series.getName()

    def set_name(self, value):
        self._series.setName(value)

    name = property(get_name, set_name)

    #---- dtype property
    def get_dtype(self):
        return self.values.dtype

    dtype = property(get_dtype)

    def __getitem__(self, key):
        '''
        Select by position, by label, by a list of either, or by slice.

        :param key: An ``int`` position (must be within ``0 .. len - 1``), a
            list/tuple/``NDArray`` of positions or labels, a ``slice`` whose
            bounds are positions or string labels, an ``Index``, a
            ``datetime.datetime``, or any other single label.

        :returns: A single value or a new ``Series``.

        :raises KeyError: If an ``int`` key is negative or not less than the length.
        '''
        if isinstance(key, Index):
            key = key.data
        elif isinstance(key, datetime.datetime):
            key = miutil.jdatetime(key)

        if isinstance(key, int):
            if key < 0 or key >= self.__len__():
                raise KeyError(key)
            return self._series.getValue(key)
        elif isinstance(key, (list, tuple, NDArray)):
            if isinstance(key, NDArray):
                key = key.aslist()
            if isinstance(key[0], datetime.datetime):
                key = miutil.jdatetime(key)
            # The first element decides whether the list holds positions or labels.
            if isinstance(key[0], int):
                r = self._series.getValues(key)
            else:
                r = self._series.getValueByIndex(key)
            return Series(series=r)
        elif isinstance(key, slice):
            if isinstance(key.start, basestring):
                sidx = self._index.index(key.start)
                if sidx < 0:
                    sidx = 0
            else:
                sidx = 0 if key.start is None else key.start
                if sidx < 0:
                    sidx = self.__len__() + sidx
            # The end index handed to Range is inclusive, hence ``stop - 1``.
            if isinstance(key.stop, basestring):
                eidx = self._index.index(key.stop)
                if eidx < 0:
                    eidx = self.__len__()
            else:
                eidx = self.__len__() - 1 if key.stop is None else key.stop - 1
                if eidx < 0:
                    eidx = self.__len__() + eidx
            step = 1 if key.step is None else key.step
            rowkey = Range(sidx, eidx, step)
            r = self._series.getValues(rowkey)
            return Series(series=r)
        else:
            return Series._wrap(self._series.getValueByIndex(key))

    def __setitem__(self, key, value):
        '''
        Assign ``value`` at ``key``.

        A ``Series`` key is passed to the backing series' ``setValue``; any
        other key is resolved by ``__getkey`` and applied to ``self.values``.
        '''
        if isinstance(key, Series):
            self._series.setValue(key._series, value)
            return None

        ikey = self.__getkey(key)
        self.values.__setitem__(ikey, value)

    def __getkey(self, key):
        '''
        Translate string label keys into positional keys.

        :param key: A string label, a sequence whose first element is a string
            label, or any other key (returned unchanged).

        :returns: A position, a list of positions, or ``key`` itself.

        :raises KeyError: If no position matches the label(s).
        '''
        if isinstance(key, basestring):
            ikey = self.index.get_loc(key)
            if len(ikey) == 1:
                ikey = ikey[0]
            elif len(ikey) > 1:
                ikey = list(ikey)
            else:
                raise KeyError(key)
            return ikey
        elif isinstance(key, (list, tuple, NDArray, DimArray)) and isinstance(key[0], basestring):
            if isinstance(key, (NDArray, DimArray)):
                key = key.asarray()
            ikey = self.index.get_indices(key)
            if len(ikey) == 0:
                # Report which labels were not found.
                raise KeyError(key)
            return list(ikey)
        else:
            return key

    def __iter__(self):
        """
        Iterate over the index labels of the Series.
        """
        return iter(self.index)

    def iteritems(self):
        """
        Lazily iterate over (index, value) tuples
        """
        # ``iter(self)`` yields index labels, so pair the index with the values
        # explicitly to produce (index, value).
        return zip(iter(self.index), iter(self.values))

    def __len__(self):
        return self.values.__len__()

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return self._series.toString()

    def __eq__(self, other):
        r = Series(series=self._series.equal(other))
        return r

    def __lt__(self, other):
        r = Series(series=self._series.lessThan(other))
        return r

    def __le__(self, other):
        r = Series(series=self._series.lessThanOrEqual(other))
        return r

    def __gt__(self, other):
        r = Series(series=self._series.greaterThan(other))
        return r

    def __ge__(self, other):
        r = Series(series=self._series.greaterThanOrEqual(other))
        return r

    def head(self, n=5):
        '''
        Print top rows

        :param n: (*int*) row number.
        '''
        print(self._series.head(n))

    def tail(self, n=5):
        '''
        Print bottom rows

        :param n: (*int*) row number.
        '''
        print(self._series.tail(n))

    def mean(self):
        '''
        Return the mean of the values

        :returns: Mean value
        '''
        return Series._wrap(self._series.mean())

    def max(self):
        '''
        Return the maximum of the values

        :returns: Maximum value
        '''
        return Series._wrap(self._series.max())

    def min(self):
        '''
        Return the minimum of the values

        :returns: Minimum value
        '''
        return Series._wrap(self._series.min())

    def std(self):
        '''
        Return the standard deviation of the values

        :returns: Standard deviation value
        '''
        return Series._wrap(self._series.stdDev())

    def groupby(self, by=None):
        '''
        Group Series.

        :param by: Used to determine the groups for the groupby.

        :returns: GroupBy object.
        '''
        gb = self._series.groupBy(by)
        return groupby.GroupBy(gb)

    def resample(self, by):
        '''
        Group series by date time index.

        :param by: Used to determine the groups for the groupby.

        :returns: GroupBy object.
        '''
        gb = self._series.resample(by)
        return groupby.GroupBy(gb)

#################################################################
def __getitem__(self, key):
    '''
    Select a column, a single value, or a row/column sub-selection.

    :param key: One of:

        * a string column name - returns that column as a ``Series``
          (or ``None`` when the column data is ``None``);
        * a ``(row, column)`` tuple - two ints, or an int row with a string
          column, return a single value; other combinations select a block;
        * a row key alone (int, string label, slice, list/tuple/``NDArray``
          of positions or labels, ``Index``) - all columns are selected.

    :returns: A single value, a ``Series``, a ``DataFrame``, or ``None`` when
        a string row label is not found, the column key type is unsupported,
        or the selection yields ``None``.
    '''
    if isinstance(key, basestring):
        data = self._dataframe.getColumnData(key)
        if data is None:
            return data
        idx = self._index[:]
        r = series.Series(NDArray(data), idx, key)
        return r

    hascolkey = True
    if isinstance(key, tuple):
        ridx = key[0]
        cidx = key[1]
        # Scalar fast paths: (int, int) and (int, column name).
        if isinstance(ridx, int) and isinstance(cidx, int):
            if ridx < 0:
                ridx = self.shape[0] + ridx
            if cidx < 0:
                cidx = self.shape[1] + cidx
            return self._dataframe.getValue(ridx, cidx)
        elif isinstance(ridx, int) and isinstance(cidx, basestring):
            if ridx < 0:
                ridx = self.shape[0] + ridx
            return self._dataframe.getValue(ridx, cidx)
    else:
        key = (key, slice(None))
        hascolkey = False

    # ---- row key (Range end indexes are inclusive throughout)
    k = key[0]
    if isinstance(k, Index):
        k = k.data
    if isinstance(k, int):
        if k < 0:
            k = self.shape[0] + k
        rowkey = k
    elif isinstance(k, basestring):
        sidx = self._index.index(k)
        if sidx < 0:
            return None
        eidx = sidx
        step = 1
        rowkey = Range(sidx, eidx, step)
    elif isinstance(k, slice):
        if isinstance(k.start, basestring):
            sidx = self._index.index(k.start)
            if sidx < 0:
                sidx = 0
        else:
            sidx = 0 if k.start is None else k.start
            if sidx < 0:
                sidx = self.shape[0] + sidx
        if isinstance(k.stop, basestring):
            eidx = self._index.index(k.stop)
            if eidx < 0:
                eidx = self.shape[0] + eidx
        else:
            eidx = self.shape[0] - 1 if k.stop is None else k.stop - 1
            if eidx < 0:
                eidx = self.shape[0] + eidx
        step = 1 if k.step is None else k.step
        rowkey = Range(sidx, eidx, step)
    elif isinstance(k, (list, tuple, NDArray)):
        # The first element decides whether the sequence holds positions or labels.
        if isinstance(k[0], int):
            rowkey = k
        else:
            # Labels that are not found (negative position) are dropped.
            tlist = []
            for tstr in k:
                idx = self._index.index(tstr)
                if idx >= 0:
                    tlist.append(idx)
            rowkey = tlist
    else:
        rowkey = self._index.get_loc(k)

    # ---- column key
    if not hascolkey:
        colkey = Range(0, self.shape[1] - 1, 1)
    else:
        k = key[1]
        if isinstance(k, int):
            sidx = k
            if sidx < 0:
                sidx = self.shape[1] + sidx
            eidx = sidx
            step = 1
            colkey = Range(sidx, eidx, step)
        elif isinstance(k, slice):
            sidx = 0 if k.start is None else k.start
            if sidx < 0:
                sidx = self.shape[1] + sidx
            eidx = self.shape[1] - 1 if k.stop is None else k.stop - 1
            if eidx < 0:
                eidx = self.shape[1] + eidx
            step = 1 if k.step is None else k.step
            colkey = Range(sidx, eidx, step)
        elif isinstance(k, list):
            if isinstance(k[0], int):
                colkey = k
            else:
                colkey = self.columns.indexOfName(k)
        elif isinstance(k, basestring):
            col = self.columns.indexOf(k)
            # Inclusive range covering exactly the named column, matching
            # the single-int column case above.
            colkey = Range(col, col, 1)
        else:
            return None

    r = self._dataframe.select(rowkey, colkey)
    if r is None:
        return None
    if isinstance(r, MISeries):
        r = series.Series(series=r)
    else:
        r = DataFrame(dataframe=r)
    return r
def __getitem__(self, indices):
    '''
    Read a subset of the variable from the dataset.

    :param indices: Selection key. One of:

        * ``None`` or, for a multi-dimensional variable, a bare ``:`` slice -
          every dimension is read in full;
        * a ``str`` - treated as a member name; that member's array is
          returned as an ``NDArray``;
        * a tuple with one entry per dimension (a non-tuple key is wrapped
          into a one-element list). Each entry may be an ``int``, a ``slice``
          (bounds may be ints, numeric strings or ``datetime`` objects; the
          step may also be a numeric string or a ``timedelta``), a ``list``
          (of indexes, or of ``datetime`` objects), or a string of the form
          ``'start'``, ``'start:end'`` or ``'start:end:step'`` holding
          coordinate values.

    :returns: A single value when exactly one element is read, otherwise a
        ``DimArray``. ``None`` is returned (after printing a message) when
        the number of indices does not match ``ndim``, when a key type is
        unsupported, or when an end index is out of range.
    '''
    # A bare ``var[:]`` on a multi-dimensional variable means "everything".
    if isinstance(indices, slice) and self.ndim > 1:
        k = indices
        if k.start is None and k.stop is None and k.step is None:
            inds = []
            for i in range(self.ndim):
                inds.append(slice(None))
            indices = tuple(inds)

    # if isinstance(indices, tuple):
    #     allnone = True
    #     for k in indices:
    #         if isinstance(k, slice):
    #             if (not k.start is None) or (not k.stop is None) or (not k.step is None):
    #                 allnone = False
    #                 break
    #         else:
    #             allnone = False
    #             break
    #     if allnone:
    #         r = self.dataset.dataset.read(self.name)
    #         return DimArray(r, self.dims, self.fill_value, self.proj)

    if indices is None:
        inds = []
        for i in range(self.ndim):
            inds.append(slice(None))
        indices = tuple(inds)

    if isinstance(indices, str):    #metadata
        # A plain ``str`` key selects a named member of the data read for
        # this variable.
        rr = self.dataset.read(self.name)
        m = rr.findMember(indices)
        data = rr.getArray(0, m)
        return NDArray(data)

    if not isinstance(indices, tuple):
        inds = []
        inds.append(indices)
        indices = inds

    if len(indices) != self.ndim:
        print 'indices must be ' + str(self.ndim) + ' dimensions!'
        return None

    # For a projected (non lon/lat) variable, string limits on the X and Y
    # dimensions are reprojected from WGS1984 into the variable's projection
    # before being resolved to indexes below.
    if not self.proj is None and not ProjUtil.isLonLat(self.proj):
        xlim = None
        ylim = None
        xidx = -1
        yidx = -1
        for i in range(0, self.ndim):
            dim = self.dims[i]
            if dim.getDimType() == DimensionType.X:
                k = indices[i]
                if isinstance(k, basestring):
                    xlims = k.split(':')
                    if len(xlims) == 1:
                        xlim = [float(xlims[0])]
                    else:
                        xlim = [float(xlims[0]), float(xlims[1])]
                    xidx = i
            elif dim.getDimType() == DimensionType.Y:
                k = indices[i]
                if isinstance(k, basestring):
                    ylims = k.split(':')
                    if len(ylims) == 1:
                        ylim = [float(ylims[0])]
                    else:
                        ylim = [float(ylims[0]), float(ylims[1])]
                    yidx = i
        # Only reproject when both X and Y string limits were supplied.
        if not xlim is None and not ylim is None:
            fromproj = KnownCoordinateSystems.geographic.world.WGS1984
            inpt = PointD(xlim[0], ylim[0])
            outpt1 = ProjUtil.reprojectPoint(inpt, fromproj, self.proj)
            if len(xlim) == 1:
                xlim = [outpt1.X]
                ylim = [outpt1.Y]
            else:
                inpt = PointD(xlim[1], ylim[1])
                outpt2 = ProjUtil.reprojectPoint(inpt, fromproj, self.proj)
                xlim = [outpt1.X, outpt2.X]
                ylim = [outpt1.Y, outpt2.Y]
            # Rebuild the key with the reprojected limits as strings again.
            indices1 = []
            for i in range(0, self.ndim):
                if i == xidx:
                    if len(xlim) == 1:
                        indices1.append(str(xlim[0]))
                    else:
                        indices1.append(str(xlim[0]) + ':' + str(xlim[1]))
                elif i == yidx:
                    if len(ylim) == 1:
                        indices1.append(str(ylim[0]))
                    else:
                        indices1.append(str(ylim[0]) + ':' + str(ylim[1]))
                else:
                    indices1.append(indices[i])
            indices = indices1

    origin = []
    size = []
    stride = []
    ranges = []     # one Range or index list per dimension, handed to the reader
    dims = []       # dimensions kept in the result (those with more than one element)
    flips = []      # dimension numbers that must be flipped after reading
    onlyrange = True    # becomes False as soon as one dimension is selected by list
    for i in range(0, self.ndim):
        isrange = True
        dimlen = self.dimlen(i)
        k = indices[i]
        if isinstance(k, int):
            if k < 0:
                k = self.dims[i].getLength() + k
            sidx = k
            eidx = k
            step = 1
        elif isinstance(k, slice):
            # Start: string / datetime values are looked up as coordinate values.
            if isinstance(k.start, basestring):
                sv = float(k.start)
                sidx = self.dims[i].getValueIndex(sv)
            elif isinstance(k.start, datetime.datetime):
                sv = miutil.date2num(k.start)
                sidx = self.dims[i].getValueIndex(sv)
            else:
                sidx = 0 if k.start is None else k.start
            if sidx < 0:
                sidx = self.dimlen(i) + sidx

            # Stop: an integer stop is exclusive and is converted to an
            # inclusive end index; a value-based stop is used as found.
            if isinstance(k.stop, basestring):
                ev = float(k.stop)
                eidx = self.dims[i].getValueIndex(ev)
            elif isinstance(k.stop, datetime.datetime):
                ev = miutil.date2num(k.stop)
                eidx = self.dims[i].getValueIndex(ev)
            else:
                eidx = self.dimlen(i) if k.stop is None else k.stop
                if eidx < 0:
                    eidx = self.dimlen(i) + eidx
                eidx -= 1

            # Step: a value-based step is turned into an index step by
            # locating the value one step away and subtracting sidx.
            if isinstance(k.step, basestring):
                nv = float(k.step) + self.dims[i].getDimValue()[0]
                nidx = self.dims[i].getValueIndex(nv)
                step = nidx - sidx
            elif isinstance(k.step, datetime.timedelta):
                nv = miutil.date2num(k.start + k.step)
                nidx = self.dims[i].getValueIndex(nv)
                step = nidx - sidx
            else:
                step = 1 if k.step is None else k.step
            # Keep sidx <= eidx.
            if sidx > eidx:
                iidx = eidx
                eidx = sidx
                sidx = iidx
        elif isinstance(k, list):
            onlyrange = False
            isrange = False
            if not isinstance(k[0], datetime.datetime):
                ranges.append(k)
            else:
                # datetime entries are converted to dimension indexes.
                tlist = []
                for tt in k:
                    sv = miutil.date2num(tt)
                    idx = self.dims[i].getValueIndex(sv)
                    tlist.append(idx)
                ranges.append(tlist)
                k = tlist
        elif isinstance(k, basestring):
            # 'start[:end[:step]]' given as coordinate values.
            dim = self.variable.getDimension(i)
            kvalues = k.split(':')
            sv = float(kvalues[0])
            sidx = dim.getValueIndex(sv)
            if len(kvalues) == 1:
                eidx = sidx
                step = 1
            else:
                ev = float(kvalues[1])
                eidx = dim.getValueIndex(ev)
                if len(kvalues) == 2:
                    step = 1
                else:
                    step = int(float(kvalues[2]) / dim.getDeltaValue())
                if sidx > eidx:
                    iidx = eidx
                    eidx = sidx
                    sidx = iidx
        else:
            # Unsupported key type: echo it and give up.
            print k
            return None

        if isrange:
            if eidx >= dimlen:
                print 'Index out of range!'
                return None
            origin.append(sidx)
            n = eidx - sidx + 1
            size.append(n)
            # Only dimensions with more than one element are kept in the result.
            if n > 1:
                dim = self.variable.getDimension(i)
                if dim.isReverse():
                    step = -step
                dim = dim.extract(sidx, eidx, step)
                dim.setReverse(False)
                dims.append(dim)
            stride.append(step)
            # A negative step is read with its absolute value and the
            # dimension is flipped after reading.
            if step < 0:
                step = abs(step)
                flips.append(i)
            rr = Range(sidx, eidx, step)
            ranges.append(rr)
        else:
            if len(k) > 1:
                dim = self.variable.getDimension(i)
                dim = dim.extract(k)
                dim.setReverse(False)
                dims.append(dim)

    #rr = self.dataset.read(self.name, origin, size, stride).reduce()
    # ``read`` is used when every dimension is a Range; ``take`` when at least
    # one dimension was selected by an index list.
    if onlyrange:
        rr = self.dataset.dataset.read(self.name, ranges).getArray()
    else:
        rr = self.dataset.dataset.take(self.name, ranges).getArray()
    if rr.getSize() == 1:
        return rr.getObject(0)
    else:
        for i in flips:
            rr = rr.flip(i)
        rr = rr.reduce()
        ArrayMath.missingToNaN(rr, self.fill_value)
        if len(flips) > 0:
            # Flipped data is copied into a newly created array before wrapping.
            rrr = Array.factory(rr.getDataType(), rr.getShape())
            MAMath.copy(rrr, rr)
            array = NDArray(rrr)
        else:
            array = NDArray(rr)
        data = DimArray(array, dims, self.fill_value, self.dataset.proj)
        return data
def attrvalue(self, key):
    '''
    Get an attribute value of the variable by key.

    :param key: Attribute key passed to ``findAttribute``.

    :returns: (*NDArray*) The attribute values, or ``None`` when no attribute
        is found for ``key``.
    '''
    found = self.variable.findAttribute(key)
    return None if found is None else NDArray(found.getValues())
def read(self):
    '''
    Read the data stored under this object's name from its dataset.

    :returns: (*NDArray*) The data read.
    '''
    raw = self.dataset.read(self.name)
    return NDArray(raw)