def tab_join(ToMerge, keycols=None, nullvals=None, renamer=None,
             returnrenaming=False, Names=None):
    """
    Database-join for tabular arrays.

    Wrapper for :func:`tabular.spreadsheet.join` that deals with the coloring
    and returns the result as a tabarray.

    Method calls::

            [Result, Renaming] = tabular.spreadsheet.join(
                ToMerge, keycols=keycols, nullvals=nullvals,
                renamer=renamer, returnrenaming=True, Names=Names)

    **Parameters**

            **ToMerge** : dictionary or sequence of arrays

                    The arrays to join.  Members that have no `coloring`
                    attribute contribute no colors to the result.

            **keycols**, **nullvals**, **renamer**, **Names**

                    Passed through unchanged to
                    :func:`tabular.spreadsheet.join`.

            **returnrenaming** : boolean

                    If true, return `[Result, Renaming]` instead of `Result`.

    **Returns**

            The joined data viewed as a tabarray whose coloring is the union
            of the (renamed) colorings of the inputs; with `returnrenaming`
            the renaming reported by the underlying join is returned as well.

    """
    [Result, Renaming] = spreadsheet.join(ToMerge, keycols=keycols,
                                          nullvals=nullvals, renamer=renamer,
                                          returnrenaming=True, Names=Names)

    # From here on `Names` is the set of keys used to index `ToMerge`.
    if isinstance(ToMerge, dict):
        Names = ToMerge.keys()
    else:
        Names = range(len(ToMerge))

    # Every key must map to a (possibly empty) coloring dict.  The previous
    # form produced a bare `{}` list element for inputs without a coloring,
    # which made dict() raise ValueError.
    Colorings = dict([(k, getattr(ToMerge[k], 'coloring', {}))
                      for k in Names])

    # Apply the column renaming chosen by the join to each input's coloring.
    for k in Names:
        if k in Renaming:
            renames = Renaming[k]
            Colorings[k] = dict(
                [(g, [renames[n] if n in renames else n for n in cols])
                 for (g, cols) in Colorings[k].items()])

    # Merge the per-input colorings, removing duplicate column names.
    Coloring = {}
    for k in Colorings:
        for (j, cols) in Colorings[k].items():
            if j in Coloring:
                Coloring[j] = utils.uniqify(Coloring[j] + cols)
            else:
                Coloring[j] = utils.uniqify(cols)

    Result = Result.view(tabarray)
    Result.coloring = Coloring

    if returnrenaming:
        return [Result, Renaming]
    else:
        return Result
def tab_rowstack(ListOfTabArrays, mode='nulls'):
    """
    "Vertical stacking" of tabarrays, e.g. adding rows.

    Wrapper for :func:`tabular.spreadsheet.rowstack` that deals with the
    coloring and returns the result as a tabarray.

    Method calls::

            data = tabular.spreadsheet.rowstack(ListOfTabArrays, mode=mode)

    **Parameters**

            **ListOfTabArrays** : sequence of tabarrays

                    Arrays to stack; each must expose a `coloring` mapping.

            **mode** : passed through to :func:`tabular.spreadsheet.rowstack`.

    **Returns**

            The stacked data as a tabarray.  Its coloring is the union of the
            input colorings, restricted to columns present in the result;
            colors left with no columns are dropped.

    """
    data = spreadsheet.rowstack(ListOfTabArrays, mode=mode)

    # Union of all input colorings, color by color.
    merged = {}
    for a in ListOfTabArrays:
        for k in a.coloring:
            if k in merged:
                merged[k] = utils.uniqify(merged[k] + a.coloring[k])
            else:
                merged[k] = a.coloring[k]

    # Build a fresh dict instead of popping from the one being iterated:
    # removing keys during iteration raises RuntimeError on Python 3.
    names = data.dtype.names
    coloring = {}
    for (k, cols) in merged.items():
        kept = [x for x in cols if x in names]
        if len(kept) > 0:
            coloring[k] = kept

    data = data.view(tabarray)
    data.coloring = coloring
    return data
def tab_colstack(ListOfTabArrays, mode='abort'):
    """
    "Horizontal stacking" of tabarrays, e.g. adding columns.

    Wrapper for :func:`tabular.spreadsheet.colstack` that deals with the
    coloring and returns the result as a tabarray.

    Method calls::

            data = tabular.spreadsheet.colstack(ListOfTabArrays, mode=mode)

    **Parameters**

            **ListOfTabArrays** : sequence of tabarrays

                    Arrays to stack; each must expose a `coloring` mapping.

            **mode** : passed through to :func:`tabular.spreadsheet.colstack`.

    **Returns**

            The stacked data as a tabarray.  Its coloring is the union of the
            input colorings, restricted to columns present in the result;
            colors left with no columns are dropped.

    """
    data = spreadsheet.colstack(ListOfTabArrays, mode=mode)

    # Union of all input colorings, color by color.
    merged = {}
    for a in ListOfTabArrays:
        for k in a.coloring:
            if k in merged:
                merged[k] = utils.uniqify(merged[k] + a.coloring[k])
            else:
                merged[k] = a.coloring[k]

    # Build a fresh dict instead of popping from the one being iterated:
    # removing keys during iteration raises RuntimeError on Python 3.
    names = data.dtype.names
    coloring = {}
    for (k, cols) in merged.items():
        kept = [x for x in cols if x in names]
        if len(kept) > 0:
            coloring[k] = kept

    data = data.view(tabarray)
    data.coloring = coloring
    return data
def tab_colstack(ListOfTabArrays, mode='abort'):
    """
    "Horizontal stacking" of tabarrays, e.g. adding columns.

    Wrapper for :func:`tabular.spreadsheet.colstack` that deals with the
    coloring and returns the result as a tabarray.

    Method calls::

            (data, naming) = tabular.spreadsheet.colstack(
                ListOfTabArrays, mode=mode, returnnaming=True)

    **Parameters**

            **ListOfTabArrays** : sequence of tabarrays

                    Arrays to stack; each must expose a `coloring` mapping.

            **mode** : passed through to :func:`tabular.spreadsheet.colstack`.

    **Returns**

            The stacked data as a tabarray.  Each input coloring is translated
            through the `naming` returned by the underlying colstack before
            the colorings are merged; colors left with no column present in
            the result are dropped.

    """
    (data, naming) = spreadsheet.colstack(ListOfTabArrays, mode=mode,
                                          returnnaming=True)

    merged = {}
    for (i, a) in enumerate(ListOfTabArrays):
        # `naming` holds (array index, x, y) triples; coloring entries of the
        # i-th array are looked up as x and replaced by y (presumably the
        # original and the stacked column name -- confirm against colstack).
        namedict = dict([(x, y) for (j, x, y) in naming if i == j])
        for k in a.coloring:
            s = [namedict[kk] for kk in a.coloring[k]]
            if k in merged:
                merged[k] = utils.uniqify(merged[k] + s)
            else:
                merged[k] = s

    # Build a fresh dict instead of popping from the one being iterated:
    # removing keys during iteration raises RuntimeError on Python 3.
    names = data.dtype.names
    coloring = {}
    for (k, cols) in merged.items():
        kept = [x for x in cols if x in names]
        if len(kept) > 0:
            coloring[k] = kept

    data = data.view(tabarray)
    data.coloring = coloring
    return data
def tab_join(ToMerge, keycols=None, nullvals=None, renamer=None,
             returnrenaming=False, Names=None):
    """
    Database-join for tabular arrays.

    Wrapper for :func:`tabular.spreadsheet.join` that deals with the coloring
    and returns the result as a tabarray.

    Method calls::

            [Result, Renaming] = tabular.spreadsheet.join(
                ToMerge, keycols=keycols, nullvals=nullvals,
                renamer=renamer, returnrenaming=True, Names=Names)

    **Parameters**

            **ToMerge** : dictionary or sequence of arrays

                    The arrays to join.  Members that have no `coloring`
                    attribute contribute no colors to the result.

            **keycols**, **nullvals**, **renamer**, **Names**

                    Passed through unchanged to
                    :func:`tabular.spreadsheet.join`.

            **returnrenaming** : boolean

                    If true, return `[Result, Renaming]` instead of `Result`.

    **Returns**

            The joined data viewed as a tabarray whose coloring is the union
            of the (renamed) colorings of the inputs; with `returnrenaming`
            the renaming reported by the underlying join is returned as well.

    """
    [Result, Renaming] = spreadsheet.join(ToMerge, keycols=keycols,
                                          nullvals=nullvals, renamer=renamer,
                                          returnrenaming=True, Names=Names)

    # From here on `Names` is the set of keys used to index `ToMerge`.
    if isinstance(ToMerge, dict):
        Names = ToMerge.keys()
    else:
        Names = range(len(ToMerge))

    # Pair every key with a coloring dict, using an empty one when the input
    # has no `coloring`.  Previously such inputs yielded a bare `{}` element,
    # which is not a (key, value) pair and made dict() raise ValueError.
    Colorings = {}
    for k in Names:
        Colorings[k] = getattr(ToMerge[k], 'coloring', {})

    # Translate column names inside each coloring using the join's renaming.
    for k in Names:
        if k in Renaming:
            renames = Renaming[k]
            translated = {}
            for (g, cols) in Colorings[k].items():
                translated[g] = [renames[n] if n in renames else n
                                 for n in cols]
            Colorings[k] = translated

    # Merge the per-input colorings, removing duplicate column names.
    Coloring = {}
    for k in Colorings:
        for (j, cols) in Colorings[k].items():
            if j in Coloring:
                Coloring[j] = utils.uniqify(Coloring[j] + cols)
            else:
                Coloring[j] = utils.uniqify(cols)

    Result = Result.view(tabarray)
    Result.coloring = Coloring

    if returnrenaming:
        return [Result, Renaming]
    else:
        return Result
def deletecols(self, cols):
    """
    Delete columns and/or colors.

    Each entry of `cols` that is a column name is deleted directly; any other
    entry is looked up in `self.coloring` and replaced by the columns of that
    color.  Duplicates are removed before deletion.

    Method wraps::

            tabular.spreadsheet.deletecols(self, cols)

    """
    groups = []
    for c in cols:
        if c in self.dtype.names:
            groups.append([c])
        else:
            # Not a column name: treat it as the name of a coloring.
            groups.append(self.coloring[c])

    deletenames = utils.uniqify(utils.listunion(groups))
    return spreadsheet.deletecols(self, deletenames)
def GroupByLevel(NTree, sdict):
    """
    Collect the nodes of `NTree` into a list of levels.

    The first levels come from ``EqualLevels`` applied to the keys of
    `NTree.subtrees`.  Every subtree is then processed recursively, and for
    each depth index the corresponding levels of all subtrees are united,
    de-duplicated and appended when non-empty.

    """
    Levels = EqualLevels(NTree.subtrees.keys(), sdict)
    LowerLevels = [GroupByLevel(t, sdict) for t in NTree.subtrees.values()]

    if len(LowerLevels) == 0:
        return Levels

    depth = max(len(sub) for sub in LowerLevels)
    for i in range(depth):
        # Only subtrees that are deep enough contribute to depth i.
        at_depth = [sub[i] for sub in LowerLevels if len(sub) > i]
        New = utils.uniqify(utils.listunion(at_depth))
        if len(New) > 0:
            Levels += [New]

    return Levels
def GroupByLevel(NTree, sdict):
    """
    Collect the nodes of `NTree` into a list of levels.

    The first levels come from ``EqualLevels`` applied to the keys of
    `NTree.subtrees`.  Every subtree is then processed recursively, and for
    each depth index the corresponding levels of all subtrees are united,
    de-duplicated and appended when non-empty.

    """
    Levels = EqualLevels(NTree.subtrees.keys(), sdict)
    LowerLevels = [GroupByLevel(t, sdict) for t in NTree.subtrees.values()]

    if len(LowerLevels) == 0:
        return Levels

    depth = max(len(sub) for sub in LowerLevels)
    for i in range(depth):
        # Only subtrees that are deep enough contribute to depth i.
        at_depth = [sub[i] for sub in LowerLevels if len(sub) > i]
        New = utils.uniqify(utils.listunion(at_depth))
        if len(New) > 0:
            Levels += [New]

    return Levels
def __getitem__(self, ind):
    """
    Returns a subrectangle of the table.

    The representation of the subrectangle depends on `type(ind)`. Also,
    whether the returned object represents a new independent copy of the
    subrectangle, or a "view" into this self object, depends on `type(ind)`.

    *   If you pass the name of an existing coloring, you get a tabarray
        consisting of copies of columns in that coloring.

    *   If you pass a list of existing coloring names and/or column names,
        you get a tabarray consisting of copies of columns in the list
        (name of coloring is equivalent to list of names of columns in that
        coloring; duplicate columns are deleted).

    *   If you pass a :class:`numpy.ndarray`, you get a tabarray consisting
        a subrectangle of the tabarray, as handled by
        :func:`numpy.ndarray.__getitem__`:

        *   if you pass a 1D NumPy ndarray of booleans of `len(self)`, the
            rectangle contains copies of the rows for which the
            corresponding entry is `True`.

        *   if you pass a list of row numbers, you get a tabarray
            containing copies of these rows.

    Any result that still has named columns is returned as a tabarray whose
    coloring is restricted to the columns it retains; colors with no
    remaining column are dropped.

    """
    def is_coloring_name(key):
        # Unhashable indices (lists, ndarrays, slices) can never name a
        # coloring.  Testing them against the dict raises TypeError on
        # Python 3, so treat that as "not a coloring" and fall through.
        try:
            return key in self.coloring
        except TypeError:
            return False

    if is_coloring_name(ind):
        return self[self.coloring[ind]]

    names = self.dtype.names
    if (isinstance(ind, list) and names and
            all([a in names or is_coloring_name(a) for a in ind]) and
            any([is_coloring_name(a) for a in ind])):
        # At least one entry is a coloring name: expand colorings into their
        # columns, drop duplicates and index again with plain column names.
        ns = utils.uniqify(
            utils.listunion([[a] if a in names else self.coloring[a]
                             for a in ind]))
        return self[ns]

    D = np.ndarray.__getitem__(self, ind)
    if isinstance(D, np.ndarray) and D.dtype.names is not None:
        D = D.view(tabarray)
        kept = set(D.dtype.names)
        coloring = {}
        for k in self.coloring.keys():
            common = set(self.coloring[k]).intersection(kept)
            if len(common) > 0:
                coloring[k] = list(common)
        D.coloring = coloring
    return D
def test_aggregate_AggFunc(self):
    """
    Aggregating on 'e' with a single `AggFunc` must match applying that
    function by hand to columns 'a' and 'b' within each group of 'e'.
    """
    AggFunc = np.mean
    [D1, s] = self.D[['a', 'b', 'e']].aggregate(
        On=['e'], AggFunc=AggFunc, returnsort=True)

    # Key column in the order reported by the aggregation (`s`).
    sorted_e = self.D['e'][s]
    e = utils.uniqify(sorted_e)

    a = []
    b = []
    for i in e:
        boolvec = sorted_e == i
        a += [AggFunc(self.D['a'][s][boolvec])]
        b += [AggFunc(self.D['b'][s][boolvec])]

    D2 = tb.tabarray(columns=[e, a, b], names=['e', 'a', 'b'],
                     coloring=D1.coloring)
    # `assert_` is a deprecated alias that was removed in Python 3.12.
    self.assertTrue(eq(D1, D2))
def TestIsIn():
    """
    Check `isin` against `uniqify` on random data.

    Selecting the elements of `X` flagged by `isin(X, Y)` must give the
    sorted distinct values of `Y`, once when `X` is 0..9999 and twice in a
    row when `X` is that range repeated.
    """
    Y = np.random.randint(0, 10000, size=(100,))
    D = np.array(uniqify(Y))
    D.sort()

    X = np.arange(10000)
    Z = isin(X, Y)
    T1 = (X[Z] == D).all()

    # `range(...) + range(...)` only works on Python 2, where range returns
    # a list; build the doubled array with NumPy instead.
    X = np.append(np.arange(10000), np.arange(10000))
    Z = isin(X, Y)
    T2 = (X[Z] == np.append(D, D.copy())).all()

    assert T1 & T2
def test_aggregate1(self):
    """
    Aggregating on 'a' with an explicit aggregator for 'd' only must match a
    by-hand aggregation that joins 'd' with ',' and sums 'b'.
    """
    AggFuncDict = {'d': ','.join}
    [D1, s] = self.D[['a', 'b', 'd']].aggregate(
        On=['a'], AggFuncDict=AggFuncDict, returnsort=True)

    # Key column in the order reported by the aggregation (`s`).
    sorted_a = self.D['a'][s]
    a = utils.uniqify(sorted_a)

    # 'b' was deliberately left out of the dict passed to aggregate(); the
    # reference result uses `sum` for it.
    AggFuncDict.update({'b': sum})

    b = []
    d = []
    for i in a:
        boolvec = sorted_a == i
        b += [AggFuncDict['b'](self.D['b'][s][boolvec])]
        d += [AggFuncDict['d'](self.D['d'][s][boolvec])]

    D2 = tb.tabarray(columns=[a, b, d], names=['a', 'b', 'd'],
                     coloring=D1.coloring)
    # `assert_` is a deprecated alias that was removed in Python 3.12.
    self.assertTrue(eq(D1, D2))
def TestPivot3():
    """
    Pivot a Region x Source table of random amounts and compare it with a
    table assembled by hand: one 'Region' column plus one '<Source>_Amount'
    column per distinct source.
    """
    V1 = ['NorthAmerica', 'SouthAmerica', 'Europe', 'Asia', 'Australia',
          'Africa', 'Antarctica']
    V1.sort()
    V2 = ['House', 'Car', 'Boat', 'Savings', 'Food', 'Entertainment',
          'Taxes']
    V2.sort()

    # One record per (region, source) pair, regions varying slowest.
    Recs = []
    for region in V1:
        for source in V2:
            Recs.append((region, source, 100 * np.random.rand()))

    X = tb.tabarray(records=Recs, names=['Region', 'Source', 'Amount'])
    Y = X.pivot('Region', 'Source')

    Z = utils.uniqify(X['Source'])
    Z.sort()

    Cols = []
    for source in Z:
        Cols.append([row['Amount'] for row in X if row['Source'] == source])
    expected_names = ['Region'] + [source + '_Amount' for source in Z]

    W = tb.tabarray(columns=[V1] + Cols, names=expected_names)
    assert (W == Y).all()
def __getitem__(self, ind):
    """
    Returns a subrectangle of the table.

    The representation of the subrectangle depends on `type(ind)`. Also,
    whether the returned object represents a new independent copy of the
    subrectangle, or a "view" into this self object, depends on `type(ind)`.

    *   If you pass the name of an existing coloring, you get a tabarray
        consisting of copies of columns in that coloring.

    *   If you pass a list of existing coloring names and/or column names,
        you get a tabarray consisting of copies of columns in the list
        (name of coloring is equivalent to list of names of columns in that
        coloring; duplicate columns are deleted).

    *   If you pass a :class:`numpy.ndarray`, you get a tabarray consisting
        a subrectangle of the tabarray, as handled by
        :func:`numpy.ndarray.__getitem__`:

        *   if you pass a 1D NumPy ndarray of booleans of `len(self)`, the
            rectangle contains copies of the rows for which the
            corresponding entry is `True`.

        *   if you pass a list of row numbers, you get a tabarray
            containing copies of these rows.

    Any result that still has named columns is returned as a tabarray whose
    coloring is restricted to the columns it retains.  A color is dropped
    when none of its columns remain, and also when it would cover every
    column of the result.

    """
    def is_coloring_name(key):
        # Unhashable indices (lists, ndarrays, slices) can never name a
        # coloring.  Testing them against the dict raises TypeError on
        # Python 3, so treat that as "not a coloring" and fall through.
        try:
            return key in self.coloring
        except TypeError:
            return False

    if is_coloring_name(ind):
        return self[self.coloring[ind]]

    # `names` is None for arrays without named fields; in that case skip the
    # name-based branch instead of failing on `a in None`.
    names = self.dtype.names
    if (isinstance(ind, list) and names and
            all([a in names or is_coloring_name(a) for a in ind]) and
            any([is_coloring_name(a) for a in ind])):
        # At least one entry is a coloring name: expand colorings into their
        # columns, drop duplicates and index again with plain column names.
        ns = utils.uniqify(
            utils.listunion([[a] if a in names else self.coloring[a]
                             for a in ind]))
        return self[ns]

    D = np.ndarray.__getitem__(self, ind)
    if isinstance(D, np.ndarray) and D.dtype.names is not None:
        D = D.view(tabarray)
        kept = set(D.dtype.names)
        coloring = {}
        for k in self.coloring.keys():
            common = set(self.coloring[k]).intersection(kept)
            uncovered = kept.difference(self.coloring[k])
            if len(common) > 0 and len(uncovered) > 0:
                coloring[k] = list(common)
        D.coloring = coloring
    return D
def test_aggregate2(self):
    """
    Aggregating on the pair ('a', 'b') with explicit aggregators for 'c' and
    'd' must match joining those columns by hand within each distinct
    (a, b) group.
    """
    AggFuncDict = {'c': '+'.join, 'd': ','.join}
    [D1, s] = self.D[['a', 'c', 'b', 'd']].aggregate(
        On=['a', 'b'], AggFuncDict=AggFuncDict, returnsort=True)

    # Materialise the pairs: on Python 3 zip() is a one-shot iterator.
    ab = utils.uniqify(list(zip(self.D['a'][s], self.D['b'][s])))

    # Key columns in aggregation order, sliced once rather than per row.
    sorted_keys = self.D[['a', 'b']][s]

    c = []
    d = []
    for i in ab:
        boolvec = np.array([tuple(sorted_keys[ind]) == i
                            for ind in range(len(self.D))])
        c += [AggFuncDict['c'](self.D['c'][s][boolvec])]
        d += [AggFuncDict['d'](self.D['d'][s][boolvec])]

    D2 = tb.tabarray(
        columns=[[x[0] for x in ab], [x[1] for x in ab], c, d],
        names=['a', 'b', 'c', 'd'], coloring=D1.coloring)
    # `assert_` is a deprecated alias that was removed in Python 3.12.
    self.assertTrue(eq(D1, D2))
def tab_colstack(ListOfTabArrays, mode='abort'):
    """
    "Horizontal stacking" of tabarrays, e.g. adding columns.

    Wrapper for :func:`tabular.spreadsheet.colstack` that deals with the
    coloring and returns the result as a tabarray.

    Method calls::

            (data, naming) = tabular.spreadsheet.colstack(
                ListOfTabArrays, mode=mode, returnnaming=True)

    **Parameters**

            **ListOfTabArrays** : sequence of tabarrays

                    Arrays to stack; each must expose a `coloring` mapping.

            **mode** : passed through to :func:`tabular.spreadsheet.colstack`.

    **Returns**

            The stacked data as a tabarray.  Each input coloring is translated
            through the `naming` returned by the underlying colstack before
            the colorings are merged; colors left with no column present in
            the result are dropped.

    """
    (data, naming) = spreadsheet.colstack(ListOfTabArrays, mode=mode,
                                          returnnaming=True)

    merged = {}
    for (i, a) in enumerate(ListOfTabArrays):
        # `naming` holds (array index, x, y) triples; coloring entries of the
        # i-th array are looked up as x and replaced by y (presumably the
        # original and the stacked column name -- confirm against colstack).
        namedict = dict([(x, y) for (j, x, y) in naming if i == j])
        for k in a.coloring:
            s = [namedict[kk] for kk in a.coloring[k]]
            if k in merged:
                merged[k] = utils.uniqify(merged[k] + s)
            else:
                merged[k] = s

    # Build a fresh dict instead of popping from the one being iterated:
    # removing keys during iteration raises RuntimeError on Python 3.
    names = data.dtype.names
    coloring = {}
    for (k, cols) in merged.items():
        kept = [x for x in cols if x in names]
        if len(kept) > 0:
            coloring[k] = kept

    data = data.view(tabarray)
    data.coloring = coloring
    return data
def TestUniqify2():
    """
    `uniqify` on a list with scattered repeats must return each value once,
    in order of first appearance.
    """
    values = [2, 3, 4, 4, 4, 5, 5, 1, 1, 2, 3, 6, 6, 5]
    expected = [2, 3, 4, 5, 1, 6]
    result = uniqify(values)
    assert (expected == result)