Ejemplo n.º 1
0
def tab_join(ToMerge,
             keycols=None,
             nullvals=None,
             renamer=None,
             returnrenaming=False,
             Names=None):
    '''
    Database-join for tabular arrays.

    Wrapper for :func:`tabular.spreadsheet.join` that deals with the coloring
    and returns the result as a tabarray.

    Method calls::

            data = tabular.spreadsheet.join

    **Parameters**

            **ToMerge** :  dict or sequence of arrays

                    The arrays to join.  Entries are addressed by key when
                    `ToMerge` is a dict and by position otherwise.  Entries
                    without a `coloring` attribute contribute no colors.

            **keycols**, **nullvals**, **renamer**, **Names**

                    Forwarded unchanged to :func:`tabular.spreadsheet.join`.

            **returnrenaming** :  boolean

                    If true, return `[Result, Renaming]` instead of `Result`.

    **Returns**

            **Result** :  tabarray

                    The joined data, viewed as a tabarray, whose coloring is
                    the union of the (renamed) colorings of the inputs.

            **Renaming** :  only returned if `returnrenaming` is true

                    The renaming reported by the underlying join.

    '''

    # The renaming is always requested from the underlying join because it is
    # needed below to translate coloring entries, whatever the caller asked.
    [Result, Renaming] = spreadsheet.join(ToMerge,
                                          keycols=keycols,
                                          nullvals=nullvals,
                                          renamer=renamer,
                                          returnrenaming=True,
                                          Names=Names)

    if isinstance(ToMerge, dict):
        keys = list(ToMerge.keys())
    else:
        keys = range(len(ToMerge))

    # One coloring per input.  The fallback to {} has to apply to the value
    # only, so that every entry is still a (key, coloring) pair.
    Colorings = {}
    for k in keys:
        Colorings[k] = getattr(ToMerge[k], 'coloring', {})

    # Apply the column renaming of each input to the names in its coloring.
    # A new dict is built so the coloring of the input is never modified.
    for k in keys:
        if k in Renaming:
            renames = Renaming[k]
            Colorings[k] = dict([(g, [renames.get(n, n) for n in cols])
                                 for (g, cols) in Colorings[k].items()])

    # Union of the per-input colorings, color by color.
    Coloring = {}
    for k in Colorings:
        for (j, cols) in Colorings[k].items():
            if j in Coloring:
                Coloring[j] = utils.uniqify(Coloring[j] + cols)
            else:
                Coloring[j] = utils.uniqify(cols)

    Result = Result.view(tabarray)
    Result.coloring = Coloring

    if returnrenaming:
        return [Result, Renaming]
    else:
        return Result
Ejemplo n.º 2
0
def tab_rowstack(ListOfTabArrays, mode='nulls'):
    """
    "Vertical stacking" of tabarrays, e.g. adding rows.

    Wrapper for :func:`tabular.spreadsheet.rowstack` that deals with the
    coloring and returns the result as a tabarray.

    Method calls::

        data = tabular.spreadsheet.rowstack(ListOfTabArrays, mode=mode)

    **Parameters**

        **ListOfTabArrays** :  sequence of tabarrays

            The arrays to stack.  Each one must have a `coloring` attribute.

        **mode** :  forwarded unchanged to the underlying `rowstack`.

    **Returns**

        The stacked data viewed as a tabarray.  Its coloring is the union of
        the input colorings, restricted to columns present in the result;
        colors left without any column are dropped.

    """
    data = spreadsheet.rowstack(ListOfTabArrays, mode=mode)

    # Union of the input colorings, color by color.
    coloring = {}
    for a in ListOfTabArrays:
        for k in a.coloring:
            if k in coloring:
                coloring[k] = utils.uniqify(coloring[k] + a.coloring[k])
            else:
                coloring[k] = a.coloring[k]

    # Iterate over a snapshot of the keys: entries are removed inside the
    # loop, which is an error when iterating over a live dict view.
    for k in list(coloring.keys()):
        s = [x for x in coloring[k] if x in data.dtype.names]
        if len(s) > 0:
            coloring[k] = s
        else:
            coloring.pop(k)

    data = data.view(tabarray)
    data.coloring = coloring
    return data
Ejemplo n.º 3
0
def tab_colstack(ListOfTabArrays, mode='abort'):
    '''
    "Horizontal stacking" of tabarrays, e.g. adding columns.

    Wrapper for :func:`tabular.spreadsheet.colstack` that deals with the
    coloring and returns the result as a tabarray.

    Method calls::

            data = tabular.spreadsheet.colstack(ListOfTabArrays, mode=mode)

    **Parameters**

            **ListOfTabArrays** :  sequence of tabarrays

                    The arrays to stack.  Each one must have a `coloring`
                    attribute.

            **mode** :  forwarded unchanged to the underlying `colstack`.

    **Returns**

            The stacked data viewed as a tabarray.  Its coloring is the union
            of the input colorings, restricted to columns present in the
            result; colors left without any column are dropped.

    '''

    data = spreadsheet.colstack(ListOfTabArrays, mode=mode)

    # Union of the input colorings, color by color.
    coloring = {}
    for a in ListOfTabArrays:
        for k in a.coloring:
            if k in coloring:
                coloring[k] = utils.uniqify(coloring[k] + a.coloring[k])
            else:
                coloring[k] = a.coloring[k]

    # Iterate over a snapshot of the keys: entries are removed inside the
    # loop, which is an error when iterating over a live dict view.
    for k in list(coloring.keys()):
        s = [x for x in coloring[k] if x in data.dtype.names]
        if len(s) > 0:
            coloring[k] = s
        else:
            coloring.pop(k)

    data = data.view(tabarray)
    data.coloring = coloring
    return data
Ejemplo n.º 4
0
def tab_colstack(ListOfTabArrays, mode='abort'):
    """
    "Horizontal stacking" of tabarrays, e.g. adding columns.

    Wrapper for :func:`tabular.spreadsheet.colstack` that deals with the
    coloring and returns the result as a tabarray.

    Method calls::

        data = tabular.spreadsheet.colstack(ListOfTabArrays, mode=mode)

    **Parameters**

        **ListOfTabArrays** :  sequence of tabarrays

            The arrays to stack.  Each one must have a `coloring` attribute.

        **mode** :  forwarded unchanged to the underlying `colstack`.

    **Returns**

        The stacked data viewed as a tabarray.  Its coloring is the union of
        the input colorings after translating column names through the naming
        reported by `colstack`, restricted to columns present in the result;
        colors left without any column are dropped.

    """
    (data, naming) = spreadsheet.colstack(ListOfTabArrays, mode=mode,
                                          returnnaming=True)

    coloring = {}
    for (i, a) in enumerate(ListOfTabArrays):
        # `naming` is consumed as triples (j, x, y): the entries whose j
        # equals the position of this array give the mapping x -> y that is
        # applied to the column names listed in its coloring.
        namedict = dict([(x, y) for (j, x, y) in naming if i == j])
        for k in a.coloring:
            s = [namedict[kk] for kk in a.coloring[k]]
            if k in coloring:
                coloring[k] = utils.uniqify(coloring[k] + s)
            else:
                coloring[k] = s

    # Iterate over a snapshot of the keys: entries are removed inside the
    # loop, which is an error when iterating over a live dict view.
    for k in list(coloring.keys()):
        s = [x for x in coloring[k] if x in data.dtype.names]
        if len(s) > 0:
            coloring[k] = s
        else:
            coloring.pop(k)

    data = data.view(tabarray)
    data.coloring = coloring
    return data
Ejemplo n.º 5
0
def tab_join(ToMerge, keycols=None, nullvals=None, renamer=None,
             returnrenaming=False, Names=None):
    '''
    Database-join for tabular arrays.

    Wrapper for :func:`tabular.spreadsheet.join` that deals with the coloring
    and returns the result as a tabarray.

    Method calls::

            data = tabular.spreadsheet.join

    **Parameters**

            **ToMerge** :  dict or sequence of arrays

                    The arrays to join.  Entries are addressed by key when
                    `ToMerge` is a dict and by position otherwise.  Entries
                    without a `coloring` attribute contribute no colors.

            **keycols**, **nullvals**, **renamer**, **Names**

                    Forwarded unchanged to :func:`tabular.spreadsheet.join`.

            **returnrenaming** :  boolean

                    If true, return `[Result, Renaming]` instead of `Result`.

    **Returns**

            **Result** :  tabarray

                    The joined data, viewed as a tabarray, whose coloring is
                    the union of the (renamed) colorings of the inputs.

            **Renaming** :  only returned if `returnrenaming` is true

                    The renaming reported by the underlying join.

    '''

    # The renaming is always requested from the underlying join because it is
    # needed below to translate coloring entries, whatever the caller asked.
    [Result, Renaming] = spreadsheet.join(ToMerge, keycols=keycols,
          nullvals=nullvals, renamer=renamer, returnrenaming=True, Names=Names)

    if isinstance(ToMerge, dict):
        keys = list(ToMerge.keys())
    else:
        keys = range(len(ToMerge))

    # One coloring per input.  The fallback to {} has to apply to the value
    # only, so that every entry is still a (key, coloring) pair.
    Colorings = {}
    for k in keys:
        Colorings[k] = getattr(ToMerge[k], 'coloring', {})

    # Apply the column renaming of each input to the names in its coloring.
    # A new dict is built so the coloring of the input is never modified.
    for k in keys:
        if k in Renaming:
            renames = Renaming[k]
            Colorings[k] = dict([(g, [renames.get(n, n) for n in cols])
                                 for (g, cols) in Colorings[k].items()])

    # Union of the per-input colorings, color by color.
    Coloring = {}
    for k in Colorings:
        for (j, cols) in Colorings[k].items():
            if j in Coloring:
                Coloring[j] = utils.uniqify(Coloring[j] + cols)
            else:
                Coloring[j] = utils.uniqify(cols)

    Result = Result.view(tabarray)
    Result.coloring = Coloring

    if returnrenaming:
        return [Result, Renaming]
    else:
        return Result
Ejemplo n.º 6
0
    def deletecols(self, cols):
        """
        Remove the given columns and/or colorings.

        Every entry of `cols` that is a column name (i.e. is found in
        `self.dtype.names`) is used as is; any other entry is looked up in
        `self.coloring` and stands for the columns listed there.  The
        de-duplicated list of column names is then passed to::

                tabular.spreadsheet.deletecols(self, names)

        and the result of that call is returned.

        """
        groups = []
        for c in cols:
            if c in self.dtype.names:
                groups.append([c])
            else:
                groups.append(self.coloring[c])
        names = utils.uniqify(utils.listunion(groups))
        return spreadsheet.deletecols(self, names)
Ejemplo n.º 7
0
def GroupByLevel(NTree, sdict):
    """
    Collect the contents of a tree level by level.

    The first levels come from `EqualLevels` applied to the keys of
    `NTree.subtrees`.  The function then recurses into every subtree and, for
    each depth `i`, merges (de-duplicated) the `i`-th level of all subtrees
    that are deep enough; non-empty merged levels are appended in order.

    """
    Levels = EqualLevels(NTree.subtrees.keys(), sdict)

    below = []
    for subtree in NTree.subtrees.values():
        below.append(GroupByLevel(subtree, sdict))

    # Leaf: nothing underneath to merge.
    if not below:
        return Levels

    depth = max(len(sub) for sub in below)
    for i in range(depth):
        at_depth = [sub[i] for sub in below if len(sub) > i]
        merged = utils.uniqify(utils.listunion(at_depth))
        if len(merged) > 0:
            Levels += [merged]
    return Levels
Ejemplo n.º 8
0
def GroupByLevel(NTree, sdict):
    """
    Return the levels of `NTree`, top level first.

    The top of the result is whatever `EqualLevels` returns for the keys of
    `NTree.subtrees`.  Below that, the levels computed recursively for each
    subtree are combined depth by depth: at every depth the entries of all
    subtrees reaching that depth are united and de-duplicated, and the
    combined level is added when it is not empty.

    """
    Levels = EqualLevels(NTree.subtrees.keys(), sdict)
    children = [GroupByLevel(child, sdict)
                for child in NTree.subtrees.values()]

    if len(children) == 0:
        return Levels

    deepest = max(map(len, children))
    for i in range(deepest):
        reaching = [levels[i] for levels in children if len(levels) > i]
        combined = utils.uniqify(utils.listunion(reaching))
        if len(combined) > 0:
            Levels += [combined]
    return Levels
Ejemplo n.º 9
0
    def __getitem__(self, ind):
        """
        Return the part of the table selected by `ind`.

        How `ind` is interpreted:

        *   The name of an existing coloring: the lookup is repeated with the
            list of column names stored under that coloring.

        *   A list made only of column names and coloring names, containing
            at least one coloring name: every coloring name is replaced by
            its columns, duplicates are removed, and the lookup is repeated
            with the resulting list of column names.

        *   Anything else is handed to :func:`numpy.ndarray.__getitem__`.
            If that returns an array with named fields, it is viewed as a
            tabarray whose coloring keeps, for each coloring of `self`, only
            the columns still present; colorings left with no column are
            dropped.  Any other result is returned unchanged.

        """
        if ind in self.coloring.keys():
            return self[self.coloring[ind]]

        if isinstance(ind, list) and self.dtype.names:
            all_known = all([a in self.dtype.names or
                             a in self.coloring.keys() for a in ind])
            if all_known and set(self.coloring.keys()).intersection(ind):
                # Expand coloring names into their columns.
                expanded = []
                for a in ind:
                    if a in self.dtype.names:
                        expanded.append([a])
                    else:
                        expanded.append(self.coloring[a])
                return self[utils.uniqify(utils.listunion(expanded))]

        D = np.ndarray.__getitem__(self, ind)
        if not isinstance(D, np.ndarray) or D.dtype.names is None:
            return D

        D = D.view(tabarray)
        present = set(D.dtype.names)
        kept = []
        for k in self.coloring.keys():
            common = set(self.coloring[k]).intersection(present)
            if len(common) > 0:
                kept.append((k, list(common)))
        D.coloring = dict(kept)
        return D
Ejemplo n.º 10
0
 def test_aggregate_AggFunc(self):
     """
     Compare `aggregate` called with a single `AggFunc` against a manual
     computation.

     Columns 'a', 'b' and 'e' of `self.D` are aggregated on 'e' with
     `np.mean`.  The expected table is built by applying the same function
     to 'a' and 'b' for each distinct value of 'e', using the sort order
     returned by `aggregate`.
     """
     AggFunc = np.mean
     [D1, s] = self.D[['a', 'b', 'e']].aggregate(
                                  On=['e'], AggFunc=AggFunc, returnsort=True)
     # The reordered columns do not change inside the loop; index them once.
     e_sorted = self.D['e'][s]
     a_sorted = self.D['a'][s]
     b_sorted = self.D['b'][s]
     e = utils.uniqify(e_sorted)
     a = []
     b = []
     for i in e:
         boolvec = e_sorted == i
         a.append(AggFunc(a_sorted[boolvec]))
         b.append(AggFunc(b_sorted[boolvec]))
     D2 = tb.tabarray(columns=[e, a, b], names=['e', 'a', 'b'],
                      coloring=D1.coloring)
     # assertTrue replaces the deprecated alias assert_.
     self.assertTrue(eq(D1, D2))
Ejemplo n.º 11
0
def TestIsIn():
    """
    Check `isin` against the sorted distinct values of a random sample.

    `Y` holds 100 random integers in [0, 10000).  Selecting from
    `X = 0..9999` with `isin(X, Y)` must give the sorted distinct values of
    `Y`; doing the same on `X` repeated twice must give those values twice.
    """
    Y = np.random.randint(0, 10000, size=(100,))
    D = np.array(uniqify(Y))
    D.sort()

    X = np.arange(10000)
    Z = isin(X, Y)
    # array_equal also reports a plain False on a shape mismatch, where
    # `(a == b).all()` would not yield a usable element-wise result.
    assert np.array_equal(X[Z], D)

    # Built with numpy so it does not rely on range() returning a list.
    X = np.concatenate([np.arange(10000), np.arange(10000)])
    Z = isin(X, Y)
    assert np.array_equal(X[Z], np.append(D, D))
Ejemplo n.º 12
0
 def test_aggregate1(self):
     """
     Compare `aggregate` called with an `AggFuncDict` against a manual
     computation.

     Columns 'a', 'b' and 'd' of `self.D` are aggregated on 'a', with only
     'd' given an explicit function (comma-join).  The expected table uses
     `sum` for 'b', i.e. the test expects `aggregate` to sum columns that
     have no entry in `AggFuncDict`.
     """
     AggFuncDict = {'d': ','.join}
     [D1, s] = self.D[['a', 'b', 'd']].aggregate(
                          On=['a'], AggFuncDict=AggFuncDict, returnsort=True)
     # The reordered columns do not change inside the loop; index them once.
     a_sorted = self.D['a'][s]
     b_sorted = self.D['b'][s]
     d_sorted = self.D['d'][s]
     a = utils.uniqify(a_sorted)
     # Added only after the call: 'b' was aggregated without an explicit
     # function, and `sum` is what the expected result is built with.
     AggFuncDict.update({'b': sum})
     b = []
     d = []
     for i in a:
         boolvec = a_sorted == i
         b.append(AggFuncDict['b'](b_sorted[boolvec]))
         d.append(AggFuncDict['d'](d_sorted[boolvec]))
     D2 = tb.tabarray(columns=[a, b, d], names=['a', 'b', 'd'],
                      coloring=D1.coloring)
     # assertTrue replaces the deprecated alias assert_.
     self.assertTrue(eq(D1, D2))
Ejemplo n.º 13
0
def TestPivot3():
    """
    Check `pivot` on a full Region x Source table with random amounts.

    One record is created for every (Region, Source) pair.  The expected
    result has one row per region and one '<Source>_Amount' column per
    source, holding the amounts of that source in record order.
    """
    V1 = sorted(['NorthAmerica', 'SouthAmerica', 'Europe', 'Asia',
                 'Australia', 'Africa', 'Antarctica'])
    V2 = sorted(['House', 'Car', 'Boat', 'Savings', 'Food', 'Entertainment',
                 'Taxes'])

    Recs = []
    for a in V1:
        for b in V2:
            Recs.append((a, b, 100 * np.random.rand()))

    X = tb.tabarray(records=Recs, names=['Region', 'Source', 'Amount'])
    Y = X.pivot('Region', 'Source')

    Z = utils.uniqify(X['Source'])
    Z.sort()

    # Expected table: the region column followed by one column per source.
    Cols = []
    for b in Z:
        Cols.append([y['Amount'] for y in X if y['Source'] == b])
    ColNames = ['Region'] + [b + '_Amount' for b in Z]
    W = tb.tabarray(columns=[V1] + Cols, names=ColNames)

    assert (W == Y).all()
Ejemplo n.º 14
0
    def __getitem__(self, ind):
        """
        Return the part of the table selected by `ind`.

        How `ind` is interpreted:

        *   The name of an existing coloring: the lookup is repeated with the
            list of column names stored under that coloring.

        *   A list made only of column names and coloring names, containing
            at least one coloring name: every coloring name is replaced by
            its columns, duplicates are removed, and the lookup is repeated
            with the resulting list of column names.

        *   Anything else is handed to :func:`numpy.ndarray.__getitem__`.
            If that returns an array with named fields, it is viewed as a
            tabarray.  A coloring of `self` is carried over, restricted to
            the columns still present, only when it shares at least one
            column with the result and the result also has at least one
            column outside that coloring.  Any other result is returned
            unchanged.

        """
        if ind in self.coloring.keys():
            return self[self.coloring[ind]]

        if isinstance(ind, list):
            all_known = all([a in self.dtype.names or
                             a in self.coloring.keys() for a in ind])
            if all_known and set(self.coloring.keys()).intersection(ind):
                # Expand coloring names into their columns.
                expanded = []
                for a in ind:
                    if a in self.dtype.names:
                        expanded.append([a])
                    else:
                        expanded.append(self.coloring[a])
                return self[utils.uniqify(utils.listunion(expanded))]

        D = np.ndarray.__getitem__(self, ind)
        if not isinstance(D, np.ndarray) or D.dtype.names is None:
            return D

        D = D.view(tabarray)
        present = set(D.dtype.names)
        kept = []
        for k in self.coloring.keys():
            common = set(self.coloring[k]).intersection(present)
            if len(common) > 0 and \
               len(present.difference(self.coloring[k])) > 0:
                kept.append((k, list(common)))
        D.coloring = dict(kept)
        return D
Ejemplo n.º 15
0
 def test_aggregate2(self):
     """
     Compare `aggregate` on two key columns against a manual computation.

     Columns 'a', 'c', 'b' and 'd' of `self.D` are aggregated on ('a', 'b'),
     joining 'c' with '+' and 'd' with ','.  The expected table is built by
     applying the same functions to the rows of each distinct (a, b) pair,
     using the sort order returned by `aggregate`.
     """
     AggFuncDict = {'c': '+'.join, 'd': ','.join}
     [D1, s] = self.D[['a', 'c', 'b', 'd']].aggregate(
                     On=['a', 'b'], AggFuncDict=AggFuncDict, returnsort=True)
     # The reordered columns do not change inside the loop; index them once.
     a_sorted = self.D['a'][s]
     b_sorted = self.D['b'][s]
     c_sorted = self.D['c'][s]
     d_sorted = self.D['d'][s]
     ab_sorted = self.D[['a', 'b']][s]
     # list() so a real sequence is passed even where zip() is lazy.
     ab = utils.uniqify(list(zip(a_sorted, b_sorted)))
     c = []
     d = []
     for i in ab:
         boolvec = np.array([tuple(ab_sorted[ind]) == i
                             for ind in range(len(self.D))])
         c.append(AggFuncDict['c'](c_sorted[boolvec]))
         d.append(AggFuncDict['d'](d_sorted[boolvec]))
     D2 = tb.tabarray(
          columns=[[x[0] for x in ab], [x[1] for x in ab], c, d],
          names=['a', 'b', 'c', 'd'], coloring=D1.coloring)
     # assertTrue replaces the deprecated alias assert_.
     self.assertTrue(eq(D1, D2))
Ejemplo n.º 16
0
def tab_colstack(ListOfTabArrays, mode='abort'):
    """
    "Horizontal stacking" of tabarrays, e.g. adding columns.

    Wrapper for :func:`tabular.spreadsheet.colstack` that deals with the
    coloring and returns the result as a tabarray.

    Method calls::

        data = tabular.spreadsheet.colstack(ListOfTabArrays, mode=mode)

    **Parameters**

        **ListOfTabArrays** :  sequence of tabarrays

            The arrays to stack.  Each one must have a `coloring` attribute.

        **mode** :  forwarded unchanged to the underlying `colstack`.

    **Returns**

        The stacked data viewed as a tabarray.  Its coloring is the union of
        the input colorings after translating column names through the naming
        reported by `colstack`, restricted to columns present in the result;
        colors left without any column are dropped.

    """
    (data, naming) = spreadsheet.colstack(ListOfTabArrays,
                                          mode=mode,
                                          returnnaming=True)

    coloring = {}
    for (i, a) in enumerate(ListOfTabArrays):
        # `naming` is consumed as triples (j, x, y): the entries whose j
        # equals the position of this array give the mapping x -> y that is
        # applied to the column names listed in its coloring.
        namedict = dict([(x, y) for (j, x, y) in naming if i == j])
        for k in a.coloring:
            s = [namedict[kk] for kk in a.coloring[k]]
            if k in coloring:
                coloring[k] = utils.uniqify(coloring[k] + s)
            else:
                coloring[k] = s

    # Iterate over a snapshot of the keys: entries are removed inside the
    # loop, which is an error when iterating over a live dict view.
    for k in list(coloring.keys()):
        s = [x for x in coloring[k] if x in data.dtype.names]
        if len(s) > 0:
            coloring[k] = s
        else:
            coloring.pop(k)

    data = data.view(tabarray)
    data.coloring = coloring
    return data
Ejemplo n.º 17
0
def TestUniqify2():
    """
    Check `uniqify` on a list with repeats: the expected result keeps one
    copy of each value, in order of first appearance.
    """
    values = [2, 3, 4, 4, 4, 5, 5, 1, 1, 2, 3, 6, 6, 5]
    expected = [2, 3, 4, 5, 1, 6]
    assert expected == uniqify(values)