def transform(self, data, path):
    '''Replace an array leaf by its summary statistics.

    Returns the ``_data`` attribute of ``Stats.Summary(data)`` if
    *data* is an array according to ``Utils.isArray`` and ``None``
    otherwise. *path* is accepted but not used.
    '''
    debug("%s: called" % str(self))

    # anything that is not an array can not be summarised
    if not Utils.isArray(data):
        return None

    summary = Stats.Summary(data)
    return summary._data
def transform(self, data, path):
    '''Convert an array leaf into a histogram.

    *data* is passed to ``self.toHistogram``, which returns a pair
    ``(bins, values)``. If bins were computed, every callable in
    ``self.mConverters`` is applied to the values in turn.

    Returns an ordered dictionary with the keys ``bins`` and
    ``frequency``, or ``None`` if *data* is not an array according
    to ``Utils.isArray``. *path* is only used for log messages.
    '''
    debug("%s: called for path %s" % (str(self), str(path)))

    if not Utils.isArray(data):
        return None

    bins, values = self.toHistogram(data)

    # Test for identity, not equality: ``bins != None`` is evaluated
    # element-wise if bins is a numpy array and the resulting array
    # has no unambiguous truth value.
    if bins is not None:
        for converter in self.mConverters:
            values = converter(values)

    debug("%s: completed for path %s" % (str(self), str(path)))

    return odict((("bins", bins), ("frequency", values)))
def transform(self, data, path):
    '''Build an R data frame from a dictionary of columns.

    Each item in *data* becomes one column. The R vector type of a
    column is chosen from its first element: ``IntVector`` if
    ``Utils.isInt`` accepts it, ``FloatVector`` if ``Utils.isFloat``
    accepts it and ``StrVector`` otherwise.

    Returns an ``rpy2.robjects.DataFrame``. *path* is not used.

    Raises ValueError if a column is not an array according to
    ``Utils.isArray`` or if a column is empty.
    '''
    debug("%s: called" % str(self))

    columns = odict()
    for minor_key, values in data.items():
        if not Utils.isArray(values):
            # report the offending column, properly interpolated
            raise ValueError(
                "expected a list for data frame creation, got %s"
                % str(type(values)))
        if len(values) == 0:
            # wrap the key in a tuple so that keys which are
            # themselves tuples do not break the formatting
            raise ValueError("empty list for %s" % (minor_key,))

        # the first element decides the type of the whole column
        first = values[0]
        if Utils.isInt(first):
            columns[minor_key] = rpy2.robjects.IntVector(values)
        elif Utils.isFloat(first):
            columns[minor_key] = rpy2.robjects.FloatVector(values)
        else:
            columns[minor_key] = rpy2.robjects.StrVector(values)

    return rpy2.robjects.DataFrame(columns)
def render(self, work, path):
    '''Draw a box plot with one box per (line, label) series.

    *work* is a two-level mapping ``line -> label -> values``. Each
    line must hold exactly one label and the values must be an array
    according to ``Utils.isArray``. ``None`` entries are removed from
    the values and series without any remaining values are skipped.

    Returns the result of ``self.endPlot(work, path)``.
    '''
    self.startPlot()

    legend = []
    all_data = []

    for line, data in work.iteritems():
        assert len(data) == 1, \
            "multicolumn data not supported yet: %s" % str(data)

        for label, values in data.iteritems():
            assert Utils.isArray(values), \
                "work is of type '%s'" % values

            present = [x for x in values if x != None]
            if len(present) == 0:
                continue

            all_data.append(ro.FloatVector(present))
            # the legend entries are collected, but they are not
            # passed on to the plotting call below
            legend.append("/".join((line, label)))

    R.boxplot(all_data)

    return self.endPlot(work, path)
def asDataFrame(data):
    '''return data tree as a pandas data frame.

    The data frame is multi-indexed according to the depth within the
    data tree. If the data tree has only one level, the data will be
    single-indexed because pandas will not tolerate a single level
    MultiIndex.

    The code assumes that the data tree has a uniform depth and
    structure.

    The inner-most level in the *data* tree will be columns. However,
    if *data* is only a single-level dictionary, the keys in the
    dictionary will be row labels and the resultant data frame will
    have only one column.

    Depending on the type of the leaf, the data frame is constructed
    as follows:

    Leaf is an array
        Leaves on the same branch are added as columns.

        Requires: All leaves on the same branch need to have the
        same length.

    Leaf is a dataframe
        Dataframes will be concatenated.

        Requires: All dataframes need to have the same columns.

    Leaf is a scalar
        Dataframes will be built from a nested dictionary.

    Special cases for backwards compatibility:

    1. Lowest level dictionary contains the following arrays:
       rows, columns, matrix - numpy matrix, convert to dataframe
       and apply as above.

    2. Lowest level dictionary contains the following keys:
       '01', '10', '11' - Venn 2-set data, convert columns
       '001', '010', ... - Venn 3-set data, convert columns

    Pandas attempts to find a column data type that will fit all
    values in a column. Thus, if a column is numeric, but contains
    values such as "inf", "Inf", as well, the column type might be
    set to object or char.

    Branches matching one of the special cases are replaced by a
    data frame through ``setLeaf( data, path, df )`` before the tree
    is converted.

    Returns None if *data* is None or empty.

    Raises NotImplementedError if the data tree is not of uniform
    depth.
    '''
    if data is None or len(data) == 0:
        return None

    levels = getDepths(data)
    mi, ma = min(levels), max(levels)
    if mi != ma:
        raise NotImplementedError(
            'data tree not of uniform depth, min=%i, max=%i' % (mi, ma))

    labels = getPaths(data)

    ######################################################
    ######################################################
    ######################################################
    # check special cases
    MATRIX = ('rows', 'columns', 'matrix')
    VENN2 = ('10', '01', '11')
    VENN3 = ('010', '001', '011')

    branches = list(getNodes(data, len(labels) - 2))
    for path, branch in branches:
        keys = set(branch.keys())
        if keys.issuperset(MATRIX):
            # Numpy matrix - dictionary with keys matrix, rows, columns
            df = pandas.DataFrame(branch['matrix'],
                                  columns=branch['columns'],
                                  index=branch['rows'])
            setLeaf(data, path, df)
        elif keys.issuperset(VENN2) or keys.issuperset(VENN3):
            # sort so that 'labels' is not the first item
            # specify data such that 'labels' will a single tuple entry
            values = sorted(branch.items())
            df = pandas.DataFrame([x[1] for x in values],
                                  index=[x[0] for x in values])
            setLeaf(data, path, df)

    ######################################################
    ######################################################
    ######################################################
    # the special cases may have changed the tree, so collect
    # the paths again
    labels = getPaths(data)

    # build multi-index
    leaves = list(getNodes(data, len(labels) - 1))
    leaf = leaves[0][1]

    if Utils.isArray(leaf):
        # build dataframe from array
        dataframes = []
        index_tuples = []

        if len(labels) == 1:
            # not a nested dictionary
            branches = [('all', data)]
        else:
            branches = list(getNodes(data, max(0, len(labels) - 2)))

        for path, columns in branches:
            dataframes.append(pandas.DataFrame(columns))
            # NOTE(review): the original code distinguished
            # len(path) == 1 here, but appended path unchanged in
            # both cases. The data frame branch below unpacks
            # path[0] for single-level paths - confirm whether the
            # same was intended here.
            index_tuples.append(path)

        df = pandas.concat(dataframes, keys=index_tuples)

    elif Utils.isDataFrame(leaf):
        # build dataframe from list of dataframes
        # dataframes are not stacked
        # the existing index of dataframes
        # will be overwritten
        dataframes = []
        index_tuples = []
        for path, dataframe in leaves:
            if len(path) == 1:
                # if only one level, do not use tuple
                index_tuples.append(path[0])
            else:
                index_tuples.append(path)
            dataframes.append(dataframe)

        df = pandas.concat(dataframes, keys=index_tuples)

    else:
        if len(labels) == 1:
            # { 'x' : 1, 'y': 2 } -> DF with two rows (x, y) and
            # one column. Lists are built explicitly so that
            # dictionary views are acceptable as well.
            df = pandas.DataFrame(list(data.values()),
                                  index=list(data.keys()))
        elif len(labels) == 2:
            # { 'a': {'x':1, 'y':2}, 'b': {'y':2} }
            # -> DF with two columns (x,y) and two rows (a,b)
            df = pandas.DataFrame.from_dict(data).transpose()
            # reorder so that order of columns corresponds to data
            df = df[labels[-1]]
        else:
            # We are dealing with a simple nested dictionary
            branches = list(getNodes(data, max(0, len(labels) - 3)))
            dataframes = []
            index_tuples = []
            for path, nested_dict in branches:
                # transpose to invert columns and rows
                # in sphinxreport convention, the deeper
                # level in a dictionary in sphinxreport are columns,
                # while in pandas they are rows.
                dataframes.append(pandas.DataFrame(nested_dict).transpose())
                index_tuples.append(path)
            df = pandas.concat(dataframes, keys=index_tuples)

    # rename levels in hierarchical index
    # pandas.MultiIndex is the public name of the class
    if isinstance(df.index, pandas.MultiIndex):
        nlevels = len(df.index.names)
        # levels beyond track/slice are numbered: level0, level1, ...
        names = ['track', 'slice'] + \
            ['level%i' % i for i in range(nlevels - 2)]
        df.index.names = names[:nlevels]
    else:
        df.index.name = 'track'

    return df