def render(self, work, path):
    """Render matrix data as an rst csv-table.

    Large matrices (per ``max_rows``/``max_cols``) are written to a
    separate file via :meth:`asFile` unless ``force`` is set.
    """
    results = ResultBlocks(title=path)

    matrix, rows, columns = self.buildMatrix(work)
    title = path2str(path)

    # nothing to render for an empty matrix
    if len(rows) == 0:
        return ResultBlocks(ResultBlock("", title=title))

    # do not output large matrices as rst files
    if not self.force and (len(rows) > self.max_rows or
                           len(columns) > self.max_cols):
        return self.asFile([[self.toString(x) for x in r] for r in matrix],
                           rows,
                           columns,
                           title)

    lines = []
    lines.append(".. csv-table:: %s" % title)
    lines.append('   :header: "track","%s" ' % '","'.join(columns))
    lines.append('')
    # bug fix: the comprehension variable previously re-used 'x', shadowing
    # the row index - use distinct names
    for row in range(len(rows)):
        lines.append('   "%s","%s"' % (
            rows[row],
            '","'.join([self.toString(value) for value in matrix[row, :]])))
    lines.append("")

    if not path:
        subtitle = ""
    else:
        subtitle = path2str(path)

    results.append(ResultBlock("\n".join(lines), title=subtitle))

    return results
def __call__(self, data, path):
    '''iterate over leaves/branches in data structure.

    This method will call the :meth:`render` method for
    each leaf/branch at level :attr:`nlevels`.

    :raises NotImplementedError: if :attr:`nlevels` has not been set
        by the subclass.
    '''
    # bug fix: compare to None with 'is', not '=='
    if self.nlevels is None:
        raise NotImplementedError("incomplete implementation of %s" % str(self))

    result = ResultBlocks(title=path2str(path))

    labels = DataTree.getPaths(data)
    if len(labels) < self.nlevels:
        self.warn("at %s: expected at least %i levels - got %i: %s" %
                  (str(path), self.nlevels, len(labels), str(labels)))
        result.append(EmptyResultBlock(title=path2str(path)))
        return result

    paths = list(itertools.product(*labels[:-self.nlevels]))

    for p in paths:
        work = DataTree.getLeaf(data, p)
        if not work:
            continue
        try:
            result.extend(self.render(work, path + p))
        except:
            # record which path failed before propagating the error
            # (typo fix in message: "exeception" -> "exception")
            self.warn("exception raised in rendering for path: %s" %
                      str(path + p))
            raise

    return result
def render(self, work, path):
    """Collect result blocks for tracker keywords found in *work*.

    Walks all nodes two levels above the leaves and emits one
    :class:`ResultBlock` per tracker keyword present at each node.
    """
    # initiate output structure
    results = ResultBlocks(title=path2str(path))

    labels = DataTree.getPaths(work)

    # iterate over all items at leaf
    # bug fix: the loop variable previously shadowed the 'path' parameter;
    # use a distinct name for the node path
    for node_path, branch in DataTree.getNodes(work, len(labels) - 2):
        for key in Utils.TrackerKeywords:
            if key in branch:
                # add a result block
                results.append(ResultBlock(branch[key],
                                           title=path2str(node_path)))

    return results
def __call__(self, data, path):
    """Render tabular data as an rst grid table.

    Large tables (per ``max_rows``/``max_cols``) or tables with
    ``separate`` set are written out via :meth:`asFile`.
    """
    matrix, row_headers, col_headers = self.buildTable(data)
    title = path2str(path)

    if matrix is None:
        # bug fix: this branch referenced 'lines' before assignment
        # (NameError); return an empty block instead
        return ResultBlocks(ResultBlock("", title=title))

    # do not output large matrices as rst files
    if self.separate or (not self.force and
                         (len(row_headers) > self.max_rows or
                          len(col_headers) > self.max_cols)):
        return ResultBlocks(self.asFile(matrix, row_headers,
                                        col_headers, title),
                            title=title)

    lines = []

    # add row and column headers
    matrix.insert(0, col_headers)
    # column widths: widest row header, then widest cell per column
    max_widths = [max(len(x) for x in row_headers)]
    max_widths.extend([max([len(str(row[x])) for row in matrix])
                       for x in range(len(col_headers))])

    separator = "+" + "+".join(["-" * (x + 2) for x in max_widths]) + "+"
    format = "|" + "|".join([" %%%is " % x for x in max_widths]) + "|"

    lines.append(separator)
    lines.append(format % tuple([""] + list(map(str, col_headers))))
    lines.append(separator)
    for h, row in zip(row_headers, matrix[1:]):
        lines.append(format % tuple(map(str, [h] + row)))
        lines.append(separator)

    return ResultBlocks(ResultBlock("\n".join(lines), title=title))
def render(self, work, path):
    """Debugging renderer: emit one block describing each leaf item.

    For every key in *work* reports its path, type, length (``na`` when
    the value has no length) and a preview of its string representation.
    """
    results = ResultBlocks(title=path)

    # iterate over all items at leaf
    for key in work:
        value = work[key]
        value_type = type(value)

        try:
            length = "%i" % len(value)
        except (AttributeError, TypeError):
            length = "na"

        # truncate long representations for readability
        text = str(value)
        if len(text) > 30:
            text = text[:30] + "..."

        results.append(
            ResultBlock(
                "path= %s, type= %s, len= %s, data= %s" %
                (path2str(path + (key,)), value_type, length, text),
                title=""))

    return results
def addWorksheet( wb, dataframe, title ):
    # Append *dataframe* to workbook *wb* as a new worksheet named *title*.
    #
    # NOTE(review): 'col_headers' and 'row_headers' are free variables -
    # this looks like a closure lifted out of asSpreadSheet; confirm they
    # are bound in the enclosing/global scope before calling.
    ws = wb.create_sheet()
    # first row: empty corner cell followed by the column headers
    ws.append( [""] + list(col_headers) )
    # one appended row per dataframe row, prefixed by its stringified header
    # NOTE(review): iterrows() yields (index, Series) tuples, so list(row)
    # here is [index, Series] rather than the cell values - verify this is
    # the intended layout.
    for x,row in enumerate( dataframe.iterrows() ):
        ws.append( [path2str(row_headers[x])] + list(row) )
    # patch: maximum title length seems to be 31
    ws.title = title[:30]
def __call__(self, dataframe, path):
    """Render a dataframe as an rst csv-table.

    Large tables are written to a file or spreadsheet (per ``large``);
    with ``preview`` set, a truncated csv-table is emitted in addition.
    """
    # modify table (adding/removing columns) according to user options
    dataframe = self.modifyTable(dataframe)

    title = path2str(path)

    results = ResultBlocks()

    row_headers = dataframe.index
    col_headers = dataframe.columns

    # do not output large matrices as rst files
    if self.separate or (not self.force and
                         (len(row_headers) > self.max_rows or
                          len(col_headers) > self.max_cols)):
        if self.large == "xls":
            results.append(self.asSpreadSheet(dataframe, row_headers,
                                              col_headers, title))
        else:
            results.append(self.asFile(dataframe, row_headers,
                                       col_headers, title))

        if self.preview:
            # bug fix: this branch previously truncated an undefined
            # 'matrix' variable (NameError); truncate the dataframe
            row_headers = row_headers[:self.max_rows]
            col_headers = col_headers[:self.max_cols]
            dataframe = dataframe.iloc[:self.max_rows, :self.max_cols]
        else:
            return results

    out = StringIO.StringIO()
    dataframe.to_csv(out)

    lines = []
    lines.append(".. csv-table:: %s" % title)
    lines.append("   :class: sortable")

    if self.add_rowindex:
        lines.append('   :header: "row", "", "%s" ' %
                     '","'.join(map(str, col_headers)))
        lines.append('')

        # bug fix: iterate the dataframe's values instead of the
        # undefined 'matrix'
        x = 0
        for header, line in zip(row_headers, dataframe.values):
            x += 1
            lines.append('   %i,"%s","%s"' %
                         (x, str(header), '","'.join(map(str, line))))
    else:
        # re-use the csv output: first line is the header
        l = out.getvalue().split("\n")
        lines.append('   :header: %s' % l[0])
        lines.append('')
        lines.extend(['   %s' % x for x in l[1:]])

    lines.append("")

    results.append(ResultBlock("\n".join(lines), title=title))

    return results
def render(self, work, path):
    """Render matrix data as a sortable rst csv-table.

    Large matrices (per ``max_rows``/``max_cols``) or those with
    ``separate`` set are written out via :meth:`asFile`.
    """
    results = ResultBlocks(title=path)

    matrix, rows, columns = self.buildMatrix(work)
    title = path2str(path)

    # nothing to render for an empty matrix
    if len(rows) == 0:
        return ResultBlocks(ResultBlock("", title=title))

    # do not output large matrices as rst files
    # separate and force need to be mixed in.
    if self.separate or (not self.force and
                         (len(rows) > self.max_rows or
                          len(columns) > self.max_cols)):
        return ResultBlocks(
            self.asFile(pandas.DataFrame(matrix,
                                         index=rows,
                                         columns=columns),
                        rows,
                        columns,
                        title),
            title=path)

    lines = []
    lines.append(".. csv-table:: %s" % title)
    lines.append("   :class: sortable")
    lines.append('   :header: "track","%s" ' % '","'.join(columns))
    lines.append('')
    # bug fix: the comprehension variable previously re-used 'x', shadowing
    # the row index - use distinct names
    for row in range(len(rows)):
        lines.append('   "%s","%s"' % (
            rows[row],
            '","'.join([self.toString(value) for value in matrix[row, :]])))
    lines.append("")

    if path is None:
        subtitle = ""
    else:
        subtitle = path2str(path)

    results.append(ResultBlock("\n".join(lines), title=subtitle))

    return results
def render(self, dataframe, path ):
    """Render *dataframe* through an R ggplot2 statement.

    Converts the pandas dataframe to an R dataframe, builds a ggplot
    object from it, applies ``self.statement`` and returns a
    :class:`ResultBlocks` holding one placeholder block
    (``#$ggplot <figname>$#``) with the plot attached as attributes.

    :raises ValueError: if R cannot evaluate ``gp + self.statement``.
    """
    R.library( 'ggplot2' )

    rframe = pandas.rpy.common.convert_to_r_dataframe(dataframe)

    # sometimes the row/column mapping did not work
    # rframe.colnames = dataframe.columns

    # strip the 'AsIs' class that the conversion attaches so numeric and
    # character columns behave as plain R vectors
    unAsIs = R('''function (x) {
         if(typeof(x) %in% c("integer","double")) {
             class(x) <- "numeric"
             return (x)}
         else if (typeof(x) == "character") {
             class(x) <- "character"
             return (x)
         } else {
             return(x)
         }
    }''')

    rframe = R["as.data.frame"](R.lapply(rframe,unAsIs))
    # expose the dataframe to the R session under the name 'rframe'
    R.assign( "rframe", rframe )

    # start plot
    R('''gp = ggplot( rframe )''')

    # add aesthetics and geometries
    try:
        pp = R('''gp + %s ''' % self.statement )
    except ValueError as msg:
        raise ValueError( "could not interprete R statement: gp + %s; msg=%s" % (self.statement, msg ))

    # '/' would break the placeholder/figure naming downstream
    figname = re.sub( '/', '_', path2str(path) )
    r = ResultBlock( '#$ggplot %s$#' % figname, title = path2str(path) )
    # attach plot object and name for the downstream dispatcher
    r.rggplot = pp
    r.figname = figname

    return ResultBlocks( r )
def __call__(self, data, path):
    """Render tabular data as an rst glossary.

    Each row header becomes a glossary term; the row's cells become the
    indented definition text.
    """
    lines = []
    matrix, row_headers, col_headers = self.buildTable(data)

    if matrix is None:
        return lines

    title = path2str(path)

    lines.append(".. glossary::")
    lines.append("")

    for header, line in zip(row_headers, matrix):
        txt = "\n".join(line)
        # re-indent continuation lines to the glossary definition level
        txt = "\n      ".join([x.strip() for x in txt.split("\n")])
        lines.append('   %s\n      %s' % (header, txt))

    lines.append("")

    # bug fix: ResultBlocks was handed the raw string; wrap it in a
    # ResultBlock, consistent with the other renderers
    return ResultBlocks(ResultBlock("\n".join(lines), title=title))
def __call__(self, data, path):
    """Render tabular data as an rst csv-table.

    Large tables (per ``max_rows``/``max_cols``) are written out via
    :meth:`asFile` unless ``force`` is set.
    """
    matrix, row_headers, col_headers = self.buildTable(data)
    title = path2str(path)

    if matrix is None:
        # bug fix: this branch referenced 'lines' before assignment
        # (NameError); return an empty block instead
        return ResultBlocks(ResultBlock("", title=title))

    # do not output large matrices as rst files
    if not self.force and (len(row_headers) > self.max_rows or
                           len(col_headers) > self.max_cols):
        return self.asFile(matrix, row_headers, col_headers, title)

    lines = []
    lines.append(".. csv-table:: %s" % title)
    lines.append('   :header: "", "%s" ' % '","'.join(map(str, col_headers)))
    lines.append('')

    for header, line in zip(row_headers, matrix):
        lines.append('   "%s","%s"' %
                     (str(header), '","'.join(map(str, line))))

    lines.append("")

    return ResultBlocks(ResultBlock("\n".join(lines), title=title))
def asSpreadSheet(self, dataframe, row_headers, col_headers, title):
    '''save the table as an xls file.

    Multiple files of the same Renderer/Tracker combination are
    distinguished by the title.
    '''
    self.debug("%s: saving %i x %i table as spread-sheet'" %
               (id(self), len(row_headers), len(col_headers)))

    quick = len(dataframe) > 10000
    if quick:
        # quick writing, only append method works
        wb = openpyxl.Workbook(optimized_write=True)

        def addWorksheet(wb, dataframe, title):
            # append-only writer: headers, then one row per dataframe row
            ws = wb.create_sheet()
            ws.append([""] + list(col_headers))
            for x, row in enumerate(dataframe.iterrows()):
                ws.append([path2str(row_headers[x])] + list(row))
            # patch: maximum title length seems to be 31
            ws.title = title[:30]
    else:
        # do it cell-by-cell, this might be slow
        wb = openpyxl.Workbook(optimized_write=False)

        def addWorksheet(wb, dataframe, title):
            # cell-by-cell writer; converts rst hyperlinks to xls links
            ws = wb.create_sheet()
            # regex to detect rst hyperlinks
            regex_link = re.compile('`(.*) <(.*)>`_')
            for column, column_name in enumerate(dataframe.columns):
                c = ws.cell(row=0, column=column)
                c.value = column_name
                dataseries = dataframe[column_name]
                if dataseries.dtype == object:
                    for row, value in enumerate(dataseries):
                        c = ws.cell(row=row + 1, column=column)
                        value = str(value)
                        if value.startswith('`'):
                            c.value, c.hyperlink = \
                                regex_link.match(value).groups()
                        else:
                            c.value = value
                else:
                    for row, value in enumerate(dataseries):
                        c = ws.cell(row=row + 1, column=column)
                        c.value = value
            # patch: maximum title length seems to be 31
            ws.title = title[:30]

    is_hierarchical = isinstance(dataframe.index,
                                 pandas.core.index.MultiIndex)

    split = is_hierarchical and len(dataframe.index.levels) > 1

    if split:
        # create separate worksheets for nested indices
        nlevels = len(dataframe.index.levels)
        paths = map(tuple, DataTree.unique(
            [x[:nlevels - 1] for x in dataframe.index.unique()]))

        # summary sheet linking to the per-path worksheets
        ws = wb.worksheets[0]
        ws.title = 'Summary'
        ws.append([dataframe.index.labels[:nlevels - 1]] +
                  ["Worksheet", "Rows"])

        for row, path in enumerate(paths):
            # select data frame as cross-section
            work = dataframe.xs(path, axis=0)
            title = path2str(path)[:30]
            ws.append(list(path) + [title, len(work)])
            c = ws.cell(row=row + 1, column=nlevels)
            c.hyperlink = "#%s" % title
            addWorksheet(wb, work, title=title)
    else:
        # bug fix: this called the undefined 'writeWorksheet'; the
        # local helper defined above is named addWorksheet
        addWorksheet(wb, dataframe, title=title)

    # write result block
    lines = []
    lines.append("`%i x %i table <#$xls %s$#>`__" %
                 (len(row_headers), len(col_headers), title))
    lines.append("")
    r = ResultBlock("\n".join(lines), title=title)
    # attach the workbook so the dispatcher can write the xls file
    r.xls = wb

    self.debug("%s: saved %i x %i table as spread-sheet'" %
               (id(self), len(row_headers), len(col_headers)))
    return r