def render(self, dataframe, path):
    """Draw a smoothed scatter plot for every pair of columns.

    For each 2-combination of ``dataframe.columns`` the missing data
    points are removed, an optional log10 transformation is applied
    (controlled by ``self.logscale``) and R's ``smoothScatter`` is
    called between ``self.startPlot()`` and ``self.endPlot()``.

    *path* is handed through unchanged to ``self.endPlot``.

    Returns a ResultBlocks collection with the output of every
    ``self.endPlot`` call.

    Raises ValueError if the dataframe has fewer than two columns.
    """
    if len(dataframe.columns) < 2:
        raise ValueError(
            "requiring two coordinates, only got %s" %
            str(dataframe.columns))

    blocks = ResultBlocks()

    for xcolumn, ycolumn in itertools.combinations(dataframe.columns, 2):

        # remove missing data points
        xvalues, yvalues = Stats.filterMissing(
            (dataframe[xcolumn], dataframe[ycolumn]))

        # skip column pairs without any remaining data (all NaN)
        if len(xvalues) == 0 or len(yvalues) == 0:
            continue

        # apply log transformation on data, not on plot
        if self.logscale:
            if "x" in self.logscale:
                xvalues = R.log10(xvalues)
            if "y" in self.logscale:
                yvalues = R.log10(yvalues)

        self.startPlot()
        # wrap in numpy arrays, as pandas series can not be passed
        # through rpy2. The builtin ``float`` is used as dtype because
        # the ``numpy.float`` alias was removed in NumPy 1.24.
        R.smoothScatter(numpy.array(xvalues, dtype=float),
                        numpy.array(yvalues, dtype=float),
                        xlab=xcolumn,
                        ylab=ycolumn,
                        nbin=self.nbins)
        blocks.extend(self.endPlot(dataframe, path))

    return blocks
def layoutBlocks(blocks, layout="column"):
    """layout blocks of rst text.

    layout can be one of "column", "row", "grid" or "column-<n>",
    where <n> is the maximum number of table columns.

    "column" writes the blocks one below the other. All other
    layouts use an rst table to arrange elements.

    Returns a list of text lines; the list is empty if there are no
    blocks.

    Raises ValueError if the layout is not recognized.
    """
    lines = []
    if len(blocks) == 0:
        return lines

    # flatten blocks
    flat = ResultBlocks()
    for b in blocks:
        if b.title:
            b.updateTitle(b.title, "prefix")
        try:
            flat.extend(b)
        except TypeError:
            flat.append(b)
    blocks = flat

    # a postamble that is identical across all blocks is removed
    # from the blocks and written only once at the end
    postambles = {b.postamble for b in blocks}
    if len(postambles) == 1:
        blocks.clearPostamble()
        postamble = postambles.pop()
    else:
        postamble = None

    if layout == "column":
        for block in blocks:
            if block.title:
                lines.extend(block.title.split("\n"))
                lines.append("")
            else:
                warn("report_directive.layoutBlocks: missing title")

            lines.extend(block.text.split("\n"))
            lines.extend(block.postamble.split("\n"))

        lines.append("")

        if postamble:
            lines.extend(postamble.split("\n"))
            lines.append("")
        return lines

    elif layout in ("row", "grid"):
        if layout == "row":
            ncols = len(blocks)
        else:
            ncols = int(math.ceil(math.sqrt(len(blocks))))
    elif layout.startswith("column"):
        ncols = min(len(blocks), int(layout.split("-")[1]))
        # TODO: think about appropriate fix for empty data
        if ncols == 0:
            return lines
    else:
        raise ValueError("unknown layout %s " % layout)

    if ncols == 0:
        warn("no columns")
        return lines

    # compute column widths
    widths = [x.getWidth() for x in blocks]
    text_heights = [x.getTextHeight() for x in blocks]
    title_heights = [x.getTitleHeight() for x in blocks]

    columnwidths = [
        max(widths[y] for y in range(x, len(blocks), ncols))
        for x in range(ncols)]

    separator = "+%s+" % "+".join(["-" * x for x in columnwidths])

    # add empty blocks so that the last table row is complete
    if len(blocks) % ncols:
        blocks.extend([ResultBlock("", "")] * (ncols - len(blocks) % ncols))

    def _table_rows(cells, min_height):
        """return the "|..|" lines of one table row.

        *cells* holds one list of text lines per table column.
        """
        # zip() below stops at the shortest cell, so all cells are
        # padded to the tallest cell actually present, even if that
        # exceeds the height reported for the blocks.
        height = max([min_height] + [len(cell) for cell in cells])
        padded = []
        for col, cell in enumerate(cells):
            max_width = columnwidths[col]
            # add missing lines
            cell = cell + [""] * (height - len(cell))
            # extend lines to the column width
            padded.append([x + " " * (max_width - len(x)) for x in cell])
        return ["|%s|" % "|".join(row) for row in zip(*padded)]

    for nblock in range(0, len(blocks), ncols):
        # nblock is a multiple of ncols, hence the position within
        # row_blocks equals the table column.
        row_blocks = [blocks[xx] for xx in
                      range(nblock, min(nblock + ncols, len(blocks)))]

        # add text
        lines.append(separator)
        lines.extend(_table_rows(
            [b.text.split("\n") + b.postamble.split("\n")
             for b in row_blocks],
            max(text_heights[nblock:nblock + ncols])))

        # add subtitles
        title_height = max(title_heights[nblock:nblock + ncols])
        if title_height > 0:
            lines.append(separator)
            lines.extend(_table_rows(
                [b.title.split("\n") for b in row_blocks],
                title_height))

    lines.append(separator)

    if postamble:
        lines.append(postamble)

    lines.append("")

    return lines
def __call__(self, dataframe, path):
    '''iterate over leaves/branches in data structure.

    This method will call the :meth:`render` method.

    If ``self.split_at`` is not zero and the dataframe contains at
    least ``self.split_at`` groups (see ``Utils.getGroupLevels``), the
    dataframe is rendered in chunks of up to ``self.split_at``
    groups. Groups whose key matches one of the regular expressions
    in ``self.split_always`` are added to every chunk.

    Returns a ResultBlocks collection with the output of all
    :meth:`render` calls.
    '''
    result = ResultBlocks()

    if not self.split_at:
        result.extend(self.render(dataframe, path))
        return result

    # split dataframe at first index
    level = Utils.getGroupLevels(dataframe)
    grouper = dataframe.groupby(level=level)
    if len(grouper) < self.split_at:
        result.extend(self.render(dataframe, path))
        return result

    # build groups
    always, remove_always = [], set()

    if self.split_always:
        # compile once instead of once per group and pattern
        patterns = [re.compile(pat) for pat in self.split_always]
        for key, work in grouper:
            keystr = path2str(key)
            # any(): a key matching several patterns must be added
            # only once, otherwise its rows are duplicated in every
            # rendered chunk.
            if any(rx.search(keystr) for rx in patterns):
                always.append((key, work))
                remove_always.add(key)

        grouper = dataframe.groupby(level=level)

    def _group_group(grouper, always, remove_always):
        '''yield lists of (key, dataframe), each starting with the
        entries in *always*.'''
        group = always[:]
        for key, work in grouper:
            if key in remove_always:
                continue
            group.append((key, work))

            if len(group) >= self.split_at:
                yield group
                group = always[:]

        # remainder, possibly containing only the common groups
        yield group

    first = True
    for group in _group_group(grouper, always, remove_always):
        # do not plot last dataframe that contains
        # only the common tracks to plot
        if not first and len(group) == len(always):
            continue
        first = False

        df = pandas.concat([x[1] for x in group])
        # reconcile index names
        df.index.names = dataframe.index.names
        result.extend(self.render(df, path))

    return result
def __call__(self, dataframe, path):
    '''iterate over leaves/branches in data structure.

    This method will call the :meth:`render` method.

    Large dataframes are split into multiple, smaller rendered
    objects if self.split_at is not zero.

    By default, dataframes are split along the hierarchical
    index. However, if there is only a single index, but multiple
    columns, the split is performed on the columns instead. This is
    used when splitting coordinate data as a result of the histogram
    transformation.

    When splitting along the index, ``self.split_always`` is used as
    a list of regular expressions matched against the group keys;
    when splitting columns, it is used as a collection of column
    names. Matching groups/columns are part of every chunk.

    Returns a ResultBlocks collection with the output of all
    :meth:`render` calls.
    '''
    result = ResultBlocks()

    if not self.split_at:
        result.extend(self.render(dataframe, path))
        return result

    # split dataframe at first index
    level = Utils.getGroupLevels(dataframe)
    grouper = dataframe.groupby(level=level)

    # split dataframe column wise if only one index
    # and multiple columns
    if len(grouper) == 1 and len(dataframe.columns) > self.split_at:
        columns = list(dataframe.columns)
        # split_always may be unset; the index branch below guards
        # against that as well.
        split_always = self.split_always or ()

        # columns to always keep
        always = []
        if self.split_keep_first_column:
            always.append(columns[0])
        for c in columns:
            # do not select a column twice if the first column is
            # also listed in split_always
            if c in split_always and c not in always:
                always.append(c)

        columns = [c for c in columns if c not in always]
        for x in range(0, len(columns), self.split_at):
            # extract a set of columns
            result.extend(self.render(
                dataframe.loc[:, always + columns[x:x + self.split_at]],
                path))

    # split dataframe along index
    elif len(grouper) >= self.split_at:
        # build groups
        always, remove_always = [], set()

        if self.split_always:
            # compile once instead of once per group and pattern
            patterns = [re.compile(pat) for pat in self.split_always]
            for key, work in grouper:
                keystr = path2str(key)
                # any(): a key matching several patterns must be
                # added only once, otherwise its rows are duplicated
                # in every rendered chunk.
                if any(rx.search(keystr) for rx in patterns):
                    always.append((key, work))
                    remove_always.add(key)

            grouper = dataframe.groupby(level=level)

        def _group_group(grouper, always, remove_always):
            '''yield lists of (key, dataframe), each starting with
            the entries in *always*.'''
            group = always[:]
            for key, work in grouper:
                if key in remove_always:
                    continue
                group.append((key, work))

                if len(group) >= self.split_at:
                    yield group
                    group = always[:]

            # remainder, possibly containing only the common groups
            yield group

        first = True
        for group in _group_group(grouper, always, remove_always):
            # do not plot last dataframe that contains
            # only the common tracks to plot
            if not first and len(group) == len(always):
                continue
            first = False

            df = pandas.concat([x[1] for x in group])
            # reconcile index names
            df.index.names = dataframe.index.names
            result.extend(self.render(df, path))

    else:
        # do not split dataframe
        result.extend(self.render(dataframe, path))

    return result
def __call__(self, *args, **kwargs):
    """Run the complete pipeline from argument parsing to rendering.

    The stages are: parsing of arguments, data collection, conversion
    to a dataframe, transformation, reframing, restricting and
    excluding paths, grouping and rendering.

    An exception in any stage is logged through ``self.error`` and
    reported by returning a ResultBlocks built from
    ``Utils.buildException``; it is not propagated to the caller.

    Returns the rendered ResultBlocks, or None if no data was
    collected or no data remains after conversion to a dataframe.
    """

    def _failed(stage):
        # Called from within an ``except`` clause: log the failure and
        # build the block that reports it.
        self.error("%s: exception in %s" % (self, stage))
        return ResultBlocks(Utils.buildException(stage))

    # ``except Exception`` is used throughout (as the collection stage
    # already did) so that KeyboardInterrupt and SystemExit are not
    # turned into error blocks.
    try:
        self.parseArguments(*args, **kwargs)
    except Exception:
        return _failed("parsing")

    # collect no data if tracker is the empty tracker
    # and go straight to rendering
    try:
        # isinstance() does not work here because of module mapping:
        # type(Tracker.Empty) == CGATReport.Tracker.Empty
        # type(self.tracker) == Tracker.Empty
        if self.tracker.getTracks() == ["empty"]:
            return self.renderer()
    except AttributeError:
        # for function trackers
        pass

    self.debug("profile: started: tracker: %s" % (self.tracker))

    # collecting data
    try:
        self.collect()
    except Exception as ex:
        self.error("%s: exception in collection: %s" % (self, str(ex)))
        return ResultBlocks(
            Utils.buildException("collection"))
    finally:
        self.debug("profile: finished: tracker: %s" % (self.tracker))

    if self.tree is None or len(self.tree) == 0:
        self.info("%s: no data - processing complete" % self.tracker)
        return None

    data_paths = DataTree.getPaths(self.tree)
    self.debug("%s: after collection: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))

    # special Renderers - do not process data further but render
    # directly. Note that no transformations will be applied.
    if isinstance(self.renderer, (Renderer.User, Renderer.Debug)):
        results = ResultBlocks(title="main")
        results.extend(self.renderer(self.tree))
        return results

    # merge all data to hierarchical indexed dataframe
    self.data = DataTree.as_dataframe(self.tree, self.tracker)

    if self.data is None:
        self.info("%s: no data after conversion" % self.tracker)
        return None

    self.debug("dataframe memory usage: total=%i,data=%i,index=%i,col=%i" %
               (self.data.values.nbytes +
                self.data.index.nbytes +
                self.data.columns.nbytes,
                self.data.values.nbytes,
                self.data.index.nbytes,
                self.data.columns.nbytes))

    # if tracks are set by tracker, call tracker with dataframe
    if self.indexFromTracker:
        self.tracker.setIndex(self.data)

    # transform data
    try:
        self.transform()
    except Exception:
        return _failed("transformation")

    try:
        self.reframe()
    except Exception:
        return _failed("reframing")

    # restrict
    try:
        self.filterPaths(self.restrict_paths, mode="restrict")
    except Exception:
        return _failed("restrict")

    # exclude
    try:
        self.filterPaths(self.exclude_paths, mode="exclude")
    except Exception:
        return _failed("exclude")

    # No pruning - maybe enable later as a user option
    self.pruned = []

    try:
        self.group()
    except Exception:
        return _failed("grouping")

    if self.renderer is not None:
        self.debug("profile: started: renderer: %s" % (self.renderer))

        try:
            result = self.render()
        except Exception:
            return _failed("rendering")
        finally:
            self.debug("profile: finished: renderer: %s" % (self.renderer))
    else:
        result = ResultBlocks(title="")

    return result
def render(self):
    '''supply the :class:`Renderer.Renderer` with the data to render.

    The data supplied will depend on the ``groupby`` option: if
    ``self.group_level`` is negative the complete dataframe is passed
    to the renderer in one call, otherwise the renderer is called
    once per group. An exception raised by the renderer for a group
    is logged and recorded as an exception block; the remaining
    groups are still rendered.

    returns a ResultBlocks data structure.

    Raises ValueError if ``self.data`` is None or if the renderer
    returned no data.
    '''
    dataframe = self.data

    # Check for missing data before anything else: len(None) in the
    # debug message below would raise a TypeError and hide the
    # intended warning and ValueError.
    if dataframe is None:
        self.warn("%s: no data after conversion" % self)
        raise ValueError("no data for renderer")

    self.debug("%s: rendering data started for %i items" %
               (self, len(dataframe)))

    # initiate output structure
    results = ResultBlocks(title="")

    # special patch: set column names to pruned levels
    # if there are no column names
    if len(dataframe.columns) == len(self.pruned):
        if list(dataframe.columns) == list(range(len(dataframe.columns))):
            dataframe.columns = [x[1] for x in self.pruned]

    nlevels = Utils.getDataFrameLevels(dataframe)

    self.debug("%s: rendering data started. "
               "levels=%i, group_level=%s" %
               (self, nlevels, str(self.group_level)))

    if self.group_level < 0:
        # no grouping for renderers that will accept
        # a dataframe with any level of indices and no explicit
        # grouping has been asked for.
        results.extend(self.renderer(dataframe, path=()))
    else:
        level = Utils.getGroupLevels(
            dataframe,
            max_level=self.group_level + 1)

        self.debug("%s: grouping by levels: %s" %
                   (self, str(level)))

        for key, work in dataframe.groupby(level=level):
            try:
                results.extend(self.renderer(work, path=key))
            except Exception:
                # a failing group must not prevent the others from
                # being rendered; KeyboardInterrupt and SystemExit
                # are deliberately not caught.
                self.error("%s: exception in rendering" % self)
                results.append(Utils.buildException("rendering"))

    if len(results) == 0:
        self.warn("renderer returned no data.")
        raise ValueError("renderer returned no data.")

    self.debug("%s: rendering data finished with %i blocks" %
               (self.tracker, len(results)))

    return results