def __call__(self, track, slice=None):
    """Collect read-extension figures for *track* and return them as RST.

    Probes for
    ``<EXPORTDIR>/utr_extension/<track>.readextension_<region>_<direction>.<slice>.png``
    for every region/direction combination and emits a ``.. figure::``
    directive for each file that exists, laid out in three columns.

    Returns an ordered dict with a single ``rst`` entry.
    """
    edir = EXPORTDIR
    method = "utr_extension"
    blocks = ResultBlocks()
    filepath = "%(edir)s/%(method)s/%(track)s.readextension_%(region)s_%(direction)s.%(slice)s.png"
    # NOTE(review): internal whitespace of this template reconstructed
    # from a whitespace-mangled source - confirm against original file.
    block = \
        '''
.. figure:: %(filename)s
   :height: 300
'''
    # append spaces for file extension
    # (placeholder substitution may replace %(filename)s with a longer path)
    block = "\n".join([x + " " * 200 for x in block.split("\n")])
    for region, direction in itertools.product(
            ("downstream", "upstream"),
            ("sense", "antisense", "anysense")):
        # filepath/block templates are filled from the local namespace
        filename = filepath % locals()
        if os.path.exists(filename):
            blocks.append(ResultBlock(
                text=block % locals(),
                title="%(track)s %(region)s %(direction)s" % locals()))
        # else:
        #     blocks.append(ResultBlock(
        #         "",
        #         title="%(track)s %(region)s %(direction)s" % locals()))
    return odict(
        (("rst",
          "\n".join(Utils.layoutBlocks(blocks, layout="columns-3"))),))
def __call__(self, track, slice=None):
    """Collect FastQC images named *slice* for all tracks.

    Globs ``<EXPORTDIR>/fastqc/<track>*_fastqc`` directories and adds a
    figure block for each existing ``Images/<slice>.png``, laid out in
    two columns.  The *track* parameter is shadowed by the loop variable
    below and is effectively ignored.
    """
    # note there are spaces behind the %(image)s directive to accomodate
    # for path substitution
    block = '''
.. figure:: %(image)s
   :height: 300
'''
    blocks = ResultBlocks()
    tracks = sorted([x.asFile() for x in TRACKS])
    for track in tracks:
        files = glob.glob(
            os.path.join(EXPORTDIR, "fastqc", "%s*_fastqc" % track))
        for x, fn in enumerate(sorted(files)):
            y = x + 1  # NOTE(review): unused - presumably a leftover
            image = os.path.abspath(
                os.path.join(fn, "Images", "%s.png" % slice))
            if not os.path.exists(image):
                continue
            # block template is filled from the local namespace (%(image)s)
            blocks.append(ResultBlock(text=block % locals(),
                                      title=os.path.basename(fn)))
    return odict((("rst", "\n".join(Utils.layoutBlocks(
        blocks, layout="columns-2"))),))
def __call__(self, dataframe, path):
    """Render *dataframe* as pre-formatted (literal) RST text.

    Depending on the configured options, output the first
    ``self.head`` and/or last ``self.tail`` rows, a statistical
    summary (``describe()``), or the whole frame.  The text is wrapped
    in an RST literal block (``::``) and indented accordingly.
    """
    result = ResultBlocks()
    texts = []
    if self.head or self.tail:
        if self.head:
            texts.append(str(dataframe.head(self.head)))
        if self.tail:
            texts.append(str(dataframe.tail(self.tail)))
    elif self.summary:
        texts.append(str(dataframe.describe()))
    else:
        texts.append(str(dataframe))
    # add indentation
    # NOTE(review): indent width reconstructed from mangled source -
    # literal blocks require a consistent indent; confirm original used 3.
    texts = ['\n'.join(['   %s' % y for y in x.split('\n')])
             for x in texts]
    formatted = '''
::

%s

''' % '\n   ...\n'.join(texts)
    result.append(ResultBlock(formatted, title=path2str(path)))
    return result
def __call__(self, track, slice=None):
    """Collect read-extension figures for *track* and return them as RST.

    NOTE(review): this definition duplicates an earlier, identically
    behaving ``__call__`` in this file - consider consolidating.

    Probes for
    ``<EXPORTDIR>/utr_extension/<track>.readextension_<region>_<direction>.<slice>.png``
    for every region/direction combination and emits a ``.. figure::``
    directive for each existing file, laid out in three columns.
    """
    edir = EXPORTDIR
    method = "utr_extension"
    blocks = ResultBlocks()
    filepath = "%(edir)s/%(method)s/%(track)s.readextension_%(region)s_%(direction)s.%(slice)s.png"
    block = \
        '''
.. figure:: %(filename)s
   :height: 300
'''
    # append spaces for file extension
    # (placeholder substitution may replace %(filename)s with a longer path)
    block = "\n".join([x + " " * 200 for x in block.split("\n")])
    for region, direction in itertools.product(
            ("downstream", "upstream"),
            ("sense", "antisense", "anysense")):
        # templates are filled from the local namespace
        filename = filepath % locals()
        if os.path.exists(filename):
            blocks.append(
                ResultBlock(text=block % locals(),
                            title="%(track)s %(region)s %(direction)s" %
                            locals()))
        # else:
        #     blocks.append( ResultBlock( "",
        #         title = "%(track)s %(region)s %(direction)s" % locals() ) )
    return odict(
        (("rst", "\n".join(Utils.layoutBlocks(blocks, layout="columns-3"))),
         ))
def __call__(self, track, slice=None):
    """Collect FastQC images named *slice* for all tracks.

    NOTE(review): this definition duplicates an earlier, identically
    behaving ``__call__`` in this file - consider consolidating.

    Globs ``<EXPORTDIR>/fastqc/<track>*_fastqc`` directories and adds a
    figure block for each existing ``Images/<slice>.png``, two columns.
    The *track* parameter is shadowed by the loop variable below.
    """
    # note there are spaces behind the %(image)s directive to accomodate
    # for path substitution
    block = '''
.. figure:: %(image)s
   :height: 300
'''
    blocks = ResultBlocks()
    tracks = sorted([x.asFile() for x in TRACKS])
    for track in tracks:
        files = glob.glob(
            os.path.join(EXPORTDIR, "fastqc", "%s*_fastqc" % track))
        for x, fn in enumerate(sorted(files)):
            y = x + 1  # NOTE(review): unused - presumably a leftover
            image = os.path.abspath(
                os.path.join(fn, "Images", "%s.png" % slice))
            if not os.path.exists(image):
                continue
            blocks.append(
                ResultBlock(text=block % locals(),
                            title=os.path.basename(fn)))
    return odict(
        (("rst",
          "\n".join(Utils.layoutBlocks(blocks, layout="columns-2"))),))
def render(self, data):
    """Render *data* as pretty-printed JSON for debugging.

    Falls back to ``str(data)`` when the object is not JSON
    serializable.  Returns a ResultBlocks titled 'debug' holding a
    single untitled block.
    """
    try:
        text = json.dumps(data, indent=4)
    except TypeError:
        # data contains objects json cannot serialize
        text = str(data)
    out = ResultBlocks(title='debug')
    out.append(ResultBlock(text, title=''))
    return out
def __call__(self, dataframe, path):
    """Render *dataframe* as a single spreadsheet block.

    Returns an empty ResultBlocks when *dataframe* is None.
    """
    blocks = ResultBlocks()
    if dataframe is not None:
        blocks.append(
            self.asSpreadSheet(dataframe,
                               dataframe.index,
                               dataframe.columns,
                               path2str(path)))
    return blocks
def __call__(self, track, slice=None):
    """Emit a figure block for every PNG found in IMAGEDIR.

    Returns an ordered dict with a single ``rst`` entry laying the
    figures out in two columns.
    """
    blocks = ResultBlocks()
    block = '''
.. figure:: %(image)s
   :height: 300
'''
    for image in glob.glob(os.path.join(IMAGEDIR, "*.png")):
        # template is filled from the local namespace (%(image)s)
        blocks.append(ResultBlock(text=block % locals(), title="image"))
    return odict(
        (("rst",
          "\n".join(Utils.layoutBlocks(blocks, layout="columns-2"))),))
def render(self, dataframe, path):
    """Render rows of *dataframe* as an HTML slideshow embedded in RST.

    Each row must provide 'filename' (image path) and 'name'
    (description/title); a warning is emitted and rendering aborted if
    either key is missing.  The generated HTML is wrapped in
    ``.. only:: html`` / ``.. raw:: html`` directives.

    Returns a ResultBlocks with a single block titled after *path*.
    """
    blocks = ResultBlocks()
    options = self.get_slideshow_options()
    lines = [self.prefix % options]
    for title, row in dataframe.iterrows():
        row = row[row.notnull()]
        values = row.tolist()
        headers = list(row.index)
        dataseries = dict(zip(headers, values))
        try:
            # return value is a series
            filename = dataseries['filename']
        except KeyError:
            self.warn(
                "no 'filename' key in path %s" % (path2str(path)))
            return blocks
        try:
            # return value is a series
            name = dataseries['name']
        except KeyError:
            self.warn(
                "no 'name' key in path %s" % (path2str(path)))
            return blocks
        description, title = os.path.split(name)
        lines.extend(self.add_image(filename, title, description))
    lines.append("""</div>""")
    lines.append(self.skin % options)
    lines.append("""</div>""")
    lines = "\n".join(lines).split("\n")
    # BUGFIX: the directive must be ".. only:: html" (with a space);
    # ".. only::html" is not recognised by docutils/sphinx.
    lines = [".. only:: html\n"] +\
            ["   .. raw:: html\n"] +\
            ["      " + x for x in lines]
    lines = "\n".join(lines)
    blocks.append(ResultBlock(text=lines, title=path2str(path)))
    return blocks
def render(self, data):
    """Collect pre-rendered text stored under tracker keyword fields.

    Walks the leaves of the data tree and emits one ResultBlock per
    recognised keyword entry, titled by the leaf's path.
    """
    out = ResultBlocks(title='user')
    depth = len(DataTree.getPaths(data)) - 2
    # iterate over all items at leaf level
    for path, branch in DataTree.getNodes(data, depth):
        for keyword in Utils.TrackerKeywords:
            if keyword in branch:
                # add a result block for this keyword's payload
                out.append(ResultBlock(branch[keyword],
                                       title=path2str(path)))
    return out
def __call__(self, dataframe, path):
    """Render *dataframe* as RST, adding an out-of-line copy for large tables.

    If the table exceeds max_rows/max_cols (and force is not set), or
    separate output is requested, the table is additionally written as a
    spreadsheet (``large == "xls"``) or plain file.  The RST rendering is
    always appended.  Returns an empty ResultBlocks for None input.
    """
    blocks = ResultBlocks()
    if dataframe is None:
        return blocks
    title = path2str(path)
    rows, cols = dataframe.index, dataframe.columns
    too_big = len(rows) > self.max_rows or len(cols) > self.max_cols
    # do not output large matrices as rst files
    if self.separate or (too_big and not self.force):
        if self.large == "xls":
            blocks.append(self.asSpreadSheet(dataframe, rows, cols, title))
        else:
            blocks.append(self.asFile(dataframe, rows, cols, title))
    blocks.append(self.asRST(dataframe, rows, cols, title))
    return blocks
def render(self, work, path):
    """render the data.

    Builds a matrix from *work* and renders it either as an RST
    ``csv-table`` or - for large/separated output - as an external file.
    Returns a ResultBlocks; an empty block if the matrix has no rows.
    """
    results = ResultBlocks(title=path)
    matrix, rows, columns = self.buildMatrix(work)
    title = path2str(path)
    if len(rows) == 0:
        return ResultBlocks(ResultBlock("", title=title))
    # do not output large matrices as rst files
    # separate and force need to be mixed in.
    if self.separate or (not self.force and
                         (len(rows) > self.max_rows or
                          len(columns) > self.max_cols)):
        return ResultBlocks(self.asFile(pandas.DataFrame(matrix,
                                                         index=rows,
                                                         columns=columns),
                                        rows,
                                        columns,
                                        title),
                            title=path)
    lines = []
    lines.append(".. csv-table:: %s" % title)
    # NOTE(review): option/body indent reconstructed from mangled source
    lines.append('   :header: "track","%s" ' % '","'.join(columns))
    lines.append('')
    for row in range(len(rows)):
        lines.append(
            '   "%s","%s"' % (rows[row], '","'.join(
                [self.toString(x) for x in matrix[row]])))
    lines.append("")
    if path is None:
        subtitle = ""
    else:
        subtitle = path2str(path)
    results.append(ResultBlock("\n".join(lines), title=subtitle))
    return results
def endPlot(self, plots, legends, path):
    """Close bokeh plots.

    Emits one placeholder block (``#$bkh <id>$#``) per figure; the
    bokeh collector substitutes these later.  The *legends* argument is
    accepted for interface compatibility but not used here.
    """
    blocks = ResultBlocks()
    title = path2str(path)
    for figure in plots:
        # blank line, placeholder, blank line
        text = "\n".join(["", "#$bkh %s$#" % figure._id, ""])
        block = ResultBlock(text, title=title)
        block.bokeh = figure
        blocks.append(block)
    return blocks
def render(self, dataframe, path):
    """Plot a smoothed density scatter for every pair of columns.

    Uses R's ``smoothScatter`` via rpy2.  Missing values are filtered
    per pair and optional log10 transforms are applied to the data
    (not the plot axes).

    Raises ValueError if *dataframe* has fewer than two columns.
    """
    if len(dataframe.columns) < 2:
        raise ValueError(
            "requiring two coordinates, only got %s" %
            str(dataframe.columns))
    # note: removed unused 'plts, legend' locals
    blocks = ResultBlocks()
    for xcolumn, ycolumn in itertools.combinations(dataframe.columns, 2):
        # remove missing data points
        xvalues, yvalues = Stats.filterMissing(
            (dataframe[xcolumn], dataframe[ycolumn]))
        # remove columns with all NaN
        if len(xvalues) == 0 or len(yvalues) == 0:
            continue
        # apply log transformation on data not on plot
        if self.logscale:
            if "x" in self.logscale:
                xvalues = R.log10(xvalues)
            if "y" in self.logscale:
                yvalues = R.log10(yvalues)
        self.startPlot()
        # wrap, as pandas series can not be passed through rpy2.
        # BUGFIX: use builtin float - the numpy.float alias was
        # deprecated in numpy 1.20 and removed in numpy 1.24.
        R.smoothScatter(numpy.array(xvalues, dtype=float),
                        numpy.array(yvalues, dtype=float),
                        xlab=xcolumn,
                        ylab=ycolumn,
                        nbin=self.nbins)
        blocks.extend(self.endPlot(dataframe, path))
    return blocks
def __call__(self, dataframe, path):
    """Render *dataframe* rows as an RST glossary.

    Each row's index becomes a glossary term and the row's string
    representation becomes the (indented) definition.  Returns an empty
    ResultBlocks when *dataframe* is None.
    """
    results = ResultBlocks()
    if dataframe is None:
        return results
    title = path2str(path)
    lines = [".. glossary::", ""]
    # FIX: dropped the unused enumerate() counter - its variable was
    # never read (and was shadowed by the comprehension below).
    for header, data in dataframe.iterrows():
        # indent the row's text under its glossary term
        txt = "\n   ".join([x.strip() for x in str(data).split("\n")])
        lines.append('   %s\n      %s\n' % (path2str(header), txt))
    lines.append("")
    results.append(ResultBlock("\n".join(lines), title=title))
    return results
def __call__(self, dataframe, path):
    """Render *dataframe* as a CSV table, with large-table handling.

    Large tables (or ``separate`` output) are written to a spreadsheet
    or external file; otherwise (or additionally, when ``preview`` is
    requested) an inline CSV rendering is appended.
    """
    # modify table (adding/removing columns) according to user options
    # matrix, row_headers, col_headers = \
    #   self.modifyTable(matrix, row_headers, col_headers)
    dataframe = self.modifyTable(dataframe)
    title = path2str(path)
    results = ResultBlocks()
    row_headers = dataframe.index
    col_headers = dataframe.columns
    # as of sphinx 1.3.1, tables with more than 100 columns cause an
    # error:
    # Exception occurred:
    #   File ".../docutils/writers/html4css1/__init__.py",
    #        line 642, in write_colspecs
    #     colwidth = int(node['colwidth'] * 100.0 / width + 0.5)
    # ZeroDivisionError: float division by zero
    #
    # Thus, for table with more than 100 columns, force will be
    # disabled and max_cols set to a low value in order to make
    # sure the table is not displayed inline
    # NOTE(review): this mutates renderer state (self.force/self.max_cols)
    # for all subsequent calls as well.
    if len(col_headers) >= 90:
        self.force = False
        self.max_cols = 10
    # do not output large matrices as rst files
    if self.separate or (not self.force and
                         (len(row_headers) > self.max_rows or
                          len(col_headers) > self.max_cols)):
        if self.large == "xls":
            results.append(self.asSpreadSheet(dataframe, row_headers,
                                              col_headers, title))
        else:
            results.append(self.asFile(dataframe, row_headers,
                                       col_headers, title))
        if self.preview:
            raise NotImplementedError('preview not implemented')
            # unreachable below the raise
            row_headers = row_headers[:self.max_rows]
            col_headers = col_headers[:self.max_cols]
            # matrix = [x[:self.max_cols] for x in
            #           matrix[:self.max_rows]]
        else:
            return results
    results.append(self.asCSV(dataframe, row_headers, col_headers, title))
    return results
def __call__(self, data, path):
    """Draw a seaborn clustermap of *data* with a categorical color bar.

    Builds row/column color annotations from getColorBar(), draws the
    clustered heatmap, adds a legend of the category colors, and
    returns a matplotlib placeholder block referencing the figure.
    """
    colorbar, factors, unique, xkcd = self.getColorBar(data)
    # assumes data is square-able: keeps only the first n_samples columns
    # - TODO confirm intended shape handling
    n_samples = data.shape[0]
    data = data.iloc[:, :n_samples]
    col_dict = dict(list(zip(unique, xkcd)))
    print(data.head())  # NOTE(review): debug leftover - consider removing
    seaborn.set(font_scale=.5)
    ax = seaborn.clustermap(data, row_colors=colorbar, col_colors=colorbar)
    # hide heatmap y-axis labels
    plt.setp(ax.ax_heatmap.yaxis.set_visible(False))
    # draw zero-size bars purely to populate the legend with category colors
    for label in unique:
        ax.ax_col_dendrogram.bar(
            0, 0, color=seaborn.xkcd_rgb[col_dict[label]],
            label=label, linewidth=0)
    ax.ax_col_dendrogram.legend(loc="center", ncol=len(unique))
    # placeholder text substituted by the matplotlib collector later
    return ResultBlocks(ResultBlock(
        '''#$mpl %i$#\n''' % ax.cax.figure.number,
        title='ClusterMapPlot'))
def __call__(self, dataframe, path):
    """Render *dataframe* as a CSV table, with large-table handling.

    Large tables (or ``separate`` output) are written to a spreadsheet
    or external file; otherwise (or additionally, when ``preview`` is
    requested) an inline CSV rendering is appended.  Unlike the sibling
    variant elsewhere in this file, no >90-column sphinx guard is
    applied here.
    """
    # modify table (adding/removing columns) according to user options
    # matrix, row_headers, col_headers = \
    #   self.modifyTable(matrix, row_headers, col_headers)
    dataframe = self.modifyTable(dataframe)
    title = path2str(path)
    results = ResultBlocks()
    row_headers = dataframe.index
    col_headers = dataframe.columns
    # do not output large matrices as rst files
    if self.separate or (not self.force and
                         (len(row_headers) > self.max_rows or
                          len(col_headers) > self.max_cols)):
        if self.large == "xls":
            results.append(self.asSpreadSheet(dataframe, row_headers,
                                              col_headers, title))
        else:
            results.append(self.asFile(dataframe, row_headers,
                                       col_headers, title))
        if self.preview:
            raise NotImplementedError('preview not implemented')
            # unreachable below the raise
            row_headers = row_headers[:self.max_rows]
            col_headers = col_headers[:self.max_cols]
            # matrix = [x[:self.max_cols] for x in
            #           matrix[:self.max_rows]]
        else:
            return results
    results.append(self.asCSV(dataframe, row_headers, col_headers, title))
    return results
def render(self):
    '''supply the:class:`Renderer.Renderer` with the data to render.

    The data supplied will depend on the ``groupby`` option.

    returns a ResultBlocks data structure.

    Raises ValueError if there is no data, or if the renderer produced
    no blocks.
    '''
    self.debug("%s: rendering data started for %i items" %
               (self, len(self.data)))
    # initiate output structure
    results = ResultBlocks(title="")
    dataframe = self.data
    # dataframe.write_csv("test.csv")
    if dataframe is None:
        self.warn("%s: no data after conversion" % self)
        raise ValueError("no data for renderer")
    # special patch: set column names to pruned levels
    # if there are no column names
    if len(dataframe.columns) == len(self.pruned):
        if list(dataframe.columns) == list(range(len(dataframe.columns))):
            dataframe.columns = [x[1] for x in self.pruned]
    nlevels = Utils.getDataFrameLevels(dataframe)
    self.debug("%s: rendering data started. "
               "levels=%i, group_level=%s" %
               (self, nlevels, str(self.group_level)))
    if self.group_level < 0:
        # no grouping for renderers that will accept
        # a dataframe with any level of indices and no explicit
        # grouping has been asked for.
        results.append(self.renderer(dataframe, path=()))
    else:
        level = Utils.getGroupLevels(
            dataframe,
            max_level=self.group_level + 1)
        self.debug("%s: grouping by levels: %s" %
                   (self, str(level)))
        for key, work in dataframe.groupby(level=level):
            try:
                results.append(self.renderer(work, path=key))
            except:
                # NOTE(review): bare except also swallows
                # KeyboardInterrupt/SystemExit
                self.error("%s: exception in rendering" % self)
                results.append(
                    ResultBlocks(Utils.buildException("rendering")))
    if len(results) == 0:
        self.warn("renderer returned no data.")
        raise ValueError("renderer returned no data.")
    self.debug("%s: rendering data finished with %i blocks" %
               (self.tracker, len(results)))
    return results
def __call__(self, *args, **kwargs):
    """Run the full dispatch pipeline: parse, collect, transform, render.

    Each stage is guarded; on failure a ResultBlocks wrapping an
    exception block is returned instead of raising.  Returns None when
    the tracker produced no data.
    """
    try:
        self.parseArguments(*args, **kwargs)
    except:
        self.error("%s: exception in parsing" % self)
        return ResultBlocks(ResultBlocks(Utils.buildException("parsing")))
    # collect no data if tracker is the empty tracker
    # and go straight to rendering
    try:
        if self.tracker.getTracks() == ["empty"]:
            # is instance does not work because of module mapping
            # type(Tracker.Empty) == CGATReport.Tracker.Empty
            # type(self.tracker) == Tracker.Empty
            # if isinstance(self.tracker, Tracker.Empty):
            result = self.renderer()
            return ResultBlocks(result)
    except AttributeError:
        # for function trackers
        pass
    self.debug("profile: started: tracker: %s" % (self.tracker))
    # collecting data
    try:
        self.collect()
    except:
        self.error("%s: exception in collection" % self)
        return ResultBlocks(ResultBlocks(
            Utils.buildException("collection")))
    finally:
        self.debug("profile: finished: tracker: %s" % (self.tracker))
    if self.tree is None or len(self.tree) == 0:
        self.info("%s: no data - processing complete" % self.tracker)
        return None
    data_paths = DataTree.getPaths(self.tree)
    self.debug("%s: after collection: %i data_paths: %s" %
               (self, len(data_paths), str(data_paths)))
    # special Renderers - do not process data further but render
    # directly. Note that no transformations will be applied.
    if isinstance(self.renderer, Renderer.User):
        results = ResultBlocks(title="main")
        results.append(self.renderer(self.tree))
        return results
    elif isinstance(self.renderer, Renderer.Debug):
        results = ResultBlocks(title="main")
        results.append(self.renderer(self.tree))
        return results
    # merge all data to hierarchical indexed dataframe
    self.data = DataTree.asDataFrame(self.tree)
    self.debug("dataframe memory usage: total=%i,data=%i,index=%i,col=%i" %
               (self.data.values.nbytes +
                self.data.index.nbytes +
                self.data.columns.nbytes,
                self.data.values.nbytes,
                self.data.index.nbytes,
                self.data.columns.nbytes))
    # if tracks are set by tracker, call tracker with dataframe
    if self.indexFromTracker:
        self.tracker.setIndex(self.data)
    # transform data
    try:
        self.transform()
    except:
        self.error("%s: exception in transformation" % self)
        return ResultBlocks(ResultBlocks(
            Utils.buildException("transformation")))
    # data_paths = DataTree.getPaths(self.data)
    # self.debug("%s: after transformation: %i data_paths: %s" %
    #            (self, len(data_paths), str(data_paths)))
    # restrict
    try:
        self.filterPaths(self.restrict_paths, mode="restrict")
    except:
        self.error("%s: exception in restrict" % self)
        return ResultBlocks(ResultBlocks(
            Utils.buildException("restrict")))
    # data_paths = DataTree.getPaths(self.data)
    # self.debug("%s: after restrict: %i data_paths: %s" %
    #            (self, len(data_paths), str(data_paths)))
    # exclude
    try:
        self.filterPaths(self.exclude_paths, mode="exclude")
    except:
        self.error("%s: exception in exclude" % self)
        return ResultBlocks(ResultBlocks(Utils.buildException("exclude")))
    # data_paths = DataTree.getPaths(self.data)
    # self.debug("%s: after exclude: %i data_paths: %s" %
    #            (self, len(data_paths), str(data_paths)))
    # No pruning - maybe enable later as a user option
    self.pruned = []
    # try:
    #     self.prune()
    # except:
    #     self.error("%s: exception in pruning" % self)
    #     return ResultBlocks(ResultBlocks(Utils.buildException("pruning")))
    # data_paths = DataTree.getPaths(self.data)
    # self.debug("%s: after pruning: %i data_paths: %s" %
    #            (self, len(data_paths), str(data_paths)))
    try:
        self.group()
    except:
        self.error("%s: exception in grouping" % self)
        return ResultBlocks(ResultBlocks(Utils.buildException("grouping")))
    # data_paths = DataTree.getPaths(self.data)
    # self.debug("%s: after grouping: %i data_paths: %s" %
    #            (self, len(data_paths), str(data_paths)))
    if self.renderer is not None:
        self.debug("profile: started: renderer: %s" % (self.renderer))
        try:
            result = self.render()
        except:
            self.error("%s: exception in rendering" % self)
            return ResultBlocks(ResultBlocks(
                Utils.buildException("rendering")))
        finally:
            self.debug("profile: finished: renderer: %s" % (self.renderer))
    else:
        result = ResultBlocks(title="")
    return result
def run(arguments, options, lineno, content, state_machine=None,
        document=None, srcdir=None, builddir=None):
    """process:report: directive.

    *srdir* - top level directory of rst documents
    *builddir* - build directory

    Resolves paths, splits the directive options into renderer/
    transformer/dispatcher/tracker/display groups, skips re-rendering
    when cached output and all referenced figures exist, otherwise
    instantiates the tracker/renderer/dispatcher, collects figures and
    inserts the resulting rst into the document.  Returns an empty
    node list (all output goes through the state machine).
    """
    tag = "%s:%i" % (str(document), lineno)
    logging.debug("report_directive.run: profile: started: rst: %s" % tag)
    # sort out the paths
    # reference is used for time-stamping
    tracker_name = directives.uri(arguments[0])
    (basedir, fname, basename, ext, outdir,
     codename, notebookname) = Utils.build_paths(tracker_name)
    # get the directory of the rst file
    # state_machine.document.attributes['source'])
    rstdir, rstfile = os.path.split(document)
    # root of document tree
    if srcdir is None:
        srcdir = setup.srcdir
    # build directory
    if builddir is None:
        builddir = setup.builddir
    # remove symbolic links
    srcdir, builddir, rstdir = [
        os.path.realpath(x) for x in (srcdir, builddir, rstdir)]
    # there are three directories:
    # builddir = directory where document is built in
    #            (usually _build/html or similar)
    # rstdir = directory where rst sources are located
    # srcdir = directory from which the build process is started
    # path to root relative to rst
    rst2srcdir = os.path.join(os.path.relpath(srcdir, start=rstdir), outdir)
    # path to root relative to rst
    rst2builddir = os.path.join(
        os.path.relpath(builddir, start=rstdir), outdir)
    # path relative to source (for images)
    root2builddir = os.path.join(
        os.path.relpath(builddir, start=srcdir), outdir)
    logging.debug(
        "report_directive.run: arguments=%s, options=%s, lineno=%s, "
        "content=%s, document=%s" %
        (str(arguments), str(options), str(lineno), str(content),
         str(document)))
    logging.debug(
        "report_directive.run: plotdir=%s, basename=%s, ext=%s, "
        "fname=%s, rstdir=%s, srcdir=%s, builddir=%s" %
        (tracker_name, basename, ext, fname, rstdir, srcdir, builddir))
    logging.debug(
        "report_directive.run: tracker_name=%s, basedir=%s, "
        "rst2src=%s, root2build=%s, outdir=%s, codename=%s" %
        (tracker_name, basedir, rst2srcdir, rst2builddir, outdir, codename))
    # try to create. If several processes try to create it,
    # testing with `if` will not work.
    try:
        os.makedirs(outdir)
    except OSError as msg:
        pass
    if not os.path.exists(outdir):
        # NOTE(review): on python 3 `msg` is deleted at the end of the
        # except block, so this line would raise NameError - confirm
        # intended python version.
        raise OSError("could not create directory %s: %s" % (outdir, msg))
    ########################################################
    # collect options
    # replace placedholders
    try:
        options = Utils.updateOptions(options)
    except ValueError as msg:
        logging.warn("failure while updating options: %s" % msg)
    logging.debug("report_directive.run: options=%s" % (str(options),))
    transformer_names = []
    renderer_name = None
    # get layout option
    layout = options.get("layout", "column")
    option_map = Component.getOptionMap()
    renderer_options = Utils.selectAndDeleteOptions(
        options, option_map["render"])
    transformer_options = Utils.selectAndDeleteOptions(
        options, option_map["transform"])
    dispatcher_options = Utils.selectAndDeleteOptions(
        options, option_map["dispatch"])
    tracker_options = Utils.selectAndDeleteOptions(
        options, option_map["tracker"])
    display_options = Utils.selectAndDeleteOptions(
        options, option_map["display"])
    logging.debug("report_directive.run: renderer options: %s" %
                  str(renderer_options))
    logging.debug("report_directive.run: transformer options: %s" %
                  str(transformer_options))
    logging.debug("report_directive.run: dispatcher options: %s" %
                  str(dispatcher_options))
    logging.debug("report_directive.run: tracker options: %s" %
                  str(tracker_options))
    logging.debug("report_directive.run: display options: %s" %
                  str(display_options))
    if "transform" in display_options:
        transformer_names = display_options["transform"].split(",")
        del display_options["transform"]
    if "render" in display_options:
        renderer_name = display_options["render"]
        del display_options["render"]
    ########################################################
    # check for missing files
    if renderer_name is not None:
        # options hash makes the cache key sensitive to option changes
        options_key = str(renderer_options) +\
            str(transformer_options) +\
            str(dispatcher_options) +\
            str(tracker_options) +\
            str(transformer_names)
        options_hash = hashlib.md5(options_key.encode()).hexdigest()
        template_name = Utils.quote_filename(
            Config.SEPARATOR.join((tracker_name, renderer_name,
                                   options_hash)))
        filename_text = os.path.join(outdir, "%s.txt" % (template_name))
        notebookname += options_hash
        logging.debug("report_directive.run: options_hash=%s" %
                      options_hash)
        ###########################################################
        # check for existing files
        # update strategy does not use file stamps, but checks
        # for presence/absence of text element and if all figures
        # mentioned in the text element are present
        ###########################################################
        queries = [re.compile("%s(%s\S+.%s)" %
                              (root2builddir, outdir, suffix))
                   for suffix in ("png", "pdf", "svg")]
        logging.debug("report_directive.run: checking for changed files.")
        # check if text element exists
        if os.path.exists(filename_text):
            lines = [x[:-1] for x in open(filename_text, "r").readlines()]
            filenames = []
            # check if all figures are present
            for line in lines:
                for query in queries:
                    x = query.search(line)
                    if x:
                        filenames.extend(list(x.groups()))
            logging.debug(
                "report_directive.run: %s: checking for %s" %
                (tag, str(filenames)))
            for filename in filenames:
                if not os.path.exists(filename):
                    logging.info(
                        "report_directive.run: %s: redo: %s missing" %
                        (tag, filename))
                    break
            else:
                logging.info(
                    "report_directive.run: %s: noredo: all files "
                    "are present" % tag)
                # all is present - save text and return
                if lines and state_machine:
                    state_machine.insert_input(
                        lines, state_machine.input_lines.source(0))
                return []
        else:
            logging.debug(
                "report_directive.run: %s: no check performed: %s missing" %
                (tag, str(filename_text)))
    else:
        template_name = ""
        filename_text = None
    ##########################################################
    # Initialize collectors
    collectors = []
    for collector in list(Component.getPlugins("collect").values()):
        collectors.append(collector())
    ##########################################################
    # instantiate tracker, dispatcher, renderer and transformers
    # and collect output
    ###########################################################
    try:
        ########################################################
        # find the tracker
        logging.debug(
            "report_directive.run: collecting tracker %s with options %s " %
            (tracker_name, tracker_options))
        code, tracker, tracker_path = Utils.makeTracker(
            tracker_name, (), tracker_options)
        if not tracker:
            logging.error(
                "report_directive.run: no tracker - no output from %s " %
                str(document))
            raise ValueError("tracker `%s` not found" % tracker_name)
        logging.debug(
            "report_directive.run: collected tracker %s" % tracker_name)
        tracker_id = Cache.tracker2key(tracker)
        ########################################################
        # determine the transformer
        logging.debug("report_directive.run: creating transformers")
        transformers = Utils.getTransformers(
            transformer_names, transformer_options)
        ########################################################
        # determine the renderer
        logging.debug("report_directive.run: creating renderer.")
        if renderer_name is None:
            logging.error(
                "report_directive.run: no renderer - no output from %s" %
                str(document))
            raise ValueError("the report directive requires a renderer")
        renderer = Utils.getRenderer(renderer_name, renderer_options)
        try:
            renderer.set_paths(rstdir, srcdir, builddir)
            renderer.set_display_options(display_options)
        except AttributeError:
            # User renderers will not have these methods
            pass
        ########################################################
        # create and call dispatcher
        logging.debug("report_directive.run: creating dispatcher")
        dispatcher = Dispatcher.Dispatcher(tracker,
                                           renderer,
                                           transformers)
        # add the tracker options
        dispatcher_options.update(tracker_options)
        blocks = dispatcher(**dispatcher_options)
        if blocks is None:
            blocks = ResultBlocks(ResultBlocks(
                Utils.buildWarning(
                    "NoData",
                    "tracker %s returned no Data" % str(tracker))))
            code = None
            tracker_id = None
    except:
        logging.warn(
            "report_directive.run: exception caught at %s:%i "
            "- see document" % (str(document), lineno))
        blocks = ResultBlocks(ResultBlocks(
            Utils.buildException("invocation")))
        code = None
        tracker_id = None
    logging.debug(
        "report_directive.run: profile: started: collecting: %s" % tag)
    ########################################################
    # write code output
    linked_codename = re.sub("\\\\", "/", os.path.join(rst2srcdir, codename))
    if code and basedir != outdir:
        with open(os.path.join(outdir, codename), "w") as outfile:
            for line in code:
                outfile.write(line)
    ########################################################
    # write notebook snippet
    linked_notebookname = re.sub(
        "\\\\", "/", os.path.join(rst2srcdir, notebookname))
    if basedir != outdir and tracker_id is not None:
        with open(os.path.join(outdir, notebookname), "w") as outfile:
            # NOTE(review): dict.items() + dict.items() concatenation
            # fails on python 3 (views are not addable) - confirm
            # intended python version.
            Utils.writeNoteBookEntry(outfile,
                                     renderer=renderer_name,
                                     tracker=tracker_name,
                                     transformers=transformer_names,
                                     tracker_path=tracker_path,
                                     options=renderer_options.items() +
                                     tracker_options.items() +
                                     transformer_options.items())
    ###########################################################
    # collect images
    ###########################################################
    map_figure2text = {}
    links = {'code_url': linked_codename,
             'notebook_url': linked_notebookname}
    try:
        for collector in collectors:
            map_figure2text.update(collector.collect(
                blocks,
                template_name,
                outdir,
                rstdir,
                builddir,
                srcdir,
                content,
                display_options,
                tracker_id,
                links=links))
    except:
        logging.warn("report_directive.run: exception caught while "
                     "collecting with %s at %s:%i - see document" %
                     (collector, str(document), lineno))
        blocks = ResultBlocks(ResultBlocks(
            Utils.buildException("collection")))
        code = None
        tracker_id = None
    ###########################################################
    # replace place holders or add text
    ###########################################################
    # add default for text-only output
    urls = Utils.asList(Utils.PARAMS["report_urls"])
    code_url, nb_url = "", ""
    if "code" in urls:
        code_url = "`code <%(code_url)s>`__" % links
    if "notebook" in urls:
        nb_url = '`nb <%(notebook_url)s>`__' % links
    map_figure2text["default-prefix"] = TEMPLATE_TEXT % locals()
    map_figure2text["default-suffix"] = ""
    blocks.updatePlaceholders(map_figure2text)
    # render the output taking into account the layout
    lines = Utils.layoutBlocks(blocks, layout)
    lines.append("")
    # add caption
    lines.extend(['::', ''])
    if content:
        lines.extend(['    %s' % row.strip() for row in content])
    lines.append("")
    lines.append("")
    # output rst text for this renderer
    if filename_text:
        outfile = open(filename_text, "w")
        outfile.write("\n".join(lines))
        outfile.close()
    if CGATREPORT_DEBUG:
        for x, l in enumerate(lines):
            print("%5i %s" % (x, l))
    if len(lines) and state_machine:
        state_machine.insert_input(
            lines, state_machine.input_lines.source(0))
    logging.debug(
        "report_directive.run: profile: finished: collecting: %s" % tag)
    logging.debug(
        "report_directive.run: profile: finished: rst: %s:%i" %
        (str(document), lineno))
    return []
def __call__(self, dataframe, path):
    '''iterate over leaves/branches in data structure.

    This method will call the:meth:`render` method.

    Large dataframes are split into multiple, smaller rendered
    objects if self.split_at is not zero.

    By default, dataframes are split along the hierachical
    index. However, if there is only a single index, but multiple
    columns, the split is performed on the columns instead. This
    is used when splitting coordinate data as a result of the
    histogram transformation.
    '''
    result = ResultBlocks()
    if not self.split_at:
        result.extend(self.render(dataframe, path))
    else:
        # split dataframe at first index
        level = Utils.getGroupLevels(dataframe)
        grouper = dataframe.groupby(level=level)
        # split dataframe column wise if only one index
        # and multiple columns
        if len(grouper) == 1 and len(dataframe.columns) > self.split_at:
            columns = list(dataframe.columns)
            always = []
            if self.split_keep_first_column:
                always.append(columns[0])
            # columns to always keep
            always.extend([c for c in columns if c in self.split_always])
            columns = [c for c in columns if c not in always]
            for x in range(0, len(columns), self.split_at):
                # extract a set of columns (always-kept first)
                result.extend(self.render(
                    dataframe.loc[:, always + columns[x:x + self.split_at]],
                    path))
        # split dataframe along index
        elif len(grouper) >= self.split_at:
            # build groups: collect index keys matching split_always
            # patterns so they appear in every chunk
            always, remove_always = [], set()
            if self.split_always:
                for key, work in grouper:
                    for pat in self.split_always:
                        rx = re.compile(pat)
                        if rx.search(path2str(key)):
                            always.append((key, work))
                            remove_always.add(key)
            # re-create the grouper, the first one was consumed above
            grouper = dataframe.groupby(level=level)

            def _group_group(grouper, always, remove_always):
                # yield chunks of split_at groups, each seeded with
                # the always-included groups
                group = always[:]
                for key, work in grouper:
                    if key in remove_always:
                        continue
                    group.append((key, work))
                    if len(group) >= self.split_at:
                        yield group
                        group = always[:]
                # final (possibly short) chunk
                yield group

            first = True
            for group in _group_group(grouper, always, remove_always):
                # do not plot last dataframe that contains
                # only the common tracks to plot
                if not first and len(group) == len(always):
                    continue
                first = False
                df = pandas.concat(
                    [x[1] for x in group])
                # reconcile index names
                df.index.names = dataframe.index.names
                result.extend(self.render(df, path))
        else:
            # do not split dataframe
            result.extend(self.render(dataframe, path))
    return result
def layoutBlocks(blocks, layout="column"):
    """layout blocks of rst text.

    layout can be one of "column", "row", "grid" or "column-<n>"
    (at most *n* columns).

    The "column" layout simply lists blocks vertically; the other
    layouts use an rst grid table to arrange elements.
    """
    lines = []
    if len(blocks) == 0:
        return lines

    # flatten blocks, prefixing each block's text with its title
    bb = ResultBlocks()
    for b in blocks:
        if b.title:
            b.updateTitle(b.title, "prefix")
        try:
            bb.extend(b)
        except TypeError:
            bb.append(b)
    blocks = bb

    # check if postambles are identical across all blocks;
    # if so, output the common postamble only once at the end
    postambles = {b.postamble for b in blocks}

    if len(postambles) == 1:
        blocks.clearPostamble()
        postamble = postambles.pop()
    else:
        postamble = None

    if layout == "column":
        for block in blocks:
            if block.title:
                lines.extend(block.title.split("\n"))
                lines.append("")
            else:
                warn("report_directive.layoutBlocks: missing title")

            lines.extend(block.text.split("\n"))
            lines.extend(block.postamble.split("\n"))
            lines.append("")

        if postamble:
            lines.extend(postamble.split("\n"))
            lines.append("")

        return lines

    elif layout in ("row", "grid"):
        if layout == "row":
            ncols = len(blocks)
        elif layout == "grid":
            ncols = int(math.ceil(math.sqrt(len(blocks))))

    elif layout.startswith("column"):
        ncols = min(len(blocks), int(layout.split("-")[1]))
        # TODO: think about appropriate fix for empty data
        if ncols == 0:
            ncols = 1
            return lines
    else:
        raise ValueError("unknown layout %s " % layout)

    if ncols == 0:
        warn("no columns")
        return lines

    # compute column widths: each table column is as wide as the
    # widest block assigned to it
    widths = [x.getWidth() for x in blocks]
    text_heights = [x.getTextHeight() for x in blocks]
    title_heights = [x.getTitleHeight() for x in blocks]

    columnwidths = [
        max(widths[y] for y in range(x, len(blocks), ncols))
        for x in range(ncols)]

    separator = "+%s+" % "+".join(["-" * x for x in columnwidths])

    # add empty blocks so that the last table row is complete
    if len(blocks) % ncols:
        blocks.extend([ResultBlock("", "")] * (ncols - len(blocks) % ncols))

    for nblock in range(0, len(blocks), ncols):
        # add text
        lines.append(separator)
        max_height = max(text_heights[nblock:nblock + ncols])
        new_blocks = ResultBlocks()

        for xx in range(nblock, min(nblock + ncols, len(blocks))):
            txt = blocks[xx].text.split("\n") + \
                blocks[xx].postamble.split("\n")
            col = xx % ncols

            max_width = columnwidths[col]

            # add missing lines
            txt.extend([""] * (max_height - len(txt)))
            # pad each line to the column width
            txt = [x + " " * (max_width - len(x)) for x in txt]

            new_blocks.append(txt)

        for row in zip(*new_blocks):
            lines.append("|%s|" % "|".join(row))

        # add subtitles
        max_height = max(title_heights[nblock:nblock + ncols])

        if max_height > 0:

            new_blocks = ResultBlocks()
            lines.append(separator)

            for xx in range(nblock, min(nblock + ncols, len(blocks))):
                txt = blocks[xx].title.split("\n")
                col = xx % ncols
                max_width = columnwidths[col]
                # add missing lines
                txt.extend([""] * (max_height - len(txt)))
                # pad each line to the column width
                txt = [x + " " * (max_width - len(x)) for x in txt]
                new_blocks.append(txt)

            for row in zip(*new_blocks):
                lines.append("|%s|" % "|".join(row))

        lines.append(separator)

    if postamble:
        lines.append(postamble)

    lines.append("")

    return lines
def __call__(self, dataframe, path):
    '''iterate over leaves/branches in data structure.

    This method will call the :meth:`render` method. If
    self.split_at is set and the dataframe has enough groups, the
    dataframe is split into chunks along its hierarchical index and
    each chunk is rendered separately.
    '''
    output = ResultBlocks()

    # no splitting requested - render as a whole
    if not self.split_at:
        output.extend(self.render(dataframe, path))
        return output

    index_levels = Utils.getGroupLevels(dataframe)
    grouped = dataframe.groupby(level=index_levels)

    # too few groups to warrant splitting - render as a whole
    if len(grouped) < self.split_at:
        output.extend(self.render(dataframe, path))
        return output

    # collect groups whose path matches one of the split_always
    # patterns; these are repeated in every rendered chunk
    pinned, pinned_keys = [], set()
    if self.split_always:
        for key, subframe in grouped:
            for pattern in self.split_always:
                rx = re.compile(pattern)
                if rx.search(path2str(key)):
                    pinned.append((key, subframe))
                    pinned_keys.add(key)
        grouped = dataframe.groupby(level=index_levels)

    def _chunks(grouped, pinned, pinned_keys):
        # yield chunks of at most self.split_at (key, frame) pairs,
        # each chunk seeded with the pinned groups
        chunk = list(pinned)
        for key, subframe in grouped:
            if key in pinned_keys:
                continue
            chunk.append((key, subframe))
            if len(chunk) >= self.split_at:
                yield chunk
                chunk = list(pinned)
        yield chunk

    is_first = True
    for chunk in _chunks(grouped, pinned, pinned_keys):
        # do not plot last dataframe that contains
        # only the common tracks to plot
        if not is_first and len(chunk) == len(pinned):
            continue
        is_first = False

        merged = pandas.concat([subframe for _, subframe in chunk])
        # reconcile index names
        merged.index.names = dataframe.index.names
        output.extend(self.render(merged, path))

    return output
def run(arguments, options, lineno, content, state_machine=None,
        document=None, srcdir=None, builddir=None,
        build_environment=None):
    """process the ``:report:`` directive.

    Collects data through a tracker, applies transformers, renders
    the result and inserts the resulting rst text into the document.

    *srcdir* - top level directory of rst documents

    *builddir* - build directory
    """
    # tag identifying this directive instance in log messages
    tag = "%s:%i" % (str(document), lineno)

    logger = Component.get_logger()
    logger.debug("report_directive.run: profile: started: rst: %s" % tag)

    # sort out the paths
    # reference is used for time-stamping
    tracker_name = directives.uri(arguments[0])

    (basedir, fname, basename, ext, outdir,
     codename, notebookname) = Utils.build_paths(tracker_name)

    # get the directory of the rst file
    # state_machine.document.attributes['source'])
    rstdir, rstfile = os.path.split(document)

    # root of document tree
    if srcdir is None:
        srcdir = setup.srcdir

    # build directory
    if builddir is None:
        builddir = setup.builddir

    # remove symbolic links
    srcdir, builddir, rstdir = [
        os.path.abspath(os.path.realpath(x)) for x in
        (srcdir, builddir, rstdir)]

    # there are three directories:
    # builddir = directory where document is built in
    #            (usually _build/html or similar)
    # rstdir = directory where rst sources are located
    # srcdir = directory from which the build process is started

    # path to root relative to rst
    rst2srcdir = os.path.join(os.path.relpath(srcdir, start=rstdir), outdir)

    # path to root relative to rst
    rst2builddir = os.path.join(
        os.path.relpath(builddir, start=rstdir), outdir)

    # path relative to source (for images)
    root2builddir = os.path.join(
        os.path.relpath(builddir, start=srcdir), outdir)

    logger.debug(
        "report_directive.run: arguments=%s, options=%s, lineno=%s, "
        "content=%s, document=%s" %
        (str(arguments), str(options), str(lineno),
         str(content), str(document)))
    logger.debug(
        "report_directive.run: plotdir=%s, basename=%s, ext=%s, "
        "fname=%s, rstdir=%s, srcdir=%s, builddir=%s" %
        (tracker_name, basename, ext, fname, rstdir, srcdir, builddir))
    logger.debug(
        "report_directive.run: tracker_name=%s, basedir=%s, "
        "rst2src=%s, root2build=%s, outdir=%s, codename=%s" %
        (tracker_name, basedir, rst2srcdir, rst2builddir, outdir, codename))

    # try to create. If several processes try to create it,
    # testing with `if` will not work.
    try:
        os.makedirs(outdir)
    except OSError as msg:
        pass

    # NOTE(review): on Python 3 the name ``msg`` bound by the except
    # clause above is deleted when the except block ends, so this
    # error path would raise NameError instead of the intended
    # OSError - confirm and capture the exception explicitly.
    if not os.path.exists(outdir):
        raise OSError("could not create directory %s: %s" % (outdir, msg))

    ########################################################
    # collect options
    # replace placedholders
    try:
        options = update_options(options)
    except ValueError as msg:
        logger.warn("failure while updating options: %s" % msg)

    logger.debug("report_directive.run: options=%s" % (str(options),))

    transformer_names = []
    renderer_name = None

    # layout of the resulting rst output, e.g. "column", "row", "grid"
    layout = options.get("layout", "column")
    long_titles = "long-titles" in options

    # partition the option dictionary by the component consuming
    # each option
    option_map = get_option_map()
    renderer_options = select_and_delete_options(
        options, option_map["render"])
    transformer_options = select_and_delete_options(
        options, option_map["transform"])
    dispatcher_options = select_and_delete_options(
        options, option_map["dispatch"])
    tracker_options = select_and_delete_options(
        options, option_map["tracker"], expand=["tracker"])
    display_options = get_default_display_options()
    display_options.update(select_and_delete_options(
        options, option_map["display"]))

    logger.debug("report_directive.run: renderer options: %s" %
                 str(renderer_options))
    logger.debug("report_directive.run: transformer options: %s" %
                 str(transformer_options))
    logger.debug("report_directive.run: dispatcher options: %s" %
                 str(dispatcher_options))
    logger.debug("report_directive.run: tracker options: %s" %
                 str(tracker_options))
    logger.debug("report_directive.run: display options: %s" %
                 str(display_options))

    if "transform" in display_options:
        transformer_names = display_options["transform"].split(",")
        del display_options["transform"]

    if "render" in display_options:
        renderer_name = display_options["render"]
        del display_options["render"]

    ########################################################
    # check for missing files
    if renderer_name is not None:

        # hash all options into a short, stable identifier for the
        # output files of this directive instance
        options_key = str(renderer_options) +\
            str(transformer_options) +\
            str(dispatcher_options) +\
            str(tracker_options) +\
            str(transformer_names) +\
            re.sub("\s", "", "".join(content))

        options_hash = hashlib.md5(options_key.encode()).hexdigest()[:10]

        template_name = Utils.quote_filename(
            Config.SEPARATOR.join((tracker_name, renderer_name,
                                   options_hash)))
        filename_text = os.path.join(outdir, "%s.txt" % (template_name))
        rstname = os.path.basename(filename_text)
        notebookname += options_hash

        logger.debug("report_directive.run: options_hash=%s" % options_hash)

        ###########################################################
        # check for existing files
        # update strategy does not use file stamps, but checks
        # for presence/absence of text element and if all figures
        # mentioned in the text element are present
        ###########################################################
        queries = [re.compile("%s/(\S+.%s)" % (root2builddir, suffix))
                   for suffix in ("png", "pdf", "svg")]

        logger.debug("report_directive.run: checking for changed files.")

        # check if text element exists
        if os.path.exists(filename_text):

            with open(filename_text, "r", encoding="utf-8") as inf:
                lines = [x[:-1] for x in inf]

            filenames = []

            # check if all figures are present
            for line in lines:
                for query in queries:
                    x = query.search(line)
                    if x:
                        filenames.extend(list(x.groups()))

            filenames = [os.path.join(outdir, x) for x in filenames]
            if len(filenames) == 0:
                logger.info(
                    "report_directive.run: %s: redo: no files found" % tag)
            else:
                logger.debug(
                    "report_directive.run: %s: checking for %s" %
                    (tag, str(filenames)))
                for filename in filenames:
                    if not os.path.exists(filename):
                        logger.info(
                            "report_directive.run: %s: redo: file %s is missing" %
                            (tag, filename))
                        break
                else:
                    # for-else: only reached when no file was missing
                    logger.info(
                        "report_directive.run: %s: noredo: all files are present" %
                        tag)
                    # all is present - save text and return
                    if lines and state_machine:
                        state_machine.insert_input(
                            lines,
                            state_machine.input_lines.source(0))
                    return []
        else:
            logger.debug(
                "report_directive.run: %s: no check performed: %s missing" %
                (tag, str(filename_text)))
    else:
        template_name = ""
        filename_text = None

    collect_here = False

    ##########################################################
    # instantiate tracker, dispatcher, renderer and transformers
    # and collect output
    ###########################################################
    try:
        ########################################################
        # find the tracker
        logger.debug(
            "report_directive.run: collecting tracker %s with options %s " %
            (tracker_name, tracker_options))
        code, tracker, tracker_path = make_tracker(
            tracker_name, (), tracker_options)
        if not tracker:
            logger.error(
                "report_directive.run: no tracker - no output from %s " %
                str(document))
            raise ValueError("tracker `%s` not found" % tracker_name)

        logger.debug(
            "report_directive.run: collected tracker %s" % tracker_name)

        tracker_id = Cache.tracker2key(tracker)

        ########################################################
        # determine the transformer
        logger.debug("report_directive.run: creating transformers")
        transformers = get_transformers(
            transformer_names, transformer_options)

        ########################################################
        # determine the renderer
        logger.debug("report_directive.run: creating renderer.")
        if renderer_name is None:
            logger.error(
                "report_directive.run: no renderer - no output from %s" %
                str(document))
            raise ValueError("the report directive requires a renderer")

        renderer = get_renderer(renderer_name, renderer_options)

        try:
            renderer.set_paths(rstdir, srcdir, builddir)
            renderer.set_display_options(display_options)
            renderer.set_build_environment(build_environment)
        except AttributeError:
            # User renderers will not have these methods
            pass

        ########################################################
        # write code output
        # normalize windows path separators for the download link
        linked_codename = re.sub(
            "\\\\", "/", os.path.join(rst2builddir, codename))
        if code and basedir != outdir:
            if six.PY2:
                with open(os.path.join(outdir, codename), "w") as outfile:
                    for line in code:
                        outfile.write(line)
            else:
                with open(os.path.join(outdir, codename), "w",
                          encoding=get_encoding()) as outfile:
                    for line in code:
                        outfile.write(line)

        ########################################################
        # write notebook snippet
        linked_notebookname = re.sub(
            "\\\\", "/", os.path.join(rst2builddir, notebookname))

        if basedir != outdir and tracker_id is not None:
            with open(os.path.join(outdir, notebookname), "w") as outfile:
                Utils.writeNoteBookEntry(
                    outfile,
                    renderer=renderer_name,
                    tracker=tracker_name,
                    transformers=transformer_names,
                    tracker_path=tracker_path,
                    options=list(renderer_options.items()) +
                    list(tracker_options.items()) +
                    list(transformer_options.items()))

        if filename_text is not None:
            linked_rstname = re.sub(
                "\\\\", "/", os.path.join(rst2builddir, rstname))
        else:
            linked_rstname = None

        ##########################################################
        # Initialize collectors
        links = {'code_url': linked_codename,
                 'rst_url': linked_rstname,
                 'notebook_url': linked_notebookname}

        collectors = []
        for name, collector in get_plugins("collect").items():
            collectors.append(collector(
                template_name=template_name,
                outdir=outdir,
                rstdir=rstdir,
                builddir=builddir,
                srcdir=srcdir,
                content=content,
                display_options=display_options,
                trackerd_id=tracker_id,
                links=links))

        # user renderers might not have a set_collectors method
        try:
            collect_here = not renderer.set_collectors(collectors)
        except AttributeError:
            collect_here = True

        ########################################################
        # create and call dispatcher
        logger.debug("report_directive.run: creating dispatcher")

        dispatcher = Dispatcher.Dispatcher(tracker,
                                           renderer,
                                           transformers)

        # add the tracker options
        dispatcher_options.update(tracker_options)

        blocks = dispatcher(**dispatcher_options)

        if blocks is None:
            blocks = ResultBlocks(
                Utils.buildWarning(
                    "NoData",
                    "tracker %s returned no Data" % str(tracker)))
            code = None
            tracker_id = None

    except:
        # NOTE(review): bare except - any failure during tracker,
        # renderer or dispatcher setup is turned into an error block
        # inside the document instead of aborting the build.
        exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
        tb = "\n".join(traceback.format_tb(exceptionTraceback))

        logger.error(
            "report_directive.run: exception caught at %s:%i: %s %s\n%s\n" %
            (str(document), lineno,
             exceptionType, exceptionValue, tb))

        blocks = ResultBlocks(Utils.buildException("invocation"))
        code = None
        tracker_id = None
        links = {'code_url': "",
                 'rst_url': "",
                 'notebook_url': ""}

    logger.debug(
        "report_directive.run: profile: started: collecting: %s" % tag)

    ###########################################################
    # replace place holders or add text
    ###########################################################
    # add default for text-only output
    requested_urls = as_list(Utils.get_params()["report_urls"])

    urls = []
    if "code" in requested_urls:
        urls.append(":download:`code <%(code_url)s>`" % links)
    if "notebook" in requested_urls:
        urls.append(":download:`nb <%(notebook_url)s>`" % links)

    map_figure2text = {}
    if collect_here:
        for collector in collectors:
            map_figure2text.update(collector.collect(blocks))

    map_figure2text["default-prefix"] = ""
    map_figure2text["default-suffix"] = ""

    if urls and "no-links" not in display_options:
        url_template = "[{}]".format(" ".join(urls))
    else:
        url_template = ""

    # url_template is referenced by TEMPLATE_TEXT through locals()
    map_figure2text["default-prefix"] = TEMPLATE_TEXT % locals()

    blocks.updatePlaceholders(map_figure2text)

    # render the output taking into account the layout
    lines = Utils.layoutBlocks(blocks, layout, long_titles=long_titles)
    lines.append("")

    # add caption
    if content and "no-caption" not in display_options:
        lines.extend(['::', ''])
        lines.extend([' %s' % row.strip() for row in content])
        lines.append("")

    # encode lines
    if six.PY2:
        lines = [force_encode(x, encoding="ascii", errors="replace")
                 for x in lines]

    # output rst text for this renderer
    if filename_text:
        if six.PY2:
            with open(filename_text, "w") as outf:
                outf.write("\n".join(lines))
        else:
            with open(filename_text, "w", encoding=get_encoding()) as outf:
                outf.write("\n".join(lines))

    if CGATREPORT_DEBUG:
        for x, l in enumerate(lines):
            try:
                print(("%5i %s" % (x, l)))
            except UnicodeEncodeError:
                print(("line skipped - unicode error"))
                pass

    if len(lines) and state_machine:
        state_machine.insert_input(
            lines, state_machine.input_lines.source(0))

    logger.debug(
        "report_directive.run: profile: finished: collecting: %s" % tag)
    logger.debug(
        "report_directive.run: profile: finished: rst: %s:%i" %
        (str(document), lineno))

    return []