Exemple #1
0
def getTransformers(transformers, kwargs=None):
    '''Find and instantiate all transformers.

    *transformers* is an iterable of transformer names. Each name is
    first looked up as ``transform-<name>`` in the "transform" section
    of the plugin registry returned by ``Component.getPlugins()``; names
    not registered there are handed to ``makeTransformer``.

    *kwargs* is a dictionary of keyword options used to build every
    transformer. It defaults to an empty dictionary.

    Returns a list of transformer instances, in the order of
    *transformers*.

    Raises KeyError if a transformer could not be instantiated.
    '''
    # a fresh dictionary per call - a shared mutable default would leak
    # modifications made by a factory into subsequent calls.
    if kwargs is None:
        kwargs = {}

    result = []
    for transformer in transformers:
        # look the registry up once per name and reuse it below
        plugins = Component.getPlugins()["transform"]
        k = "transform-%s" % transformer
        if k in plugins:
            instance = plugins[k](**kwargs)
        else:
            instance = makeTransformer(transformer, (), kwargs)

        if not instance:
            msg = "could not find transformer '%s'. Available transformers:\n  %s" % \
                (transformer,
                 "\n  ".join(sorted(plugins.keys())))
            raise KeyError(msg)

        result.append(instance)

    return result
Exemple #2
0
def getRenderer(renderer_name, kwargs=None):
    '''Find and instantiate a renderer.

    The renderer is looked up as ``render-<renderer_name>`` in the
    "render" section of the plugin registry returned by
    ``Component.getPlugins()``. If it is not registered there,
    ``makeRenderer`` is tried as a fallback.

    *kwargs* is a dictionary of keyword options used to build the
    renderer. It defaults to an empty dictionary.

    Returns the renderer instance.

    Raises KeyError if no renderer could be instantiated.
    '''
    # a fresh dictionary per call instead of a shared mutable default
    if kwargs is None:
        kwargs = {}

    # Only the registry lookup is guarded: a KeyError raised inside the
    # renderer's constructor is a genuine error and must not be mistaken
    # for "plugin not registered".
    try:
        cls = Component.getPlugins()["render"]["render-%s" % renderer_name]
    except KeyError:
        # This was uncommented to fix one bug
        # but uncommenting invalidates user renderers
        # TODO: needs to be revisited
        renderer = makeRenderer(renderer_name, kwargs)
    else:
        renderer = cls(**kwargs)

    if not renderer:
        raise KeyError(
            "could not find renderer '%s'. Available renderers:\n  %s" %
            (renderer_name,
             "\n  ".join(
                 sorted(Component.getPlugins()["render"].keys()))))

    return renderer
def setup(app):
    """Register the ``report`` directive with the application *app*.

    The application, its configuration and its directories are stored
    as attributes on this function so that code running later (for
    example ``run``) can read ``setup.srcdir`` and ``setup.builddir``.
    The build directory is the working directory at the time of the
    call.

    Returns a dictionary flagging the extension as safe for parallel
    reading.
    """
    # keep handles to the application state on the function object
    setup.app = app
    for attribute in ("config", "confdir", "srcdir"):
        setattr(setup, attribute, getattr(app, attribute))
    setup.builddir = os.getcwd()

    app.add_directive('report', report_directive)

    # update global parameters in Utils module; the return value
    # itself is not needed here.
    Utils.get_parameters()
    app.add_config_value('PARAMS', collections.defaultdict(), 'env')

    setup.logger = Component.get_logger()

    metadata = {'parallel_read_safe': True}
    return metadata
    def run(self):
        """Log the start of the directive and delegate to module-level ``run``.

        The path of the document currently being parsed and the build
        environment are taken from the directive state and passed on
        together with the directive's arguments, options, line number,
        content and state machine.
        """
        source = self.state.document.current_source

        message = "report_directive: starting: %s:%i" % (
            str(source), self.lineno)
        Component.get_logger().info(message)

        build_env = self.state.document.settings.env

        # ``run`` resolves to the module-level function, not this method
        return run(self.arguments,
                   self.options,
                   self.lineno,
                   self.content,
                   self.state_machine,
                   source,
                   build_environment=build_env)
def run(arguments,
        options,
        lineno,
        content,
        state_machine=None,
        document=None,
        srcdir=None,
        builddir=None,
        build_environment=None):
    """process:report: directive.

    The options are split into renderer, transformer, dispatcher,
    tracker and display options. If a text element for the same
    tracker, renderer and options has been written before and all
    figures mentioned in it are still present, it is re-used.
    Otherwise tracker, transformers and renderer are instantiated, the
    dispatcher is called and the resulting restructured text is written
    to the text element.

    Exceptions raised while building the output are logged and rendered
    into the document instead of being propagated.

    *arguments* - directive arguments, the first one is the tracker
    *options* - dictionary with the directive options
    *lineno* - line number of the directive, used in log messages
    *content* - lines of the directive content, added as caption
    *state_machine* - if given, receives the output lines through
        its ``insert_input`` method
    *document* - path of the rst document containing the directive
    *srcdir* - top level directory of rst documents, defaults to
        ``setup.srcdir``
    *builddir* - build directory, defaults to ``setup.builddir``
    *build_environment* - passed on to the renderer

    returns an empty list.

    raises OSError if the output directory could not be created.
    """

    # NOTE: local variable names in this function are part of its
    # behaviour - ``TEMPLATE_TEXT % locals()`` further below
    # interpolates them by name. Do not rename locals without checking
    # the template.

    tag = "%s:%i" % (str(document), lineno)

    logger = Component.get_logger()

    logger.debug("report_directive.run: profile: started: rst: %s" % tag)

    # sort out the paths
    # reference is used for time-stamping
    tracker_name = directives.uri(arguments[0])

    (basedir, fname, basename, ext, outdir,
     codename, notebookname) = Utils.build_paths(tracker_name)

    # get the directory of the rst file
    # state_machine.document.attributes['source'])
    rstdir, rstfile = os.path.split(document)
    # root of document tree
    if srcdir is None:
        srcdir = setup.srcdir

    # build directory
    if builddir is None:
        builddir = setup.builddir

    # remove symbolic links
    srcdir, builddir, rstdir = [
        os.path.abspath(os.path.realpath(x)) for x in (srcdir, builddir, rstdir)]

    # there are three directories:
    # builddir = directory where document is built in
    #            (usually _build/html or similar)
    # rstdir   = directory where rst sources are located
    # srcdir   = directory from which the build process is started

    # path to root relative to rst
    rst2srcdir = os.path.join(os.path.relpath(srcdir, start=rstdir), outdir)

    # path to root relative to rst
    rst2builddir = os.path.join(
        os.path.relpath(builddir, start=rstdir), outdir)

    # path relative to source (for images)
    root2builddir = os.path.join(
        os.path.relpath(builddir, start=srcdir), outdir)

    logger.debug(
        "report_directive.run: arguments=%s, options=%s, lineno=%s, "
        "content=%s, document=%s" %
        (str(arguments),
         str(options),
         str(lineno),
         str(content),
         str(document)))
    logger.debug(
        "report_directive.run: plotdir=%s, basename=%s, ext=%s, "
        "fname=%s, rstdir=%s, srcdir=%s, builddir=%s" %
        (tracker_name, basename, ext, fname, rstdir, srcdir, builddir))
    logger.debug(
        "report_directive.run: tracker_name=%s, basedir=%s, "
        "rst2src=%s, root2build=%s, outdir=%s, codename=%s" %
        (tracker_name, basedir, rst2srcdir, rst2builddir, outdir, codename))

    # try to create. If several processes try to create it,
    # testing with `if` will not work.
    # The exception is kept in a separate variable because the name
    # bound by ``except ... as`` is deleted at the end of the clause
    # in python 3.
    makedirs_error = None
    try:
        os.makedirs(outdir)
    except OSError as msg:
        makedirs_error = msg

    if not os.path.exists(outdir):
        raise OSError("could not create directory %s: %s" %
                      (outdir, makedirs_error))

    ########################################################
    # collect options
    # replace placeholders
    try:
        options = update_options(options)
    except ValueError as msg:
        logger.warning("failure while updating options: %s" % msg)

    logger.debug("report_directive.run: options=%s" % (str(options),))

    transformer_names = []
    renderer_name = None

    layout = options.get("layout", "column")
    long_titles = "long-titles" in options

    option_map = get_option_map()
    renderer_options = select_and_delete_options(
        options, option_map["render"])
    transformer_options = select_and_delete_options(
        options, option_map["transform"])
    dispatcher_options = select_and_delete_options(
        options, option_map["dispatch"])
    tracker_options = select_and_delete_options(
        options, option_map["tracker"], expand=["tracker"])
    display_options = get_default_display_options()
    display_options.update(select_and_delete_options(
        options, option_map["display"]))

    logger.debug("report_directive.run: renderer options: %s" %
                 str(renderer_options))
    logger.debug("report_directive.run: transformer options: %s" %
                 str(transformer_options))
    logger.debug("report_directive.run: dispatcher options: %s" %
                 str(dispatcher_options))
    logger.debug("report_directive.run: tracker options: %s" %
                 str(tracker_options))
    logger.debug("report_directive.run: display options: %s" %
                 str(display_options))

    if "transform" in display_options:
        transformer_names = display_options["transform"].split(",")
        del display_options["transform"]

    if "render" in display_options:
        renderer_name = display_options["render"]
        del display_options["render"]

    ########################################################
    # check for missing files
    if renderer_name is not None:

        # the key identifies the output: any change in options or in
        # the (whitespace-stripped) content results in a new hash.
        options_key = str(renderer_options) +\
            str(transformer_options) +\
            str(dispatcher_options) +\
            str(tracker_options) +\
            str(transformer_names) +\
            re.sub(r"\s", "", "".join(content))

        options_hash = hashlib.md5(options_key.encode()).hexdigest()[:10]

        template_name = Utils.quote_filename(
            Config.SEPARATOR.join((tracker_name, renderer_name,
                                   options_hash)))
        filename_text = os.path.join(outdir, "%s.txt" % (template_name))
        rstname = os.path.basename(filename_text)
        notebookname += options_hash

        logger.debug("report_directive.run: options_hash=%s" % options_hash)

        ###########################################################
        # check for existing files
        # update strategy does not use file stamps, but checks
        # for presence/absence of text element and if all figures
        # mentioned in the text element are present
        ###########################################################
        queries = [re.compile(r"%s/(\S+.%s)" %
                              (root2builddir, suffix))
                   for suffix in ("png", "pdf", "svg")]

        logger.debug("report_directive.run: checking for changed files.")

        # check if text element exists
        if os.path.exists(filename_text):

            with open(filename_text, "r", encoding="utf-8") as inf:
                lines = [x[:-1] for x in inf]
            filenames = []

            # check if all figures are present
            for line in lines:
                for query in queries:
                    x = query.search(line)
                    if x:
                        filenames.extend(list(x.groups()))

            filenames = [os.path.join(outdir, x) for x in filenames]
            if len(filenames) == 0:
                logger.info("report_directive.run: %s: redo: no files found" % tag)
            else:
                logger.debug(
                    "report_directive.run: %s: checking for %s" %
                    (tag, str(filenames)))
                for filename in filenames:
                    if not os.path.exists(filename):
                        logger.info(
                            "report_directive.run: %s: redo: file %s is missing" %
                            (tag, filename))
                        break
                else:
                    # loop finished without break: nothing is missing
                    logger.info(
                        "report_directive.run: %s: noredo: all files are present" %
                        tag)
                    # all is present - save text and return
                    if lines and state_machine:
                        state_machine.insert_input(
                            lines, state_machine.input_lines.source(0))
                    return []
        else:
            logger.debug(
                "report_directive.run: %s: no check performed: %s missing" %
                (tag, str(filename_text)))
    else:
        template_name = ""
        filename_text = None

    collect_here = False
    ##########################################################
    # instantiate tracker, dispatcher, renderer and transformers
    # and collect output
    ###########################################################
    try:
        ########################################################
        # find the tracker
        logger.debug(
            "report_directive.run: collecting tracker %s with options %s " %
            (tracker_name, tracker_options))
        code, tracker, tracker_path = make_tracker(
            tracker_name, (), tracker_options)
        if not tracker:
            logger.error(
                "report_directive.run: no tracker - no output from %s " %
                str(document))
            raise ValueError("tracker `%s` not found" % tracker_name)

        logger.debug(
            "report_directive.run: collected tracker %s" % tracker_name)

        tracker_id = Cache.tracker2key(tracker)

        ########################################################
        # determine the transformer
        logger.debug("report_directive.run: creating transformers")

        transformers = get_transformers(
            transformer_names, transformer_options)

        ########################################################
        # determine the renderer
        logger.debug("report_directive.run: creating renderer.")

        if renderer_name is None:
            logger.error(
                "report_directive.run: no renderer - no output from %s" %
                str(document))
            raise ValueError("the report directive requires a renderer")

        renderer = get_renderer(renderer_name, renderer_options)

        try:
            renderer.set_paths(rstdir, srcdir, builddir)
            renderer.set_display_options(display_options)
            renderer.set_build_environment(build_environment)
        except AttributeError:
            # User renderers will not have these methods
            pass

        ########################################################
        # write code output
        # links always use forward slashes, also on windows
        linked_codename = re.sub("\\\\", "/", os.path.join(rst2builddir, codename))
        if code and basedir != outdir:
            if six.PY2:
                with open(os.path.join(outdir, codename), "w") as outfile:
                    for line in code:
                        outfile.write(line)
            else:
                with open(os.path.join(outdir, codename), "w",
                          encoding=get_encoding()) as outfile:
                    for line in code:
                        outfile.write(line)

        ########################################################
        # write notebook snippet
        linked_notebookname = re.sub(
            "\\\\", "/", os.path.join(rst2builddir, notebookname))

        if basedir != outdir and tracker_id is not None:
            with open(os.path.join(outdir, notebookname), "w") as outfile:
                Utils.writeNoteBookEntry(outfile,
                                         renderer=renderer_name,
                                         tracker=tracker_name,
                                         transformers=transformer_names,
                                         tracker_path=tracker_path,
                                         options=list(renderer_options.items()) +
                                         list(tracker_options.items()) +
                                         list(transformer_options.items()))

        if filename_text is not None:
            linked_rstname = re.sub(
                "\\\\", "/", os.path.join(rst2builddir, rstname))
        else:
            linked_rstname = None

        ##########################################################
        # Initialize collectors
        links = {'code_url': linked_codename,
                 'rst_url': linked_rstname,
                 'notebook_url': linked_notebookname}

        collectors = []
        for name, collector in get_plugins("collect").items():
            collectors.append(collector(
                template_name=template_name,
                outdir=outdir,
                rstdir=rstdir,
                builddir=builddir,
                srcdir=srcdir,
                content=content,
                display_options=display_options,
                trackerd_id=tracker_id,
                links=links))

        # user renderers might not have a set_collectors method
        try:
            collect_here = not renderer.set_collectors(collectors)
        except AttributeError:
            collect_here = True

        ########################################################
        # create and call dispatcher
        logger.debug("report_directive.run: creating dispatcher")

        dispatcher = Dispatcher.Dispatcher(tracker,
                                           renderer,
                                           transformers)

        # add the tracker options
        dispatcher_options.update(tracker_options)
        blocks = dispatcher(**dispatcher_options)

        if blocks is None:
            blocks = ResultBlocks(
                Utils.buildWarning(
                    "NoData",
                    "tracker %s returned no Data" % str(tracker)))
            code = None
            tracker_id = None

    except Exception:
        # Any failure while building the output is reported inside the
        # document instead of aborting the build. KeyboardInterrupt and
        # SystemExit are deliberately not caught.
        exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
        tb = "\n".join(traceback.format_tb(exceptionTraceback))

        logger.error(
            "report_directive.run: exception caught at %s:%i: %s %s\n%s\n" %
            (str(document), lineno,
             exceptionType,
             exceptionValue,
             tb))

        blocks = ResultBlocks(Utils.buildException("invocation"))
        code = None
        tracker_id = None
        links = {'code_url': "",
                 'rst_url': "",
                 'notebook_url': ""}

    logger.debug(
        "report_directive.run: profile: started: collecting: %s" % tag)

    ###########################################################
    # replace place holders or add text
    ###########################################################
    # add default for text-only output
    requested_urls = as_list(Utils.get_params()["report_urls"])

    urls = []
    if "code" in requested_urls:
        urls.append(":download:`code <%(code_url)s>`" % links)

    if "notebook" in requested_urls:
        urls.append(":download:`nb <%(notebook_url)s>`" % links)

    map_figure2text = {}

    if collect_here:
        for collector in collectors:
            map_figure2text.update(collector.collect(blocks))

    map_figure2text["default-prefix"] = ""
    map_figure2text["default-suffix"] = ""

    if urls and "no-links" not in display_options:
        url_template = "[{}]".format(" ".join(urls))
    else:
        url_template = ""

    # the template is filled from the local variables of this function
    map_figure2text["default-prefix"] = TEMPLATE_TEXT % locals()

    blocks.updatePlaceholders(map_figure2text)

    # render the output taking into account the layout
    lines = Utils.layoutBlocks(blocks, layout, long_titles=long_titles)
    lines.append("")

    # add caption
    if content and "no-caption" not in display_options:
        lines.extend(['::', ''])
        lines.extend(['    %s' % row.strip() for row in content])
        lines.append("")

    # encode lines
    if six.PY2:
        lines = [force_encode(x, encoding="ascii", errors="replace") for x in lines]

    # output rst text for this renderer
    if filename_text:
        if six.PY2:
            with open(filename_text, "w") as outf:
                outf.write("\n".join(lines))
        else:
            with open(filename_text, "w", encoding=get_encoding()) as outf:
                outf.write("\n".join(lines))

    if CGATREPORT_DEBUG:
        for x, l in enumerate(lines):
            try:
                print(("%5i %s" % (x, l)))
            except UnicodeEncodeError:
                print(("line skipped - unicode error"))
                pass

    if len(lines) and state_machine:
        state_machine.insert_input(
            lines, state_machine.input_lines.source(0))

    logger.debug(
        "report_directive.run: profile: finished: collecting: %s" % tag)
    logger.debug(
        "report_directive.run: profile: finished: rst: %s:%i" %
        (str(document), lineno))

    return []
Exemple #6
0
def tree2table(data, transpose=False, head=None):
    """build table from data.

    The table will be multi-level (main-rows and sub-rows), if:

       1. there is more than one column
       2. each cell within a row is a list or tuple

    If any of the paths contain tuples/lists, these are
    expanded to extra columns as well.

    If head is given, no further rows are added once the table
    contains at least head rows. The sub-rows of a multi-level row
    are always added together, so the table can be slightly longer
    than head.

    If transpose is set, rows and columns are swapped.

    returns matrix, row_headers, col_headers

    raises ValueError if the data has less than two levels or if the
    cells of a multi-level row are of unequal length.
    """
    logger = Component.get_logger()

    labels = getPaths(data)

    if len(labels) < 2:
        raise ValueError("expected at least two levels for building table, got %i: %s" %
                         (len(labels), str(labels)))

    effective_labels = count_levels(labels)
    # subtract last level (will be expanded) and 1 for row header
    effective_cols = sum(effective_labels[:-1]) - 1

    col_headers = [""] * effective_cols + labels[-1]
    ncols = len(col_headers)

    # all combinations of the intermediate levels
    paths = list(itertools.product(*labels[1:-1]))
    header_offset = effective_cols
    matrix = []

    logger.debug(
        "Datatree.buildTable: creating table with %i columns" %
        (len(col_headers)))

    # the following can be made more efficient
    # by better use of indices
    row_headers = []

    # iterate over main rows
    for row in labels[0]:

        first = True
        for path in paths:

            # get data - skip if there is None
            work = getLeaf(data, (row,) + path)
            if isinstance(work, pandas.DataFrame):
                if work.empty:
                    continue
            else:
                if not work:
                    continue

            row_data = [""] * ncols

            # add row header only for first row (if there are sub-rows)
            if first:
                if type(row) in Utils.ContainerTypes:
                    row_headers.append(row[0])
                    for z, p in enumerate(row[1:]):
                        row_data[z] = p
                else:
                    row_headers.append(row)
                first = False
            else:
                row_headers.append("")

            # enter data for the first row
            # NOTE(review): this starts at column 0 and thus overwrites
            # values entered above for container-type row labels -
            # confirm that this is intended.
            for z, p in enumerate(path):
                row_data[z] = p

            # check for multi-level rows
            is_container = True
            max_rows = None
            for column in labels[-1]:
                if column not in work:
                    continue
                if type(work[column]) not in Utils.ContainerTypes:
                    is_container = False
                    break
                if max_rows is None:
                    max_rows = len(work[column])
                elif max_rows != len(work[column]):
                    raise ValueError("multi-level rows - unequal lengths: %i != %i" %
                                     (max_rows, len(work[column])))

            # add sub-rows
            if is_container:
                # multi-level rows
                for z in range(max_rows):
                    for y, column in enumerate(labels[-1]):
                        try:
                            row_data[
                                y + header_offset] = Utils.quote_rst(work[column][z])
                        except KeyError:
                            pass

                    # the last sub-row is appended below
                    if z < max_rows - 1:
                        matrix.append(row_data)
                        row_headers.append("")
                        row_data = [""] * ncols
            else:
                # single level row
                for y, column in enumerate(labels[-1]):
                    try:
                        row_data[
                            y + header_offset] = Utils.quote_rst(work[column])
                    except KeyError:
                        pass

            matrix.append(row_data)

            if head and len(matrix) >= head:
                break

        # the break above only leaves the loop over paths - stop
        # iterating over the main rows as well, otherwise every
        # remaining main row adds one more row to the table.
        if head and len(matrix) >= head:
            break

    if transpose:
        row_headers, col_headers = col_headers, row_headers
        matrix = list(zip(*matrix))

    # convert headers to string (might be None)
    row_headers = [str(x) for x in row_headers]
    col_headers = [str(x) for x in col_headers]

    return matrix, row_headers, col_headers
Exemple #7
0
def as_dataframe(data, tracker=None):
    '''convert data tree to pandas DataFrame.

    The data frame is multi-indexed according to the depth within the
    data tree.

    If the data-tree has only one level, the data will be
    single-indexed because pandas will not tolerate a single level
    MultiIndex.

    The code assumes that the data tree has a uniform
    depth and structure.

    The inner-most level in the *data* tree will be columns. However,
    if *data* is only a single-level dictionary, the keys in the
    dictionary will be row labels and the resultant dataframe will
    have only one column.

    Depending on the type of the leaf, the data frame is constructed
    as follows:

    Leaves are multiple arrays of the same size

        The data is assumed to be coordinate type data (x,y,z
        values). Leaves will be added to a dataframe as multiple
        columns.

    Leaves are a single array or arrays with dissimilar size

        A melted data frame will be constructed where
        the hierarchical index contains the path information
        and the data frame has a single column with the value.

    Leaf is a dataframe

        Dataframes will be concatenated. Existing indices
        of the dataframes will be preserved with the exception
        of the trivial index for the row numbers.

        Requires:
            All dataframes need to have the same columns.

    Leaf is a scalar
        Dataframes will be built from a nested dictionary

    Special cases for backwards compatibility:

    1. Lowest level dictionary contains the following arrays: rows,
        columns, matrix - numpy matrix, convert to dataframe and apply
        as above

    2. Lowest level dictionary contains the following keys:
        '01', '10', '11' - Venn 2-set data, convert columns
        '001', '010', ... - Venn 3-set data, convert columns

    Pandas attempts to find a column data type that will
    fit all values in a column. Thus, if a column is numeric,
    but contains values such as "inf", "Inf", as well, the
    column type might be set to object or char.

    Unnamed levels of a hierarchical index are named after the
    ``levels`` attribute of *tracker*, if *tracker* is given and has
    such an attribute, and "track", "slice", "level0", ... otherwise.
    A non-hierarchical index is called "track".

    Note that *data* is modified in-place for the special cases
    listed above.

    Returns a data frame or None if there is no data.

    Raises NotImplementedError if the data tree is not of uniform
    depth.
    '''
    if data is None or len(data) == 0:
        return None

    logger = Component.get_logger()

    levels = getDepths(data)
    if len(levels) == 0:
        return None

    mi, ma = min(levels), max(levels)
    if mi != ma:
        raise NotImplementedError(
            'data tree not of uniform depth, min=%i, max=%i' %
            (mi, ma))

    labels = getPaths(data)

    ######################################################
    ######################################################
    ######################################################
    # check special cases
    MATRIX = ('rows', 'columns', 'matrix')
    VENN2 = ('10', '01', '11')
    VENN3 = ('010', '001', '011')
    dataframe_prune_index = True
    branches = list(getNodes(data, len(labels) - 2))
    for path, branch in branches:
        # numpy matrix - dictionary with keys matrix, rows, columns
        if len(set(branch.keys()).intersection(MATRIX)) == len(MATRIX):
            df = pandas.DataFrame(branch['matrix'],
                                  columns=branch['columns'],
                                  index=branch['rows'])
            setLeaf(data, path, df)
            dataframe_prune_index = False

        elif len(set(branch.keys()).intersection(VENN2)) == len(VENN2) or \
                len(set(branch.keys()).intersection(VENN3)) == len(VENN3):
            # sort so that 'labels' is not the first item
            # specify data such that 'labels' will a single tuple entry
            values = sorted(branch.items())
            df = listAsDataFrame(values)
            dataframe_prune_index = False
            setLeaf(data, path, df)

    ######################################################
    ######################################################
    ######################################################
    # the special cases above might have changed the tree
    labels = getPaths(data)
    # build multi-index
    leaves = list(getNodes(data, len(labels) - 1))

    # if set to a number, any superfluous levels in the
    # hierarchical index of the final dataframe will
    # be removed.
    expected_levels = None

    # the type of the first leaf decides how the tree is converted
    leaf = leaves[0][1]

    if is_array(leaf):

        # build dataframe from arrays
        dataframes = []
        index_tuples = []

        # not a nested dictionary
        if len(labels) == 1:
            branches = [(('all',), data)]
        else:
            branches = list(getNodes(data, max(0, len(labels) - 2)))

        # check if it is coordinate data
        # All arrays need to have the same length
        is_coordinate = True
        for path, subtree in branches:
            lengths = [len(x) for x in list(subtree.values())]
            if len(lengths) == 0:
                continue

            # a single array or arrays of different lengths
            # are not coordinate data
            if len(lengths) == 1 or min(lengths) != max(lengths):
                is_coordinate = False
                break

        if is_coordinate:
            logger.debug('dataframe conversion: from array - coordinates')
            for path, subtree in branches:
                # skip empty leaves
                if len(subtree) == 0:
                    continue
                dataframes.append(pandas.DataFrame(subtree))
                index_tuples.append(path)
        else:
            logger.debug('dataframe conversion: from array - series')
            # arrays of unequal length are measurements
            # build a melted data frame with a single column
            # given by the name of the path.
            for key, leave in leaves:
                # skip empty leaves
                if len(leave) == 0:
                    continue
                index_tuples.append(key)
                dataframes.append(pandas.DataFrame(leave,
                                                   columns=('value',)))

        # all leaves were empty - there is no data, same as for
        # dataframe leaves below
        if len(index_tuples) == 0:
            return None

        expected_levels = len(index_tuples[0])
        df = concatDataFrames(dataframes, index_tuples)

    elif is_dataframe(leaf):
        logger.debug('dataframe conversion: from dataframe')

        # build dataframe from list of dataframes
        # by concatenation.
        # Existing indices of the dataframes will
        # be added as columns.
        dataframes = []
        index_tuples = []
        path_lengths = []
        levels = []
        for path, dataframe in leaves:
            if len(dataframe) == 0:
                continue
            path_lengths.append(len(path))
            if len(path) == 1:
                # if only one level, do not use tuple
                index_tuples.append(path[0])
            else:
                index_tuples.append(path)
            dataframes.append(dataframe)

            levels.append(Utils.getDataFrameLevels(
                dataframe,
                test_for_trivial=True))

        if len(path_lengths) == 0:
            return None

        assert min(path_lengths) == max(path_lengths)
        assert min(levels) == max(levels)

        # if only a single dataframe without given
        # tracks, return dataframe
        if index_tuples == ["all"]:
            df = dataframes[0]
            # if index is a simple numeric list, change to "all"
            # (test on the dtype of the index as the class
            # pandas.Int64Index is not available in recent versions
            # of pandas)
            if df.index.dtype.kind == "i" and \
               df.index.name is None:
                df.index = ["all"] * len(df)
            return df

        expected_levels = min(path_lengths) + min(levels)
        df = concatDataFrames(dataframes, index_tuples)

    else:
        logger.debug('dataframe conversion: from values')
        if len(labels) == 1:
            # { 'x': 1, 'y': 2 } -> DF with two rows (x, y) and one column
            df = pandas.DataFrame(list(data.values()), index=list(data.keys()))
        elif len(labels) == 2:
            # { 'a': {'x': 1, 'y': 2}, 'b': {'y': 2} }
            # -> DF with two columns(x,y) and two rows(a,b)
            df = pandas.DataFrame.from_dict(data).transpose()
            # reorder so that order of columns corresponds to data
            df = df[labels[-1]]
        else:
            # We are dealing with a simple nested dictionary
            branches = list(getNodes(data, max(0, len(labels) - 3)))
            dataframes = []
            index_tuples = []
            for path, nested_dict in branches:
                # transpose to invert columns and rows
                # in cgatreport convention, the deeper
                # level in a dictionary in cgatreport are columns, while
                # in pandas they are rows.
                df = pandas.DataFrame(nested_dict).transpose()
                dataframes.append(df)
                index_tuples.extend([path])
            df = concatDataFrames(dataframes, index_tuples)

    # remove index with row numbers
    if expected_levels is not None and dataframe_prune_index:
        Utils.pruneDataFrameIndex(df, expected_levels)

    # rename levels in hierarchical index
    # (pandas.MultiIndex is the public name of the class, the module
    # pandas.core.index is not available in recent versions of pandas)
    is_hierarchical = isinstance(df.index, pandas.MultiIndex)

    if is_hierarchical:
        n = list(df.index.names)

        # use the level names of the tracker, if there are any.
        # Without tracker or without level names, use the defaults.
        level_names = getattr(tracker, "levels", None)
        if level_names is None:
            level_names = ["track", "slice"] + \
                ["level%i" % x for x in range(len(n))]

        for x, y in enumerate(n):
            if y is None:
                n[x] = level_names[x]
        df.index.names = n
    else:
        df.index.name = 'track'

    return df
def run(arguments,
        options,
        lineno,
        content,
        state_machine=None,
        document=None,
        srcdir=None,
        builddir=None):
    """Process a ``:report:`` directive and emit the resulting rst text.

    The tracker named in ``arguments[0]`` is instantiated, its data is sent
    through the requested transformers and renderer by a dispatcher, and the
    resulting blocks are handed to all registered collectors. The rst text
    that is built from the blocks is written to a text file in the output
    directory and, if a state machine is given, inserted into it.

    If the text file of a previous run exists and all figures mentioned in
    it are still present, the cached text is re-used and nothing is rebuilt.

    Arguments
    ---------
    arguments : list
        Directive arguments. The first element is the tracker name.
    options : dict
        Directive options (render, transform, layout, ...).
    lineno : int
        Line number of the directive; only used for log messages.
    content : list
        Rows of the directive body. They are appended as a literal block
        (caption) to the output.
    state_machine :
        If given, the generated lines are inserted via ``insert_input``.
    document : str
        Path of the rst document containing the directive.
    srcdir : str
        Top level directory of rst documents. Defaults to ``setup.srcdir``.
    builddir : str
        Build directory. Defaults to ``setup.builddir``.

    Returns
    -------
    list
        Always an empty list; output is delivered through `state_machine`
        and the files written to the output directory.

    Raises
    ------
    OSError
        If the output directory does not exist and could not be created.
    """

    # NOTE: local variable names in this function must not be changed
    # lightly - TEMPLATE_TEXT is interpolated with ``locals()`` below and
    # may refer to any of them.

    tag = "%s:%i" % (str(document), lineno)

    logging.debug("report_directive.run: profile: started: rst: %s" % tag)

    # sort out the paths
    # reference is used for time-stamping
    tracker_name = directives.uri(arguments[0])

    (basedir, fname, basename, ext, outdir,
     codename, notebookname) = Utils.build_paths(tracker_name)

    # get the directory of the rst file
    # state_machine.document.attributes['source'])
    rstdir, rstfile = os.path.split(document)
    # root of document tree
    if srcdir is None:
        srcdir = setup.srcdir

    # build directory
    if builddir is None:
        builddir = setup.builddir

    # remove symbolic links
    srcdir, builddir, rstdir = [
        os.path.realpath(x) for x in (srcdir, builddir, rstdir)]

    # there are three directories:
    # builddir = directory where document is built in
    #            (usually _build/html or similar)
    # rstdir   = directory where rst sources are located
    # srcdir   = directory from which the build process is started

    # path to root relative to rst
    rst2srcdir = os.path.join(os.path.relpath(srcdir, start=rstdir), outdir)

    # path to root relative to rst
    rst2builddir = os.path.join(
        os.path.relpath(builddir, start=rstdir), outdir)

    # path relative to source (for images)
    root2builddir = os.path.join(
        os.path.relpath(builddir, start=srcdir), outdir)

    logging.debug(
        "report_directive.run: arguments=%s, options=%s, lineno=%s, "
        "content=%s, document=%s" %
        (str(arguments),
         str(options),
         str(lineno),
         str(content),
         str(document)))
    logging.debug(
        "report_directive.run: plotdir=%s, basename=%s, ext=%s, "
        "fname=%s, rstdir=%s, srcdir=%s, builddir=%s" %
        (tracker_name, basename, ext, fname, rstdir, srcdir, builddir))
    logging.debug(
        "report_directive.run: tracker_name=%s, basedir=%s, "
        "rst2src=%s, root2build=%s, outdir=%s, codename=%s" %
        (tracker_name, basedir, rst2srcdir, rst2builddir, outdir, codename))

    # try to create. If several processes try to create it,
    # testing with `if` will not work.
    try:
        os.makedirs(outdir)
    except OSError as error:
        # A failure is only fatal if the directory is really missing;
        # otherwise another process (or a previous run) created it.
        # The check has to happen inside the handler as the exception
        # name is unbound once the ``except`` clause is left.
        if not os.path.exists(outdir):
            raise OSError(
                "could not create directory %s: %s" % (outdir, error)
            ) from error

    ########################################################
    # collect options
    # replace placeholders
    try:
        options = Utils.updateOptions(options)
    except ValueError as msg:
        logging.warning("failure while updating options: %s" % msg)

    logging.debug("report_directive.run: options=%s" % (str(options),))

    transformer_names = []
    renderer_name = None

    # get layout option
    layout = options.get("layout", "column")

    option_map = Component.getOptionMap()
    renderer_options = Utils.selectAndDeleteOptions(
        options, option_map["render"])
    transformer_options = Utils.selectAndDeleteOptions(
        options, option_map["transform"])
    dispatcher_options = Utils.selectAndDeleteOptions(
        options, option_map["dispatch"])
    tracker_options = Utils.selectAndDeleteOptions(
        options, option_map["tracker"])
    display_options = Utils.selectAndDeleteOptions(
        options, option_map["display"])

    logging.debug("report_directive.run: renderer options: %s" %
                  str(renderer_options))
    logging.debug("report_directive.run: transformer options: %s" %
                  str(transformer_options))
    logging.debug("report_directive.run: dispatcher options: %s" %
                  str(dispatcher_options))
    logging.debug("report_directive.run: tracker options: %s" %
                  str(tracker_options))
    logging.debug("report_directive.run: display options: %s" %
                  str(display_options))

    if "transform" in display_options:
        transformer_names = display_options["transform"].split(",")
        del display_options["transform"]

    if "render" in display_options:
        renderer_name = display_options["render"]
        del display_options["render"]

    ########################################################
    # check for missing files
    if renderer_name is not None:

        # the hash distinguishes outputs of the same tracker/renderer
        # combination that were created with different options.
        options_key = str(renderer_options) +\
            str(transformer_options) +\
            str(dispatcher_options) +\
            str(tracker_options) +\
            str(transformer_names)

        options_hash = hashlib.md5(options_key.encode()).hexdigest()

        template_name = Utils.quote_filename(
            Config.SEPARATOR.join((tracker_name, renderer_name,
                                   options_hash)))
        filename_text = os.path.join(outdir, "%s.txt" % (template_name))

        notebookname += options_hash

        logging.debug("report_directive.run: options_hash=%s" % options_hash)

        ###########################################################
        # check for existing files
        # update strategy does not use file stamps, but checks
        # for presence/absence of text element and if all figures
        # mentioned in the text element are present
        ###########################################################
        # raw string: "\S" is not a valid escape in a normal literal
        queries = [re.compile(r"%s(%s\S+.%s)" %
                              (root2builddir, outdir, suffix))
                   for suffix in ("png", "pdf", "svg")]

        logging.debug("report_directive.run: checking for changed files.")

        # check if text element exists
        if os.path.exists(filename_text):

            with open(filename_text, "r") as infile:
                lines = [x.rstrip("\n") for x in infile]
            filenames = []

            # check if all figures are present
            for line in lines:
                for query in queries:
                    x = query.search(line)
                    if x:
                        filenames.extend(list(x.groups()))

            logging.debug(
                "report_directive.run: %s: checking for %s" %
                (tag, str(filenames)))
            for filename in filenames:
                if not os.path.exists(filename):
                    logging.info(
                        "report_directive.run: %s: redo: %s missing" %
                        (tag, filename))
                    break
            else:
                # no ``break``: nothing is missing (this includes the
                # case that the text does not mention any figures).
                logging.info(
                    "report_directive.run: %s: noredo: all files are present" %
                    tag)
                # all is present - save text and return
                if lines and state_machine:
                    state_machine.insert_input(
                        lines, state_machine.input_lines.source(0))
                return []
        else:
            logging.debug(
                "report_directive.run: %s: no check performed: %s missing" %
                (tag, str(filename_text)))
    else:
        template_name = ""
        filename_text = None

    ##########################################################
    # Initialize collectors
    collectors = [
        plugin() for plugin in Component.getPlugins("collect").values()]

    ##########################################################
    # instantiate tracker, dispatcher, renderer and transformers
    # and collect output
    ###########################################################
    try:
        ########################################################
        # find the tracker
        logging.debug(
            "report_directive.run: collecting tracker %s with options %s " %
            (tracker_name, tracker_options))
        code, tracker, tracker_path = Utils.makeTracker(
            tracker_name, (), tracker_options)
        if not tracker:
            logging.error(
                "report_directive.run: no tracker - no output from %s " %
                str(document))
            raise ValueError("tracker `%s` not found" % tracker_name)

        logging.debug(
            "report_directive.run: collected tracker %s" % tracker_name)

        tracker_id = Cache.tracker2key(tracker)

        ########################################################
        # determine the transformer
        logging.debug("report_directive.run: creating transformers")

        transformers = Utils.getTransformers(
            transformer_names, transformer_options)

        ########################################################
        # determine the renderer
        logging.debug("report_directive.run: creating renderer.")

        if renderer_name is None:
            logging.error(
                "report_directive.run: no renderer - no output from %s" %
                str(document))
            raise ValueError("the report directive requires a renderer")

        renderer = Utils.getRenderer(renderer_name, renderer_options)

        try:
            renderer.set_paths(rstdir, srcdir, builddir)
            renderer.set_display_options(display_options)
        except AttributeError:
            # User renderers will not have these methods
            pass

        ########################################################
        # create and call dispatcher
        logging.debug("report_directive.run: creating dispatcher")

        dispatcher = Dispatcher.Dispatcher(tracker,
                                           renderer,
                                           transformers)

        # add the tracker options
        dispatcher_options.update(tracker_options)

        blocks = dispatcher(**dispatcher_options)

        if blocks is None:
            blocks = ResultBlocks(ResultBlocks(
                Utils.buildWarning(
                    "NoData",
                    "tracker %s returned no Data" % str(tracker))))
            code = None
            tracker_id = None

    except Exception:
        # Any failure is reported inside the document instead of aborting
        # the build. KeyboardInterrupt/SystemExit are deliberately not
        # caught so that a build can still be interrupted.
        logging.warning(
            "report_directive.run: exception caught at %s:%i - see document" %
            (str(document), lineno))

        blocks = ResultBlocks(ResultBlocks(
            Utils.buildException("invocation")))
        code = None
        tracker_id = None

    logging.debug(
        "report_directive.run: profile: started: collecting: %s" % tag)

    ########################################################
    # write code output
    # links always use forward slashes, also on windows
    linked_codename = re.sub("\\\\", "/", os.path.join(rst2srcdir, codename))
    if code and basedir != outdir:
        with open(os.path.join(outdir, codename), "w") as outfile:
            for line in code:
                outfile.write(line)

    ########################################################
    # write notebook snippet
    linked_notebookname = re.sub(
        "\\\\", "/", os.path.join(rst2srcdir, notebookname))
    if basedir != outdir and tracker_id is not None:
        # dict views can not be concatenated with ``+``, convert to
        # lists first.
        notebook_options = (list(renderer_options.items()) +
                            list(tracker_options.items()) +
                            list(transformer_options.items()))
        with open(os.path.join(outdir, notebookname), "w") as outfile:
            Utils.writeNoteBookEntry(outfile,
                                     renderer=renderer_name,
                                     tracker=tracker_name,
                                     transformers=transformer_names,
                                     tracker_path=tracker_path,
                                     options=notebook_options)

    ###########################################################
    # collect images
    ###########################################################
    map_figure2text = {}
    links = {'code_url': linked_codename,
             'notebook_url': linked_notebookname}
    try:
        for collector in collectors:
            map_figure2text.update(collector.collect(
                blocks,
                template_name,
                outdir,
                rstdir,
                builddir,
                srcdir,
                content,
                display_options,
                tracker_id,
                links=links))
    except Exception:
        # see above: report the failure in the document, but do not
        # swallow KeyboardInterrupt/SystemExit.
        logging.warning("report_directive.run: exception caught while "
                        "collecting with %s at %s:%i - see document" %
                        (collector, str(document), lineno))
        blocks = ResultBlocks(ResultBlocks(
            Utils.buildException("collection")))
        code = None
        tracker_id = None

    ###########################################################
    # replace place holders or add text
    ###########################################################
    # add default for text-only output
    urls = Utils.asList(Utils.PARAMS["report_urls"])
    code_url, nb_url = "", ""
    if "code" in urls:
        code_url = "`code <%(code_url)s>`__" % links

    if "notebook" in urls:
        nb_url = '`nb <%(notebook_url)s>`__' % links

    # the template is filled from the local variables of this function
    map_figure2text["default-prefix"] = TEMPLATE_TEXT % locals()
    map_figure2text["default-suffix"] = ""
    blocks.updatePlaceholders(map_figure2text)

    # render the output taking into account the layout
    lines = Utils.layoutBlocks(blocks, layout)
    lines.append("")

    # add caption
    lines.extend(['::', ''])
    if content:
        lines.extend(['    %s' % row.strip() for row in content])
        lines.append("")

    lines.append("")

    # output rst text for this renderer
    if filename_text:
        with open(filename_text, "w") as outfile:
            outfile.write("\n".join(lines))

    if CGATREPORT_DEBUG:
        for x, l in enumerate(lines):
            print("%5i %s" % (x, l))

    if len(lines) and state_machine:
        state_machine.insert_input(
            lines, state_machine.input_lines.source(0))

    logging.debug(
        "report_directive.run: profile: finished: collecting: %s" % tag)
    logging.debug(
        "report_directive.run: profile: finished: rst: %s:%i" %
        (str(document), lineno))

    return []
Exemple #9
0
def main(argv=None, **kwargs):
    '''main function for test.py.

    Long-form of command line arguments can also be supplied as kwargs.

    If argv is not None, command line parsing will be performed. If
    neither argv nor kwargs are given, ``sys.argv`` is parsed.

    Either a tracker (``-t/--tracker``) or a page (``-p/--page``) needs
    to be given. A tracker is instantiated and run through a dispatcher
    with the requested transformers and renderer; a page is built with
    ``build.buildPlots``.

    Returns the dataframe of the dispatcher if a tracker was given and
    no renderer was created (renderer ``none``, notebook output or one
    of the interpreter options). Otherwise nothing is returned.

    Raises ValueError if neither tracker nor page have been specified
    and IOError if the page does not exist. The process exits with
    status 1 if the tracker can not be found.
    '''
    # NOTE: the local variables of this function are exposed to the
    # interactive console/ipython session started at the end, so their
    # names are part of the user-visible behaviour.
    logger = Component.get_logger()

    parser = optparse.OptionParser(version="%prog version: $Id$",
                                   usage=globals()["__doc__"])

    parser.add_option("-t", "--tracker", dest="tracker", type="string",
                      help="tracker to use [default=%default]")

    parser.add_option("-p", "--page", dest="page", type="string",
                      help="render an rst page [default=%default]")

    parser.add_option("-a", "--tracks", dest="tracks", type="string",
                      help="tracks to use [default=%default]")

    parser.add_option("-m", "--transformer", dest="transformers",
                      type="string", action="append",
                      help="add transformation [default=%default]")

    parser.add_option("-s", "--slices", dest="slices", type="string",
                      help="slices to use [default=%default]")

    parser.add_option("-r", "--renderer", dest="renderer", type="string",
                      help="renderer to use [default=%default]")

    parser.add_option("-w", "--path", "--trackerdir",
                      dest="trackerdir", type="string",
                      help="path to trackers [default=%default]")

    parser.add_option("-f", "--force", dest="force", action="store_true",
                      help="force recomputation of data by deleting cached "
                      "results [default=%default]")

    parser.add_option("-o", "--option", dest="options", type="string",
                      action="append",
                      help="renderer options - supply as key=value pairs "
                      "(without spaces). [default=%default]")

    parser.add_option("-l", "--language", dest="language", type="choice",
                      choices=("rst", "notebook"),
                      help="output language for snippet. Use ``rst`` "
                      "to create a snippet to paste "
                      "into a cgatreport document. Use ``notebook`` to "
                      "create a snippet to paste "
                      "into an ipython notebook [default=%default]")

    parser.add_option("--no-print", dest="do_print", action="store_false",
                      help="do not print an rst text element to create "
                      "the displayed plots [default=%default].")

    parser.add_option("--no-show", dest="do_show", action="store_false",
                      help="do not show a plot [default=%default].")

    parser.add_option("--layout", dest="layout", type="string",
                      help="output rst with layout [default=%default].")

    parser.add_option("-i", "--start-interpreter", dest="start_interpreter",
                      action="store_true",
                      help="do not render, but start python interpreter "
                      "[default=%default].")

    parser.add_option("-I", "--ii", "--start-ipython", dest="start_ipython",
                      action="store_true",
                      help="do not render, start ipython interpreter "
                      "[default=%default].")

    parser.add_option(
        "--workdir", dest="workdir", type="string",
        help="working directory - change to this directory "
        "before executing "
        "[default=%default]")

    parser.add_option(
        "--hardcopy", dest="hardcopy", type="string",
        help="output images of plots. The parameter should "
        "contain one or more %s "
        "The suffix determines the type of plot. "
        "[default=%default].")

    parser.set_defaults(
        loglevel=1,
        tracker=None,
        transformers=[],
        tracks=None,
        slices=None,
        options=[],
        renderer="table",
        do_show=True,
        do_print=True,
        force=False,
        trackerdir=TRACKERDIR,
        caption="add caption here",
        start_interpreter=False,
        start_ipython=False,
        language="rst",
        workdir=None,
        layout=None,
        dpi=100)

    if argv is None and len(kwargs) == 0:
        argv = sys.argv

    if argv:
        (options, args) = parser.parse_args(argv)
    else:
        (options, args) = parser.parse_args([])

        ######################################################
        # set keyword arguments as options
        # (iterate over a copy as kwargs is modified in the loop)
        for keyword, value in list(kwargs.items()):
            if hasattr(options, keyword):
                setattr(options, keyword, value)
                del kwargs[keyword]

        # change some kwarguments
        if options.transformers:
            for keyword, value in list(kwargs.items()):
                if keyword.startswith("tf"):
                    kwargs["tf-{}".format(keyword[2:])] = value

    if options.workdir is not None:
        savedir = os.getcwd()
        os.chdir(options.workdir)
    else:
        savedir = None

    if args:
        update_options_from_blob(kwargs, options, args)

    Utils.update_parameters(sorted(glob.glob("*.ini")))

    ######################################################
    # configure options
    options.trackerdir = os.path.abspath(
        os.path.expanduser(options.trackerdir))
    if os.path.exists(options.trackerdir):
        sys.path.insert(0, options.trackerdir)
    else:
        logger.warning("directory %s does not exist" % options.trackerdir)

    ######################################################
    # test plugins
    for x in options.options:
        if "=" in x:
            # only split at the first "=", the value may contain more
            data = x.split("=")
            key, val = [y.strip() for y in (data[0], "=".join(data[1:]))]
        else:
            key, val = x.strip(), None
        kwargs[key] = val

    if options.tracks:
        kwargs["tracks"] = options.tracks
    if options.slices:
        kwargs["slices"] = options.slices

    kwargs = update_options(kwargs)

    option_map = get_option_map()
    renderer_options = select_and_delete_options(
        kwargs, option_map["render"])
    transformer_options = select_and_delete_options(
        kwargs, option_map["transform"])
    display_options = select_and_delete_options(
        kwargs, option_map["display"])
    tracker_options = select_and_delete_options(
        kwargs, option_map["tracker"], expand=["tracker"])

    ######################################################
    # decide whether to render or not
    if options.renderer == "none" or options.start_interpreter or \
       options.start_ipython or options.language == "notebook":
        renderer = None
    else:
        renderer = get_renderer(options.renderer, {**renderer_options,
                                                   **kwargs})

    try:
        rstdir = os.getcwd()
        srcdir = os.getcwd()
        builddir = os.getcwd()
        renderer.set_paths(rstdir, srcdir, builddir)
        renderer.set_display_options(display_options)
    except AttributeError:
        # User renderers will not have these methods; this branch is
        # also taken if no renderer has been created (renderer is None).
        pass

    transformers = get_transformers(
        options.transformers, transformer_options)

    # names that are not offered as trackers
    exclude = set(("Tracker",
                   "TrackerSQL",
                   "returnLabeledData",
                   "returnMultipleColumnData",
                   "returnMultipleColumns",
                   "returnSingleColumn",
                   "returnSingleColumnData",
                   "SQLError",
                   "MultipleColumns",
                   "MultipleColumnData",
                   "LabeledData",
                   "DataSimple",
                   "Data"))

    ######################################################
    # build from tracker
    if options.tracker:

        if "." in options.tracker:
            parts = options.tracker.split(".")
            tracker_modulename = ".".join(parts[:-1])
            tracker_name = parts[-1]
        else:
            tracker_modulename = None
            tracker_name = options.tracker

        try:
            _code, tracker, tracker_path = make_tracker(
                options.tracker, (), tracker_options)
        except ImportError:
            # try to find class in module
            trackers = []

            for filename in glob.glob(
                    os.path.join(options.trackerdir, "*.py")):
                modulename = os.path.basename(filename)
                trackers.extend(
                    [x for x in get_available_trackers(modulename)
                     if x[0] not in exclude])

            for name, tracker_class, modulename, is_derived in trackers:
                if name == tracker_name:
                    if tracker_modulename is not None:
                        if modulename == tracker_modulename:
                            break
                    else:
                        tracker_modulename = modulename
                        break
            else:
                # loop finished without ``break``: tracker not found
                available_trackers = set([x[0] for x in trackers if x[3]])
                print((
                    "unknown tracker '%s': possible trackers are\n  %s" %
                    (options.tracker, "\n  ".join(sorted(available_trackers)))))
                print(
                    "(the list above does not contain functions).")
                sys.exit(1)

            # instantiate functors
            if is_derived:
                tracker = tracker_class(**kwargs)
            #  but not functions
            else:
                tracker = tracker_class

        # remove everything related to that tracker for a clean slate
        if options.force:
            removed = CGATReport.clean.removeTracker(tracker_name)
            print(("removed all data for tracker %s: %i files" %
                   (tracker_name, len(removed))))

        dispatcher = Dispatcher(tracker, renderer, transformers)

        if renderer is None:
            # dispatcher.parseArguments(**kwargs)
            # result = dispatcher.collect()
            # result = dispatcher.transform()
            result = dispatcher(**kwargs)
            options.do_print = options.language == "notebook"
            options.do_show = False
            options.hardcopy = False
        else:
            # needs to be resolved between renderer and dispatcher options
            result = dispatcher(**kwargs)

        if options.do_print:

            sys.stdout.write(".. ---- TEMPLATE START --------\n\n")

            if options.language == "rst":
                writeRST(sys.stdout,
                         options,
                         kwargs,
                         renderer_options,
                         transformer_options,
                         display_options,
                         tracker_modulename,
                         tracker_name)
            elif options.language == "notebook":
                writeNotebook(sys.stdout,
                              options,
                              kwargs,
                              renderer_options,
                              transformer_options,
                              display_options,
                              tracker_modulename,
                              tracker_name)

            sys.stdout.write("\n.. ---- TEMPLATE END ----------\n")

        sys.stdout.write("\n.. ---- OUTPUT-----------------\n")

        if result and renderer is not None:
            if options.layout is not None:
                lines = Utils.layoutBlocks(result, layout=options.layout)
                print(("\n".join(lines)))
            else:
                for r in result:
                    if r.title:
                        print("")
                        print(("title: %s" % r.title))
                        print("")
                    for ss in str(r).split("\n"):
                        print(force_encode(ss))

        if options.hardcopy:

            fig_managers = _pylab_helpers.Gcf.get_all_fig_managers()
            # create all the images
            for figman in fig_managers:
                # the "%s" placeholder is replaced by the figure number
                figid = figman.num
                outfile = re.sub("%s", str(figid), options.hardcopy)
                figman.canvas.figure.savefig(outfile, dpi=options.dpi)

        if result and options.do_show:
            if options.renderer.startswith("r-"):
                for r in result:
                    if hasattr(r, 'rggplot'):
                        from rpy2.robjects import r as R
                        import rpy2.rinterface
                        try:
                            R.plot(r.rggplot)
                        except rpy2.rinterface.RRuntimeError as msg:
                            if re.search("object.*not found", str(msg)):
                                print(('%s: available columns in dataframe=%s' %
                                       (msg,
                                        R('''colnames(rframe)'''))))

                # keep the process alive until the user interrupts it
                print("press Ctrl-c to stop")
                while 1:
                    pass

            elif len(_pylab_helpers.Gcf.get_all_fig_managers()) > 0:
                plt.show()

            else:
                for rr in result:
                    # use the loop variable - `r` is either undefined
                    # here or a left-over from an earlier loop.
                    if hasattr(rr, 'xls'):
                        tmpfile, outpath = tempfile.mkstemp(
                            dir='.', suffix='.xlsx')
                        os.close(tmpfile)
                        print(('saving xlsx to %s' % outpath))
                        rr.xls.save(outpath)
                    elif hasattr(rr, 'bokeh'):
                        import bokeh.plotting as bk
                        bk.show(rr.bokeh)

    ######################################################
    # build page
    elif options.page:

        from CGATReport import build
        CGATReport.report_directive.DEBUG = True
        CGATReport.report_directive.FORCE = True

        if not os.path.exists(options.page):
            raise IOError("page %s does not exist" % options.page)

        options.num_jobs = 1

        build.buildPlots(
            [options.page, ], options, [], os.path.dirname(options.page))

        if options.do_show:
            if options.renderer.startswith("r-"):
                print("press Ctrl-c to stop")
                while 1:
                    pass

            # get_all_fig_managers() returns a list, compare its length
            elif len(_pylab_helpers.Gcf.get_all_fig_managers()) > 0:
                plt.show()

    else:
        raise ValueError(
            "please specify either a tracker "
            "(-t/--tracker) or a page (-p/--page) to test")

    if savedir is not None:
        os.chdir(savedir)

    if options.tracker and renderer is None:
        datatree = dispatcher.getDataTree()
        dataframe = dispatcher.getDataFrame()

        # trying to push R objects
        # from rpy2.robjects import r as R
        # for k, v in flat_iterator(datatree):
        #     try:
        #         R.assign(k, v)
        #     except ValueError, msg:
        #         print ("could not push %s: %s" % (k,msg))
        #         pass
        # print ("----------------------------------------")
        if options.start_interpreter:
            print ("--> cgatreport - available data structures <--")
            print(("    datatree=%s" % type(datatree)))
            print(("    dataframe=%s" % type(dataframe)))
            # make module globals and the locals of this function
            # available in the console; locals take precedence.
            interpreter = code.InteractiveConsole(
                dict(list(globals().items()) + list(locals().items())))
            interpreter.interact()
        elif options.start_ipython:
            import IPython
            IPython.embed()

        return dataframe