def render(self, dataframe, path):
    """Render rows of *dataframe* as an HTML slideshow embedded in rst.

    Each row must provide 'filename' and 'name' entries; rendering is
    aborted with a warning (returning the blocks built so far) if
    either key is missing.
    """
    blocks = ResultBlocks()
    options = self.get_slideshow_options()
    lines = [self.prefix % options]
    for title, row in dataframe.iterrows():
        # drop missing values so zip() pairs only present fields
        row = row[row.notnull()]
        values = row.tolist()
        headers = list(row.index)
        dataseries = dict(zip(headers, values))
        try:
            # return value is a series
            filename = dataseries['filename']
        except KeyError:
            self.warn(
                "no 'filename' key in path %s" % (path2str(path)))
            return blocks
        try:
            # return value is a series
            name = dataseries['name']
        except KeyError:
            self.warn(
                "no 'name' key in path %s" % (path2str(path)))
            return blocks
        description, title = os.path.split(name)
        lines.extend(self.add_image(filename, title, description))
    # close the slideshow container opened by self.prefix
    lines.append("""</div>""")
    lines.append(self.skin % options)
    lines.append("""</div>""")
    # re-split so that multi-line entries get indented uniformly below
    lines = "\n".join(lines).split("\n")
    # NOTE(review): ".. only::html" lacks a space before "html" —
    # docutils expects ".. only:: html"; confirm against rendered output.
    lines = [".. only::html\n"] +\
        [" .. raw:: html\n"] +\
        [" " + x for x in lines]
    lines = "\n".join(lines)
    blocks.append(ResultBlock(text=lines, title=path2str(path)))
    return blocks
def fillWorksheet(ws, dataframe, title):
    # Append-only fill of worksheet *ws* from *dataframe*: one header
    # row followed by one row per dataframe row (index rendered via
    # path2str).
    # NOTE(review): ``col_headers`` is not defined in this scope — this
    # looks like a closure fragment of asSpreadSheet relying on the
    # enclosing function's variables; confirm intended scoping.
    ws.append([""] + list(col_headers))
    for x, row in enumerate(dataframe.iterrows()):
        ws.append([path2str(row[0])] + list(row[1]))
    # patch: maximum title length seems to be 31
    ws.title = title[:30]
def __call__(self, dataframe, path):
    """Render *dataframe* as rst, additionally writing a file or
    spread-sheet version when the table is large or self.separate is
    set."""
    results = ResultBlocks()
    if dataframe is None:
        return results

    title = path2str(path)
    row_headers = dataframe.index
    col_headers = dataframe.columns

    # do not output large matrices as rst files
    too_large = (len(row_headers) > self.max_rows or
                 len(col_headers) > self.max_cols)
    if self.separate or (too_large and not self.force):
        if self.large == "xls":
            block = self.asSpreadSheet(dataframe, row_headers,
                                       col_headers, title)
        else:
            block = self.asFile(dataframe, row_headers,
                                col_headers, title)
        results.append(block)

    results.append(self.asRST(dataframe, row_headers, col_headers, title))
    return results
def __call__(self, dataframe, path):
    """Render *dataframe* as a plain-text rst literal block.

    Honours self.head/self.tail (truncated views, joined with an
    ellipsis), self.summary (describe()), otherwise the full frame.
    """
    result = ResultBlocks()
    texts = []
    if self.head or self.tail:
        if self.head:
            texts.append(str(dataframe.head(self.head)))
        if self.tail:
            texts.append(str(dataframe.tail(self.tail)))
    elif self.summary:
        texts.append(str(dataframe.describe()))
    else:
        texts.append(str(dataframe))
    # add indentation
    texts = ['\n'.join([' %s' % y for y in x.split('\n')])
             for x in texts]
    # NOTE(review): the exact newline layout of this literal was lost
    # in the source formatting; reconstructed as a standard rst
    # literal block — confirm against rendered output.
    formatted = '''
::

%s

''' % '\n ...\n'.join(texts)
    result.append(ResultBlock(formatted, title=path2str(path)))
    return result
def endPlot(self, work, path):
    """Collect the current R device as a single plot placeholder."""
    device_id = getCurrentRDevice()
    placeholder = "\n".join(("#$rpl %i$#" % device_id, ""))
    return ResultBlocks(
        ResultBlock(placeholder, title=path2str(path)))
def endPlot(self, layout, legend, path):
    """Wrap a holoviews *layout* in a placeholder result block."""
    title = path2str(path)
    block = ResultBlock(text="#$hv {}$#".format(title), title=title)
    # attach the layout so the builder can substitute the placeholder
    block.hv = layout
    return ResultBlocks(block)
def render(self, work, path):
    """render the data.

    Builds a matrix from *work* and emits it as an rst csv-table;
    large matrices are written to a separate file instead.
    """
    results = ResultBlocks(title=path)
    matrix, rows, columns = self.buildMatrix(work)
    title = path2str(path)
    # an empty matrix yields an empty block
    if len(rows) == 0:
        return ResultBlocks(ResultBlock("", title=title))
    # do not output large matrices as rst files
    # separate and force need to be mixed in.
    if self.separate or (not self.force and
                         (len(rows) > self.max_rows or
                          len(columns) > self.max_cols)):
        return ResultBlocks(self.asFile(pandas.DataFrame(matrix,
                                                         index=rows,
                                                         columns=columns),
                                        rows,
                                        columns,
                                        title),
                            title=path)
    lines = []
    lines.append(".. csv-table:: %s" % title)
    lines.append(' :header: "track","%s" ' % '","'.join(columns))
    lines.append('')
    for row in range(len(rows)):
        lines.append(
            ' "%s","%s"' % (rows[row], '","'.join(
                [self.toString(x) for x in matrix[row]])))
    lines.append("")
    if path is None:
        subtitle = ""
    else:
        subtitle = path2str(path)
    results.append(ResultBlock("\n".join(lines), title=subtitle))
    return results
def __call__(self, dataframe, path):
    """Render a status dataframe as an rst table, optionally followed
    by a glossary of test descriptions."""
    # index has test names
    # columns are description, info, status
    columns = ('description', 'info', 'status', 'name')
    if set(dataframe.columns) != set(columns):
        raise ValueError("invalid columns: expected '%s', got '%s' " %
                         (columns, dataframe.columns))
    lines = []
    dirname = os.path.join(os.path.dirname(
        sys.modules["CGATReport"].__file__), "images")
    descriptions = {}
    title = "status"
    # add header
    # NOTE(review): these csv-table header lines are discarded below
    # when ``lines`` is rebound to table2rst output — dead code?
    lines.append(".. csv-table:: %s" % "table")
    lines.append(' :header: "Track", "Test", "", "Status", "Info"')
    lines.append('')
    rows = []
    for index, values in dataframe.iterrows():
        testname = values['name']
        status = values['status']
        try:
            # map the status code to its icon; unknown codes get none
            image = ".. image:: {}\n :width: 32".format(
                os.path.join(dirname, self.map_code2image[status.upper()]))
        except KeyError:
            image = ""
        rows.append({
            "test": testname,
            "description": values["description"],
            "info": values['info'],
            "status": status,
            "track": path2str(index),
            "image": image,
        })
        descriptions[testname] = values["description"]
    # filter and sort table
    table = [self.columns]
    table.extend([[row[x] for x in self.columns] for row in rows])
    lines = Utils.table2rst(table).split("\n")
    if self.display_legend:
        lines.append(".. glossary::")
        lines.append("")
        for test, description in descriptions.items():
            lines.append('%s\n%s\n' % (Utils.indent(test, 3),
                                       Utils.indent(description, 6)))
    return ResultBlocks(ResultBlock("\n".join(lines), title=""))
def render(self, dataframe, path): R.library('ggplot2') # add all indices as columns dataframe.reset_index(inplace=True) rframe = pandas.rpy.common.convert_to_r_dataframe(dataframe) # for the issue below, see: # http://stackoverflow.com/questions/12865218/getting-rid-of-asis-class-attribute unAsIs = R('''function (x) { if(typeof(x) %in% c("integer","double")) { class(x) <- "numeric" return (x)} else if (typeof(x) == "character") { class(x) <- "character" return (x) } else { return(x) } }''') rframe = R["as.data.frame"](R.lapply(rframe, unAsIs)) R.assign("rframe", rframe) # start plot R('''gp = ggplot(rframe)''') # add aesthetics and geometries try: pp = R('''gp + %s ''' % self.statement) except ValueError as msg: raise ValueError( "could not interprete R statement: " "gp + %s; msg=%s" % (self.statement, msg)) figname = re.sub('/', '_', path2str(path)) r = ResultBlock('#$ggplot %s$#' % figname, title=path2str(path)) r.rggplot = pp r.figname = figname return ResultBlocks(r)
def render(self, dataframe, path):
    """Draw an R boxplot, one box per series in *dataframe*."""
    self.startPlot()
    series = Utils.toMultipleSeries(dataframe)
    labels = [path2str(item[0]) for item in series]
    values = [item[1] for item in series]
    R.boxplot(values, names=labels)
    return self.endPlot(dataframe, path)
def render(self, dataframe, path): R.library('ggplot2') # add all indices as columns dataframe.reset_index(inplace=True) rframe = rpy2.robjects.pandas2ri.py2ri(dataframe) # for the issue below, see: # http://stackoverflow.com/questions/12865218/getting-rid-of-asis-class-attribute unAsIs = R('''function (x) { if("AsIs" %in% class(x)) { class(x) <- class(x)[-match("AsIs", class(x))] } return (x) } ''') rframe = R["as.data.frame"](R.lapply(rframe, unAsIs)) R.assign("rframe", rframe) # start plot R('''gp = ggplot(rframe)''') # add aesthetics and geometries try: pp = R('''gp + %s ''' % self.statement) except ValueError as msg: raise ValueError( "could not interprete R statement: " "gp + %s; msg=%s" % (self.statement, msg)) figname = re.sub('/', '_', path2str(path)) r = ResultBlock('#$ggplot %s$#' % figname, title=path2str(path)) r.rggplot = pp r.figname = figname return ResultBlocks(r)
def __call__(self, dataframe, path):
    """Always render *dataframe* as a spread-sheet result block."""
    results = ResultBlocks()
    if dataframe is None:
        return results
    title = path2str(path)
    row_headers = dataframe.index
    col_headers = dataframe.columns
    sheet = self.asSpreadSheet(dataframe, row_headers,
                               col_headers, title)
    results.append(sheet)
    return results
def __call__(self, dataframe, path):
    """Render *dataframe* as a csv table, diverting large tables to a
    file or spread-sheet."""
    # modify table (adding/removing columns) according to user options
    dataframe = self.modifyTable(dataframe)
    title = path2str(path)
    results = ResultBlocks()
    row_headers = dataframe.index
    col_headers = dataframe.columns
    # as of sphinx 1.3.1, tables with more than 100 columns cause an
    # error:
    # Exception occurred:
    # File ".../docutils/writers/html4css1/__init__.py", line 642,
    #   in write_colspecs
    # colwidth = int(node['colwidth'] * 100.0 / width + 0.5)
    # ZeroDivisionError: float division by zero
    #
    # Thus, for table with more than 100 columns, force will be
    # disabled and max_cols set to a low value in order to make
    # sure the table is not displayed inline
    if len(col_headers) >= 90:
        self.force = False
        self.max_cols = 10
    # do not output large matrices as rst files
    if self.separate or (not self.force and
                         (len(row_headers) > self.max_rows or
                          len(col_headers) > self.max_cols)):
        if self.large == "xls":
            results.append(self.asSpreadSheet(dataframe, row_headers,
                                              col_headers, title))
        else:
            results.append(self.asFile(dataframe, row_headers,
                                       col_headers, title))
        if self.preview:
            # NOTE(review): the truncation below is unreachable — the
            # raise precedes it; left as-is pending the preview feature.
            raise NotImplementedError('preview not implemented')
            row_headers = row_headers[:self.max_rows]
            col_headers = col_headers[:self.max_cols]
        else:
            return results
    results.append(self.asCSV(dataframe, row_headers, col_headers, title))
    return results
def __call__(self, dataframe, path):
    """Render a status dataframe as an rst csv-table followed by a
    glossary of test descriptions."""
    # index has test names
    # columns are description, info, status
    columns = ('description', 'info', 'status', 'name')
    if set(dataframe.columns) != set(columns):
        raise ValueError("invalid columns: expected '%s', got '%s' " %
                         (columns, dataframe.columns))
    lines = []
    dirname = os.path.join(os.path.dirname(
        sys.modules["CGATReport"].__file__), "images")
    descriptions = {}
    title = "status"
    # add header
    lines.append(".. csv-table:: %s" % "table")
    lines.append(' :header: "Track", "Test", "", "Status", "Info"')
    lines.append('')
    for index, values in dataframe.iterrows():
        testname = values['name']
        description = values['description']
        info = values['info']
        status = values['status']
        track = path2str(index)
        descriptions[testname] = description
        try:
            # map the status code to its icon; unknown codes get none
            image = ".. image:: %s" %\
                os.path.join(dirname, self.map_code2image[status.upper()])
        except KeyError:
            image = ""
        lines.append(
            ' "%(track)s", ":term:`%(testname)s`", "%(image)s", "%(status)s", "%(info)s"' % locals())
    lines.append("")
    lines.append(".. glossary::")
    lines.append("")
    for test, description in descriptions.items():
        lines.append('%s\n%s\n' % (Utils.indent(test, 3),
                                   Utils.indent(description, 6)))
    return ResultBlocks(ResultBlock("\n".join(lines), title=""))
def __call__(self, dataframe, path):
    """Render each dataframe row as an entry in an rst glossary."""
    results = ResultBlocks()
    if dataframe is None:
        return results
    title = path2str(path)
    lines = [".. glossary::", ""]
    for header, data in dataframe.iterrows():
        body = "\n ".join([part.strip()
                           for part in str(data).split("\n")])
        lines.append(' %s\n %s\n' % (path2str(header), body))
    lines.append("")
    results.append(ResultBlock("\n".join(lines), title=title))
    return results
def render(self, dataframe, path):
    """Draw an R boxplot from the series contained in *dataframe*."""
    self.startPlot()
    series = Utils.toMultipleSeries(dataframe)
    labels = [path2str(entry[0]) for entry in series]
    values = [entry[1] for entry in series]
    R.boxplot(values, names=labels)
    return self.endPlot(dataframe, path)
def render(self, data):
    """Collect tracker keyword entries from *data* as result blocks."""
    # initiate output structure
    results = ResultBlocks(title='user')
    labels = DataTree.getPaths(data)
    # visit the nodes one level above the leaves
    for path, branch in DataTree.getNodes(data, len(labels) - 2):
        for keyword in Utils.TrackerKeywords:
            if keyword in branch:
                # add a result block for this keyword's content
                results.append(
                    ResultBlock(branch[keyword], title=path2str(path)))
    return results
def endPlot(self, plts, legends, path):
    """Close plots.

    Emits a ``#$bkh <id>$#`` placeholder for the current bokeh figure
    and attaches the figure to the result block so the document
    builder can substitute it later.
    """
    title = path2str(path)
    # fixed: removed dead assignment ``figid = 10`` that was
    # immediately overwritten by the real figure id below
    figid = self.bokeh_figure._id
    lines = []
    lines.append("")
    lines.append("#$bkh %s$#" % figid)
    lines.append("")
    r = ResultBlock("\n".join(lines), title=title)
    r.bokeh = self.bokeh_figure
    return ResultBlocks(r)
def endPlot(self, plots, legends, path):
    """Close plots, emitting one placeholder block per bokeh figure."""
    result = ResultBlocks()
    title = path2str(path)
    for plot in plots:
        placeholder = "\n".join(["", "#$bkh %s$#" % plot._id, ""])
        block = ResultBlock(placeholder, title=title)
        # attach the figure for later substitution by the builder
        block.bokeh = plot
        result.append(block)
    return result
def render(self, dataframe, path):
    """Plot consecutive pairs of index paths as x/y plots in R.

    The hierarchical index is expanded to the product of its levels;
    entries are taken pairwise as (x, y) series.

    Raises ValueError if a pair differs in length.
    """
    fig = self.startPlot()
    labels = dataframe.index.levels
    paths = list(itertools.product(*labels))
    self.initPlot(fig, dataframe, path)
    nplotted = 0
    for idx in range(0, len(paths), 2):
        self.initLine(path, dataframe)
        xpath = paths[idx]
        ypath = paths[idx + 1]
        xvalues, yvalues = dataframe.ix[xpath], dataframe.ix[ypath]
        if len(xvalues) != len(yvalues):
            # fixed: format arguments must be a tuple — the original
            # ``% len(xvalues), len(yvalues)`` raised a TypeError
            # instead of the intended ValueError message
            raise ValueError(
                "length of x,y tuples not consistent: %i != %i" %
                (len(xvalues), len(yvalues)))
        R.plot(xvalues, yvalues)
        self.initCoords(xvalues, yvalues)
        nplotted += 1
    self.finishPlot(fig, dataframe, path)
    figid = getCurrentRDevice()
    blocks = ResultBlocks(
        ResultBlock("\n".join(("#$rpl %i$#" % (figid), "")),
                    path2str(path)))
    return blocks
def __call__(self, dataframe, path):
    """Render *dataframe* as a csv table, diverting large tables to a
    file or spread-sheet."""
    # modify table (adding/removing columns) according to user options
    dataframe = self.modifyTable(dataframe)
    title = path2str(path)
    results = ResultBlocks()
    row_headers = dataframe.index
    col_headers = dataframe.columns
    # do not output large matrices as rst files
    if self.separate or (not self.force and
                         (len(row_headers) > self.max_rows or
                          len(col_headers) > self.max_cols)):
        if self.large == "xls":
            results.append(self.asSpreadSheet(dataframe, row_headers,
                                              col_headers, title))
        else:
            results.append(self.asFile(dataframe, row_headers,
                                       col_headers, title))
        if self.preview:
            # NOTE(review): the truncation below is unreachable — the
            # raise precedes it; left as-is pending the preview feature.
            raise NotImplementedError('preview not implemented')
            row_headers = row_headers[:self.max_rows]
            col_headers = col_headers[:self.max_cols]
        else:
            return results
    results.append(self.asCSV(dataframe, row_headers, col_headers, title))
    return results
def transform(self, data):
    """Test pairwise gene-list overlaps with the hypergeometric test.

    *data* must be a single-column dataframe whose hierarchical index
    defines the gene lists; exactly one list name must contain
    'background' and every other list must be a subset of it.

    Returns a dataframe of list pairs with enrichment fold changes
    and hypergeometric P-values.

    Raises ValueError on malformed input.
    """
    # check if data is melted:
    if len(data.columns) != 1:
        raise ValueError(
            'transformer requires dataframe with'
            'a single column, got %s' % data.columns)
    column = data.columns[0]

    # iterate over lowest levels to build a dictionary of sets
    genesets = {}
    nlevels = Utils.getDataFrameLevels(data)
    # fixed: pass an explicit list — newer pandas rejects a bare range
    for key, group in data.groupby(level=list(range(nlevels))):
        genesets[path2str(key)] = set(group[column])

    # fixed: dict.keys() is a non-indexable view on python 3; the
    # keys[0]/keys[1] indexing below requires a list
    keys = list(genesets.keys())
    background = None
    foreground = []
    for key in keys:
        if "background" in key:
            background = genesets[key]
        else:
            foreground.append(key)

    if len(keys) < 3 or background is None:
        raise ValueError(
            "Expected at least 3 lists, with one called background, "
            "instead got %i lists called %s" %
            (len(keys), ", ".join(keys)))

    # items present in a foreground list but missing from background
    missing = {
        y: [str(x) for x in genesets[y] if x not in background]
        for y in foreground}

    if any([len(missing[x]) > 0 for x in missing]):
        missing_items = "\n\t".join(
            ["%s:\t%s" % (x, ",".join(missing[x])) for x in missing])
        raise ValueError(
            "Found items in lists not in background. "
            "Missing items:\n\t %s" % missing_items)

    M = len(set(background))
    if len(keys) == 2:
        # NOTE(review): unreachable — len(keys) < 3 raises above;
        # retained to preserve the original structure.
        n = len(set(genesets[keys[1]]))
        N = len(set(genesets[keys[0]]))
        x = len(set(genesets[keys[0]]) & set(genesets[keys[1]]))
        p = scipy.stats.hypergeom.sf(x, M, n, N)
        fc = ((x + 0.0) / N) / ((n + 0.0) / M)
        values = [("Enrichment", fc), ("P-value", p)]
    else:
        enrichments = []
        pvals = []
        As = []
        Bs = []
        for a, b in itertools.combinations(keys, 2):
            N = len(set(genesets[a]))
            n = len(set(genesets[b]))
            x = len(set(genesets[a]) & set(genesets[b]))
            p = scipy.stats.hypergeom.sf(x, M, n, N)
            fc = ((x + 0.0) / N) / ((n + 0.0) / M)
            As.append(a)
            Bs.append(b)
            pvals.append(p)
            enrichments.append(fc)
        values = [("ListA", As), ("ListB", Bs),
                  ("Enrichment", enrichments), ("P-value", pvals)]
    return DataTree.listAsDataFrame(values, values_are_rows=True)
def __call__(self, dataframe, path):
    '''iterate over leaves/branches in data structure.

    This method will call the:meth:`render` method.

    If self.split_at is set and the dataframe has at least that many
    index groups, the frame is rendered in chunks of split_at groups;
    groups matching self.split_always patterns are repeated in every
    chunk.
    '''
    result = ResultBlocks()
    if not self.split_at:
        result.extend(self.render(dataframe, path))
    else:
        # split dataframe at first index
        level = Utils.getGroupLevels(dataframe)
        grouper = dataframe.groupby(level=level)
        if len(grouper) < self.split_at:
            result.extend(self.render(dataframe, path))
        else:
            # build groups: collect the groups that must appear in
            # every chunk and remember their keys so they are not
            # added twice
            always, remove_always = [], set()
            if self.split_always:
                for key, work in grouper:
                    for pat in self.split_always:
                        rx = re.compile(pat)
                        if rx.search(path2str(key)):
                            always.append((key, work))
                            remove_always.add(key)
                # groupby iterators are single-use: rebuild it
                grouper = dataframe.groupby(level=level)

            def _group_group(grouper, always, remove_always):
                # yield chunks of split_at groups, each seeded with
                # the always-included groups
                group = always[:]
                for key, work in grouper:
                    if key in remove_always:
                        continue
                    group.append((key, work))
                    if len(group) >= self.split_at:
                        yield group
                        group = always[:]
                # trailing partial chunk
                yield group

            first = True
            for group in _group_group(grouper, always, remove_always):
                # do not plot last dataframe that contains
                # only the common tracks to plot
                if not first and len(group) == len(always):
                    continue
                first = False
                df = pandas.concat(
                    [x[1] for x in group])
                # reconcile index names
                df.index.names = dataframe.index.names
                result.extend(self.render(df, path))
    return result
def __call__(self, dataframe, path):
    '''iterate over leaves/branches in data structure.

    This method will call the:meth:`render` method.

    Large dataframes are split into multiple, smaller rendered
    objects if self.split_at is not zero.

    By default, dataframes are split along the hierachical
    index. However, if there is only a single index, but multiple
    columns, the split is performed on the columns instead. This
    is used when splitting coordinate data as a result of the
    histogram transformation.
    '''
    result = ResultBlocks()
    if not self.split_at:
        result.extend(self.render(dataframe, path))
    else:
        # split dataframe at first index
        level = Utils.getGroupLevels(dataframe)
        grouper = dataframe.groupby(level=level)

        # split dataframe column wise if only one index
        # and multiple columns
        if len(grouper) == 1 and len(dataframe.columns) > self.split_at:
            columns = list(dataframe.columns)
            always = []
            if self.split_keep_first_column:
                always.append(columns[0])
            # columns to always keep
            always.extend([c for c in columns if c in self.split_always])
            columns = [c for c in columns if c not in always]
            for x in range(0, len(columns), self.split_at):
                # extract a set of columns
                result.extend(self.render(
                    dataframe.loc[:, always + columns[x:x + self.split_at]],
                    path))
        # split dataframe along index
        elif len(grouper) >= self.split_at:
            # build groups: groups matching split_always patterns are
            # repeated in every chunk
            always, remove_always = [], set()
            if self.split_always:
                for key, work in grouper:
                    for pat in self.split_always:
                        rx = re.compile(pat)
                        if rx.search(path2str(key)):
                            always.append((key, work))
                            remove_always.add(key)
                # groupby iterators are single-use: rebuild it
                grouper = dataframe.groupby(level=level)

            def _group_group(grouper, always, remove_always):
                # yield chunks of split_at groups, each seeded with
                # the always-included groups
                group = always[:]
                for key, work in grouper:
                    if key in remove_always:
                        continue
                    group.append((key, work))
                    if len(group) >= self.split_at:
                        yield group
                        group = always[:]
                # trailing partial chunk
                yield group

            first = True
            for group in _group_group(grouper, always, remove_always):
                # do not plot last dataframe that contains
                # only the common tracks to plot
                if not first and len(group) == len(always):
                    continue
                first = False
                df = pandas.concat(
                    [x[1] for x in group])
                # reconcile index names
                df.index.names = dataframe.index.names
                result.extend(self.render(df, path))
        else:
            # do not split dataframe
            result.extend(self.render(dataframe, path))
    return result
def asSpreadSheet(self, dataframe, row_headers, col_headers, title):
    '''save the table as an xls file.

    Multiple files of the same Renderer/Tracker combination are
    distinguished by the title.

    Hierarchical indices with more than one level are split into one
    worksheet per outer index path, with a hyperlinked Summary sheet.
    '''
    self.debug("%s: saving %i x %i table as spread-sheet'" %
               (id(self), len(row_headers), len(col_headers)))
    # NOTE(review): pandas.core.index.MultiIndex was moved in modern
    # pandas (pandas.MultiIndex) — confirm the supported version.
    is_hierarchical = isinstance(dataframe.index,
                                 pandas.core.index.MultiIndex)
    split = is_hierarchical and len(dataframe.index.levels) > 1
    quick = len(dataframe) > 10000
    if quick and not split:
        # quick writing, only append method works
        wb = openpyxl.Workbook(optimized_write=True)

        def fillWorksheet(ws, dataframe, title):
            # append-only fill: header row, then one row per record
            ws.append([""] + list(col_headers))
            for x, row in enumerate(dataframe.iterrows()):
                ws.append([path2str(row[0])] + list(row[1]))
            # patch: maximum title length seems to be 31
            ws.title = title[:30]
    else:
        # do it cell-by-cell, this might be slow
        wb = openpyxl.Workbook(optimized_write=False)

        def fillWorksheet(ws, dataframe, title):
            # regex to detect rst hypelinks
            regex_link = re.compile('`(.*) <(.*)>`_')
            # write row names
            for row, row_name in enumerate(dataframe.index):
                # rows and columns start at 1
                c = ws.cell(row=row + 2, column=1)
                c.value = row_name
            # write columns
            for column, column_name in enumerate(dataframe.columns):
                # set column title
                # rows and columns start at 1
                c = ws.cell(row=1, column=column + 2)
                c.value = column_name
                # set column values
                dataseries = dataframe[column_name]
                if dataseries.dtype == object:
                    for row, value in enumerate(dataseries):
                        c = ws.cell(row=row + 2,
                                    column=column + 2)
                        value = str(value)
                        if value.startswith('`'):
                            # rst hyperlink: store text and target
                            c.value, c.hyperlink =\
                                regex_link.match(value).groups()
                        else:
                            c.value = value
                else:
                    for row, value in enumerate(dataseries):
                        c = ws.cell(row=row + 2,
                                    column=column + 2)
                        c.value = value
            # patch: maximum title length seems to be 31
            ws.title = re.sub("/", "_", title)[:30]

    if len(wb.worksheets) == 0:
        wb.create_sheet()
    if split:
        # create separate worksheets for nested indices
        nlevels = len(dataframe.index.levels)
        paths = map(tuple, DataTree.unique(
            [x[:nlevels - 1] for x in dataframe.index.unique()]))
        ws = wb.worksheets[0]
        ws.title = 'Summary'
        ws.append(
            [""] * (nlevels - 1) + ["Worksheet", "Rows"])
        for row, path in enumerate(paths):
            # select data frame as cross-section
            work = dataframe.xs(path, axis=0)
            title = path2str(path)
            if len(title) > 30:
                title = "sheet%i" % row
            ws.append(list(path) + [title, len(work)])
            c = ws.cell(row=row + 1, column=nlevels)
            # this does not work in oocalc
            c.hyperlink = "#%s!A1" % title
            fillWorksheet(wb.create_sheet(), work, title=title)
    else:
        fillWorksheet(wb.worksheets[0], dataframe, title=title)
    # write result block with an xls placeholder link
    lines = []
    lines.append("`%i x %i table <#$xls %s$#>`__" %
                 (len(row_headers), len(col_headers), title))
    lines.append("")
    r = ResultBlock("\n".join(lines), title=title)
    # attach the workbook for the document builder
    r.xls = wb
    self.debug("%s: saved %i x %i table as spread-sheet'" %
               (id(self), len(row_headers), len(col_headers)))
    return r