Ejemplo n.º 1
0
    def asSpreadSheet(self, dataframe, row_headers, col_headers, title):
        '''save the table as an xls file.

        Multiple files of the same Renderer/Tracker combination are
        distinguished by the title.
        '''

        self.debug("%s: saving %i x %i table as spread-sheet'" %
                   (id(self),
                    len(row_headers),
                    len(col_headers)))

        is_hierarchical = isinstance(dataframe.index,
                                     pandas.core.index.MultiIndex)

        split = is_hierarchical and len(dataframe.index.levels) > 1

        quick = len(dataframe) > 10000
        if quick and not split:
            # quick writing, only append method works
            wb = openpyxl.Workbook(optimized_write=True)

            def fillWorksheet(ws, dataframe, title):
                ws.append([""] + list(col_headers))
                for x, row in enumerate(dataframe.iterrows()):
                    ws.append([path2str(row[0])] + list(row[1]))

                # patch: maximum title length seems to be 31
                ws.title = title[:30]

        else:
            # do it cell-by-cell, this might be slow
            wb = openpyxl.Workbook(optimized_write=False)

            def fillWorksheet(ws, dataframe, title):
                # regex to detect rst hypelinks
                regex_link = re.compile('`(.*) <(.*)>`_')
                # write row names
                for row, row_name in enumerate(dataframe.index):
                    # rows and columns start at 1
                    c = ws.cell(row=row + 2, column=1)
                    c.value = row_name

                # write columns
                for column, column_name in enumerate(dataframe.columns):
                    # set column title
                    # rows and columns start at 1
                    c = ws.cell(row=1, column=column + 2)
                    c.value = column_name

                    # set column values
                    dataseries = dataframe[column_name]

                    if dataseries.dtype == object:
                        for row, value in enumerate(dataseries):
                            c = ws.cell(row=row + 2,
                                        column=column + 2)
                            value = str(value)
                            if value.startswith('`'):
                                c.value, c.hyperlink =\
                                    regex_link.match(value).groups()
                            else:
                                c.value = value
                    else:
                        for row, value in enumerate(dataseries):
                            c = ws.cell(row=row + 2,
                                        column=column + 2)
                            c.value = value
                # patch: maximum title length seems to be 31
                ws.title = re.sub("/", "_", title)[:30]

        if len(wb.worksheets) == 0:
            wb.create_sheet()

        if split:
            # create separate worksheets for nested indices
            nlevels = len(dataframe.index.levels)
            paths = map(tuple, DataTree.unique(
                [x[:nlevels - 1]
                 for x in dataframe.index.unique()]))

            ws = wb.worksheets[0]
            ws.title = 'Summary'
            ws.append(
                [""] * (nlevels - 1) + ["Worksheet", "Rows"])

            for row, path in enumerate(paths):
                # select data frame as cross-section
                work = dataframe.xs(path, axis=0)
                title = path2str(path)
                if len(title) > 30:
                    title = "sheet%i" % row

                ws.append(list(path) + [title, len(work)])
                c = ws.cell(row=row + 1,
                            column=nlevels)
                # this does not work in oocalc
                c.hyperlink = "#%s!A1" % title
                fillWorksheet(wb.create_sheet(),
                              work,
                              title=title)
        else:
            fillWorksheet(wb.worksheets[0], dataframe,
                          title=title)

        # write result block
        lines = []
        lines.append("`%i x %i table <#$xls %s$#>`__" %
                     (len(row_headers), len(col_headers),
                      title))
        lines.append("")

        r = ResultBlock("\n".join(lines), title=title)
        r.xls = wb

        self.debug("%s: saved %i x %i table as spread-sheet'" %
                   (id(self),
                    len(row_headers),
                    len(col_headers)))
        return r