Ejemplo n.º 1
0
    def __init__(self, filename=None, options=None):
        """
        Constructor.

        Args:
            filename: Stored as self.filename (presumably the output file
                      path; confirm against the code that writes the file).
            options:  Optional dict of workbook options. Keys read here:
                      'tmpdir' (default None), 'date_1904' (default False),
                      'strings_to_numbers' (default False),
                      'default_date_format' (default None) and
                      'constant_memory' (default 0).

        """
        # Use a fresh dict per call instead of a mutable default argument
        # that would be shared by every Workbook instance.
        if options is None:
            options = {}

        super(Workbook, self).__init__()

        self.filename = filename
        self.tmpdir = options.get('tmpdir', None)
        self.date_1904 = options.get('date_1904', False)
        # String to number conversion is opt-in, so the default is False.
        self.strings_to_numbers = options.get('strings_to_numbers', False)
        self.default_date_format = options.get('default_date_format', None)
        self.worksheet_meta = WorksheetMeta()
        self.selected = 0
        self.fileclosed = 0
        self.filehandle = None
        self.internal_fh = 0
        self.sheet_name = 'Sheet'
        self.chart_name = 'Chart'
        self.sheetname_count = 0
        self.chartname_count = 0
        self.worksheets_objs = []
        self.charts = []
        self.drawings = []
        self.sheetnames = []
        self.formats = []
        self.xf_formats = []
        self.xf_format_indices = {}
        self.dxf_formats = []
        self.dxf_format_indices = {}
        self.palette = []
        self.font_count = 0
        self.num_format_count = 0
        self.defined_names = []
        self.named_ranges = []
        self.custom_colors = []
        self.doc_properties = {}
        self.localtime = datetime.now()
        self.num_vml_files = 0
        self.num_comment_files = 0
        self.optimization = options.get('constant_memory', 0)
        self.x_window = 240
        self.y_window = 15
        self.window_width = 16095
        self.window_height = 9660
        self.tab_ratio = 500
        self.str_table = SharedStringTable()
        self.vba_project = None
        self.vba_codename = None
        self.image_types = {}
        self.images = []

        # Add the default cell format.
        self.add_format({'xf_index': 0})

        # Add the default date format. The option value (a number format
        # string) is replaced by the Format object created from it.
        if self.default_date_format is not None:
            self.default_date_format = \
                self.add_format({'num_format': self.default_date_format})
Ejemplo n.º 2
0
    def __init__(self, filename=None, options=None):
        """
        Constructor.

        Args:
            filename: Stored as self.filename (presumably the output file
                      path; confirm against the code that writes the file).
            options:  Optional dict of workbook options. Keys read here:
                      'tmpdir' (default None), 'date_1904' (default False),
                      'strings_to_numbers' (default False),
                      'default_date_format' (default None) and
                      'constant_memory' (default 0).

        """
        # Use a fresh dict per call instead of a mutable default argument
        # that would be shared by every Workbook instance.
        if options is None:
            options = {}

        super(Workbook, self).__init__()

        self.filename = filename
        self.tmpdir = options.get('tmpdir', None)
        self.date_1904 = options.get('date_1904', False)
        self.strings_to_numbers = options.get('strings_to_numbers', False)
        self.default_date_format = options.get('default_date_format', None)
        self.worksheet_meta = WorksheetMeta()
        self.selected = 0
        self.fileclosed = 0
        self.filehandle = None
        self.internal_fh = 0
        self.sheet_name = 'Sheet'
        self.chart_name = 'Chart'
        self.sheetname_count = 0
        self.chartname_count = 0
        self.worksheets_objs = []
        self.charts = []
        self.drawings = []
        self.sheetnames = []
        self.formats = []
        self.xf_formats = []
        self.xf_format_indices = {}
        self.dxf_formats = []
        self.dxf_format_indices = {}
        self.palette = []
        self.font_count = 0
        self.num_format_count = 0
        self.defined_names = []
        self.named_ranges = []
        self.custom_colors = []
        self.doc_properties = {}
        self.localtime = datetime.now()
        self.num_vml_files = 0
        self.num_comment_files = 0
        self.optimization = options.get('constant_memory', 0)
        self.x_window = 240
        self.y_window = 15
        self.window_width = 16095
        self.window_height = 9660
        self.tab_ratio = 500
        self.str_table = SharedStringTable()
        self.vba_project = None
        self.vba_codename = None
        self.image_types = {}
        self.images = []

        # Add the default cell format.
        self.add_format({'xf_index': 0})

        # Add the default date format. The option value (a number format
        # string) is replaced by the Format object created from it.
        if self.default_date_format is not None:
            self.default_date_format = \
                self.add_format({'num_format': self.default_date_format})
Ejemplo n.º 3
0
class Workbook(xmlwriter.XMLwriter):
    """
    A class for writing the Excel XLSX Workbook file.


    """

    ###########################################################################
    #
    # Public API.
    #
    ###########################################################################

    def __init__(self, filename=None, options=None):
        """
        Constructor.

        Args:
            filename: Path of the xlsx file; it is passed to ZipFile() when
                      the workbook is stored.
            options:  Optional dict of workbook options. Keys read here:
                      'tmpdir' (default None), 'date_1904' (default False),
                      'strings_to_numbers' (default False),
                      'strings_to_formulas' (default True),
                      'strings_to_urls' (default True),
                      'default_date_format' (default None),
                      'constant_memory' (default False) and
                      'in_memory' (default False).

        """
        # Use a fresh dict per call instead of a mutable default argument
        # that would be shared by every Workbook instance.
        if options is None:
            options = {}

        super(Workbook, self).__init__()

        self.filename = filename
        self.tmpdir = options.get('tmpdir', None)
        self.date_1904 = options.get('date_1904', False)
        self.strings_to_numbers = options.get('strings_to_numbers', False)
        self.strings_to_formulas = options.get('strings_to_formulas', True)
        self.strings_to_urls = options.get('strings_to_urls', True)
        self.default_date_format = options.get('default_date_format', None)
        self.optimization = options.get('constant_memory', False)
        self.in_memory = options.get('in_memory', False)
        self.worksheet_meta = WorksheetMeta()
        self.selected = 0
        self.fileclosed = 0
        self.filehandle = None
        self.internal_fh = 0
        self.sheet_name = 'Sheet'
        self.chart_name = 'Chart'
        self.sheetname_count = 0
        self.chartname_count = 0
        self.worksheets_objs = []
        self.charts = []
        self.drawings = []
        self.sheetnames = []
        self.formats = []
        self.xf_formats = []
        self.xf_format_indices = {}
        self.dxf_formats = []
        self.dxf_format_indices = {}
        self.palette = []
        self.font_count = 0
        self.num_format_count = 0
        self.defined_names = []
        self.named_ranges = []
        self.custom_colors = []
        self.doc_properties = {}
        self.localtime = datetime.now()
        self.num_vml_files = 0
        self.num_comment_files = 0
        self.x_window = 240
        self.y_window = 15
        self.window_width = 16095
        self.window_height = 9660
        self.tab_ratio = 500
        self.str_table = SharedStringTable()
        self.vba_project = None
        self.vba_codename = None
        self.image_types = {}
        self.images = []
        self.border_count = 0
        self.fill_count = 0
        self.drawing_count = 0

        # We can't do 'constant_memory' mode while doing 'in_memory' mode.
        if self.in_memory:
            self.optimization = False

        # Add the default cell format.
        self.add_format({'xf_index': 0})

        # Add a default URL format.
        self.default_url_format = self.add_format({'color': 'blue',
                                                   'underline': 1})

        # Add the default date format. The option value (a number format
        # string) is replaced by the Format object created from it.
        if self.default_date_format is not None:
            self.default_date_format = \
                self.add_format({'num_format': self.default_date_format})

    def __del__(self):
        """Close file in destructor if it hasn't been closed explicitly."""
        if not self.fileclosed:
            self.close()

    def add_worksheet(self, name=None):
        """
        Create a worksheet, register it with the workbook and return it.

        Args:
            name: Optional worksheet name. When None a default name is
                  generated by _check_sheetname().

        Returns:
            The new Worksheet object.

        Raises:
            Exception: propagated from _check_sheetname() for a bad name.

        """
        # The new sheet's index is its position in the worksheet list.
        sheet_index = len(self.worksheets_objs)
        name = self._check_sheetname(name)

        # Initialisation data to pass to the worksheet: the per-sheet values
        # first, then the workbook level settings copied across unchanged.
        init_data = {'name': name, 'index': sheet_index}
        for shared in ('str_table',
                       'worksheet_meta',
                       'optimization',
                       'tmpdir',
                       'date_1904',
                       'strings_to_numbers',
                       'strings_to_formulas',
                       'strings_to_urls',
                       'default_date_format',
                       'default_url_format'):
            init_data[shared] = getattr(self, shared)

        worksheet = Worksheet()
        worksheet._initialize(init_data)

        self.worksheets_objs.append(worksheet)
        self.sheetnames.append(name)

        return worksheet

    def add_format(self, properties=None):
        """
        Add a new Format to the Excel Workbook.

        Args:
            properties: Optional dict of format properties. An empty dict
                        is used when omitted.

        Returns:
            Reference to a Format object.

        """
        # Use a fresh dict per call: a mutable default argument would be
        # one shared object handed to every Format created without
        # properties.
        if properties is None:
            properties = {}

        xf_format = Format(properties,
                           self.xf_format_indices,
                           self.dxf_format_indices)

        # Store the format reference.
        self.formats.append(xf_format)

        return xf_format

    def add_chart(self, options):
        """
        Create a chart object.

        Args:
            options: The chart type and subtype options.

        Returns:
            Reference to a Chart object.

        """

        # Type must be specified so we can create the required chart instance.
        chart_type = options.get('type', 'None')
        if chart_type is None:
            warn("Chart type must be defined in add_chart()")
            return

        if chart_type == 'area':
            chart = ChartArea(options)
        elif chart_type == 'bar':
            chart = ChartBar(options)
        elif chart_type == 'column':
            chart = ChartColumn(options)
        elif chart_type == 'line':
            chart = ChartLine(options)
        elif chart_type == 'pie':
            chart = ChartPie(options)
        elif chart_type == 'radar':
            chart = ChartRadar(options)
        elif chart_type == 'scatter':
            chart = ChartScatter(options)
        elif chart_type == 'stock':
            chart = ChartStock(options)
        else:
            warn("Unknown chart type '%s' in add_chart()" % chart_type)
            return

        # Set the embedded chart name if present.
        if 'name' in options:
            chart.chart_name = options['name']

        chart._set_embedded_config_data()
        self.charts.append(chart)

        return chart

    def close(self):
        """
        Call finalisation code and close file.

        Args:
            None.

        Returns:
            Nothing.

        """
        if not self.fileclosed:
            self.fileclosed = 1
            self._store_workbook()

    def set_properties(self, properties):
        """
        Set the document properties such as Title, Author etc.

        Args:
            properties: Dictionary of document properties.

        Returns:
            Nothing.

        """
        self.doc_properties = properties

    def define_name(self, name, formula):
        # Create a defined name in Excel. We handle global/workbook level
        # names and local/worksheet names.
        """
        Create a defined name in the workbook.

        Args:
            name:    The defined name.
            formula: The cell or range that the defined name refers to.

        Returns:
            Nothing.

        """
        sheet_index = None
        sheetname = ''

        # Remove the = sign from the formula if it exists.
        if formula.startswith('='):
            formula = formula.lstrip('=')

        # Local defined names are formatted like "Sheet1!name".
        sheet_parts = re.compile(r'^(.*)!(.*)$')
        match = sheet_parts.match(name)

        if match:
            sheetname = match.group(1)
            name = match.group(2)
            sheet_index = self._get_sheet_index(sheetname)

            # Warn if the sheet index wasn't found.
            if sheet_index is None:
                warn("Unknown sheet name '%s' in defined_name()" % sheetname)
                return -1
        else:
            # Use -1 to indicate global names.
            sheet_index = -1

        # Warn if the defined name contains invalid chars as defined by Excel.
        if (not re.match(r'^[\w\\][\w.]*$', name, re.UNICODE)
                or re.match(r'^\d', name)):
            warn("Invalid Excel characters in defined_name(): '%s'" % name)
            return -1

        # Warn if the defined name looks like a cell name.
        if re.match(r'^[a-zA-Z][a-zA-Z]?[a-dA-D]?[0-9]+$', name):
            warn("Name looks like a cell name in defined_name(): '%s'" % name)
            return -1

        # Warn if the name looks like a R1C1 cell reference.
        if (re.match(r'^[rcRC]$', name)
                or re.match(r'^[rcRC]\d+[rcRC]\d+$', name)):
            warn("Invalid name '%s' like a RC cell ref in defined_name()"
                 % name)
            return -1

        self.defined_names.append([name, sheet_index, formula, False])

    def worksheets(self):
        """
        Return a list of the worksheet objects in the workbook.

        Args:
            None.

        Returns:
            A list of worksheet objects.

        """
        return self.worksheets_objs

    ###########################################################################
    #
    # Private API.
    #
    ###########################################################################

    def _assemble_xml_file(self):
        # Assemble and write the XML file.

        # Prepare format object for passing to Style.pm.
        self._prepare_format_properties()

        # Write the XML declaration.
        self._xml_declaration()

        # Write the workbook element.
        self._write_workbook()

        # Write the fileVersion element.
        self._write_file_version()

        # Write the workbookPr element.
        self._write_workbook_pr()

        # Write the bookViews element.
        self._write_book_views()

        # Write the sheets element.
        self._write_sheets()

        # Write the workbook defined names.
        self._write_defined_names()

        # Write the calcPr element.
        self._write_calc_pr()

        # Close the workbook tag.
        self._xml_end_tag('workbook')

        # Close the file.
        self._xml_close()

    def _store_workbook(self):
        # Assemble worksheets into a workbook and write the xlsx zip file
        # to self.filename.
        packager = Packager()

        # Add a default worksheet if none have been added.
        if not self.worksheets():
            self.add_worksheet()

        # Ensure that at least one worksheet has been selected.
        if self.worksheet_meta.activesheet == 0:
            self.worksheets_objs[0].selected = 1
            self.worksheets_objs[0].hidden = 0

        # Set the active sheet.
        for sheet in self.worksheets():
            if sheet.index == self.worksheet_meta.activesheet:
                sheet.active = 1

        # Convert the SST strings data structure.
        self._prepare_sst_string_data()

        # Prepare the worksheet VML elements such as comments and buttons.
        self._prepare_vml()

        # Set the defined names for the worksheets such as Print Titles.
        self._prepare_defined_names()

        # Prepare the drawings, charts and images.
        self._prepare_drawings()

        # Add cached data to charts.
        self._add_chart_data()

        # Prepare the worksheet tables.
        self._prepare_tables()

        # Package the workbook.
        packager._add_workbook(self)
        packager._set_tmpdir(self.tmpdir)
        packager._set_in_memory(self.in_memory)
        xml_files = packager._create_package()

        # Free up the Packager object.
        packager = None

        xlsx_file = ZipFile(self.filename, "w", compression=ZIP_DEFLATED)

        # Always close the zip file handle, even if adding a member fails,
        # so that the handle isn't leaked on error.
        try:
            # Add XML sub-files to the Zip file with their Excel filename.
            for os_filename, xml_filename in xml_files:
                if self.in_memory:
                    # The files are in-memory StringIOs.
                    xlsx_file.writestr(xml_filename,
                                       os_filename.getvalue().encode('utf-8'))
                else:
                    # The files are tempfiles. Remove each one once it has
                    # been copied into the zip file.
                    xlsx_file.write(os_filename, xml_filename)
                    os.remove(os_filename)
        finally:
            xlsx_file.close()

    def _check_sheetname(self, sheetname, is_chart=False):
        # Check for valid worksheet names. We check the length, if it contains
        # any invalid chars and if the sheetname is unique in the workbook.
        invalid_char = re.compile(r'[\[\]:*?/\\]')

        # Increment the Sheet/Chart number used for default sheet names below.
        if is_chart:
            self.chartname_count += 1
        else:
            self.sheetname_count += 1

        # Supply default Sheet/Chart sheetname if none has been defined.
        if sheetname is None:
            if is_chart:
                sheetname = self.chart_name + str(self.chartname_count)
            else:
                sheetname = self.sheet_name + str(self.sheetname_count)

        # Check that sheet sheetname is <= 31. Excel limit.
        if len(sheetname) > 31:
            raise Exception("Excel worksheet name '%s' must be <= 31 chars." %
                            sheetname)

        # Check that sheetname doesn't contain any invalid characters
        if invalid_char.search(sheetname):
            raise Exception(
                "Invalid Excel character '[]:*?/\\' in sheetname '%s'" %
                sheetname)

        # Check that the worksheet name doesn't already exist since this is a
        # fatal Excel error. The check must be case insensitive like Excel.
        for worksheet in self.worksheets():
            if sheetname.lower() == worksheet.name.lower():
                raise Exception(
                    "Sheetname '%s', with case ignored, is already in use." %
                    sheetname)

        return sheetname

    def _prepare_format_properties(self):
        # Prepare all Format properties prior to passing them to styles.py.

        # Separate format objects into XF and DXF formats.
        self._prepare_formats()

        # Set the font index for the format objects.
        self._prepare_fonts()

        # Set the number format index for the format objects.
        self._prepare_num_formats()

        # Set the border index for the format objects.
        self._prepare_borders()

        # Set the fill index for the format objects.
        self._prepare_fills()

    def _prepare_formats(self):
        # Iterate through the XF Format objects and separate them into
        # XF and DXF formats. The XF and DF formats then need to be sorted
        # back into index order rather than creation order.
        xf_formats = []
        dxf_formats = []

        # Sort into XF and DXF formats.
        for xf_format in self.formats:
            if xf_format.xf_index is not None:
                xf_formats.append(xf_format)

            if xf_format.dxf_index is not None:
                dxf_formats.append(xf_format)

        # Pre-extend the format lists.
        self.xf_formats = [None] * len(xf_formats)
        self.dxf_formats = [None] * len(dxf_formats)

        # Rearrange formats into index order.
        for xf_format in xf_formats:
            index = xf_format.xf_index
            self.xf_formats[index] = xf_format

        for dxf_format in dxf_formats:
            index = dxf_format.dxf_index
            self.dxf_formats[index] = dxf_format

    def _set_default_xf_indices(self):
        # Set the default index for each format. Only used for testing.

        formats = list(self.formats)

        # Delete the default url format.
        del formats[1]

        # Skip the default date format if set.
        if self.default_date_format is not None:
            del formats[1]

        # Set the remaining formats.
        for xf_format in formats:
            xf_format._get_xf_index()

    def _prepare_fonts(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default font elements.
        fonts = {}
        index = 0

        for xf_format in self.xf_formats:
            key = xf_format._get_font_key()
            if key in fonts:
                # Font has already been used.
                xf_format.font_index = fonts[key]
                xf_format.has_font = 0
            else:
                # This is a new font.
                fonts[key] = index
                xf_format.font_index = index
                xf_format.has_font = 1
                index += 1

        self.font_count = index

        # For DXF formats we only need to check if the properties have changed.
        for xf_format in self.dxf_formats:
            # The only font properties that can change for a DXF format are:
            # color, bold, italic, underline and strikethrough.
            if (xf_format.font_color or xf_format.bold or xf_format.italic
                    or xf_format.underline or xf_format.font_strikeout):
                xf_format.has_dxf_font = 1

    def _prepare_num_formats(self):
        # User records is not None start from index 0xA4.
        num_formats = {}
        index = 164
        num_format_count = 0

        is_number = re.compile(r'^\d+$')
        is_zeroes = re.compile(r'^0+\d')

        for xf_format in (self.xf_formats + self.dxf_formats):
            num_format = xf_format.num_format
            # Check if num_format is an index to a built-in number format.
            # Also check for a string of zeros, which is a valid number
            # format string but would evaluate to zero.

            try:
                if (is_number.match(str(num_format))
                        and not is_zeroes.match(str(num_format))):
                    # Index to a built-in number xf_format.
                    xf_format.num_format_index = int(num_format)
                    continue
            except (TypeError, UnicodeEncodeError):
                pass

            if num_format in num_formats:
                # Number xf_format has already been used.
                xf_format.num_format_index = num_formats[num_format]
            else:
                # Add a new number xf_format.
                num_formats[num_format] = index
                xf_format.num_format_index = index
                index += 1

                # Only increase font count for XF formats (not DXF formats).
                if xf_format.xf_index:
                    num_format_count += 1

        self.num_format_count = num_format_count

    def _prepare_borders(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default border elements.
        borders = {}
        index = 0

        for xf_format in self.xf_formats:
            key = xf_format._get_border_key()

            if key in borders:
                # Border has already been used.
                xf_format.border_index = borders[key]
                xf_format.has_border = 0
            else:
                # This is a new border.
                borders[key] = index
                xf_format.border_index = index
                xf_format.has_border = 1
                index += 1

        self.border_count = index

        # For DXF formats we only need to check if the properties have changed.
        has_border = re.compile(r'[^0:]')

        for xf_format in self.dxf_formats:
            key = xf_format._get_border_key()

            if has_border.search(key):
                xf_format.has_dxf_border = 1

    def _prepare_fills(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default fill elements.
        # The user defined fill properties start from 2 since there are 2
        # default fills: patternType="none" and patternType="gray125".
        fills = {}
        index = 2  # Start from 2. See above.

        # Add the default fills.
        fills['0:0:0'] = 0
        fills['17:0:0'] = 1

        # Store the DXF colours separately since them may be reversed below.
        for xf_format in self.dxf_formats:
            if xf_format.pattern or xf_format.bg_color or xf_format.fg_color:
                xf_format.has_dxf_fill = 1
                xf_format.dxf_bg_color = xf_format.bg_color
                xf_format.dxf_fg_color = xf_format.fg_color

        for xf_format in self.xf_formats:
            # The following logical statements jointly take care of special
            # cases in relation to cell colours and patterns:
            # 1. For a solid fill (_pattern == 1) Excel reverses the role of
            # foreground and background colours, and
            # 2. If the user specifies a foreground or background colour
            # without a pattern they probably wanted a solid fill, so we fill
            # in the defaults.
            if (xf_format.pattern == 1 and xf_format.bg_color != 0
                    and xf_format.fg_color != 0):
                tmp = xf_format.fg_color
                xf_format.fg_color = xf_format.bg_color
                xf_format.bg_color = tmp

            if (xf_format.pattern <= 1 and xf_format.bg_color != 0
                    and xf_format.fg_color == 0):
                xf_format.fg_color = xf_format.bg_color
                xf_format.bg_color = 0
                xf_format.pattern = 1

            if (xf_format.pattern <= 1 and xf_format.bg_color == 0
                    and xf_format.fg_color != 0):
                xf_format.bg_color = 0
                xf_format.pattern = 1

            key = xf_format._get_fill_key()

            if key in fills:
                # Fill has already been used.
                xf_format.fill_index = fills[key]
                xf_format.has_fill = 0
            else:
                # This is a new fill.
                fills[key] = index
                xf_format.fill_index = index
                xf_format.has_fill = 1
                index += 1

        self.fill_count = index

    def _prepare_defined_names(self):
        # Iterate through the worksheets and store any defined names in
        # addition to any user defined names. Stores the defined names
        # for the Workbook.xml and the named ranges for App.xml.
        defined_names = self.defined_names

        for sheet in self.worksheets():
            # Check for Print Area settings.
            if sheet.autofilter_area:
                hidden = 1
                sheet_range = sheet.autofilter_area
                # Store the defined names.
                defined_names.append(['_xlnm._FilterDatabase',
                                      sheet.index, sheet_range, hidden])

            # Check for Print Area settings.
            if sheet.print_area_range:
                hidden = 0
                sheet_range = sheet.print_area_range
                # Store the defined names.
                defined_names.append(['_xlnm.Print_Area',
                                      sheet.index, sheet_range, hidden])

            # Check for repeat rows/cols referred to as Print Titles.
            if sheet.repeat_col_range or sheet.repeat_row_range:
                hidden = 0
                sheet_range = ''
                if sheet.repeat_col_range and sheet.repeat_row_range:
                    sheet_range = (sheet.repeat_col_range + ',' +
                                   sheet.repeat_row_range)
                else:
                    sheet_range = (sheet.repeat_col_range +
                                   sheet.repeat_row_range)
                # Store the defined names.
                defined_names.append(['_xlnm.Print_Titles',
                                      sheet.index, sheet_range, hidden])

        defined_names = self._sort_defined_names(defined_names)
        self.defined_names = defined_names
        self.named_ranges = self._extract_named_ranges(defined_names)

    def _sort_defined_names(self, names):
        # Sort the list of list of internal and user defined names in
        # the same order as used by Excel.

        # Add a normalise name string to each list for sorting.
        for name_list in names:
            (defined_name, _, sheet_name, _) = name_list

            # Normalise the defined name by removing any leading '_xmln.'
            # from internal names and lowercasing the string.
            defined_name = defined_name.replace('_xlnm.', '').lower()

            # Normalise the sheetname by removing the leading quote and
            # lowercasing the string.
            sheet_name = sheet_name.lstrip("'").lower()

            name_list.append(defined_name + "::" + sheet_name)

        # Remove the extra key for sorting.
        names.sort(key=operator.itemgetter(4))

        for name_list in names:
            name_list.pop()

        return names

    def _prepare_drawings(self):
        # Iterate through the worksheets and set up chart and image drawings.
        chart_ref_id = 0
        image_ref_id = 0
        drawing_id = 0

        for sheet in self.worksheets():
            chart_count = len(sheet.charts)
            image_count = len(sheet.images)
            shape_count = len(sheet.shapes)

            if not (chart_count + image_count + shape_count):
                continue

            drawing_id += 1

            for index in range(chart_count):
                chart_ref_id += 1
                sheet._prepare_chart(index, chart_ref_id, drawing_id)

            for index in range(image_count):
                filename = sheet.images[index][2]
                (image_type, width, height, name) = \
                    self._get_image_properties(filename)
                image_ref_id += 1

                sheet._prepare_image(index, image_ref_id, drawing_id, width,
                                     height, name, image_type)

            # for index in range(shape_count):
            #    sheet._prepare_shape(index, drawing_id)

            drawing = sheet.drawing
            self.drawings.append(drawing)

        # Sort the workbook charts references into the order that the were
        # written from the worksheets above.
        self.charts = sorted(self.charts, key=lambda chart: chart.id)

        self.drawing_count = drawing_id

    def _get_image_properties(self, filename):
        # Extract dimension information from the image file.
        height = 0
        width = 0

        # Open the image file and read in the data.
        fh = open(filename, "rb")
        data = fh.read()

        # Get the image filename without the path.
        image_name = os.path.basename(filename)

        # Look for some common image file markers.
        marker1 = (unpack('3s', data[1:4]))[0]
        marker2 = (unpack('>H', data[:2]))[0]
        marker3 = (unpack('2s', data[:2]))[0]

        if sys.version_info < (2, 6, 0):
            # Python 2.5/Jython.
            png_marker = 'PNG'
            bmp_marker = 'BM'
        else:
            # Eval the binary literals for Python 2.5/Jython compatibility.
            png_marker = eval("b'PNG'")
            bmp_marker = eval("b'BM'")

        if marker1 == png_marker:
            self.image_types['png'] = 1
            (image_type, width, height) = self._process_png(data)

        elif marker2 == 0xFFD8:
            self.image_types['jpeg'] = 1
            (image_type, width, height) = self._process_jpg(data)

        elif marker3 == bmp_marker:
            self.image_types['bmp'] = 1
            (image_type, width, height) = self._process_bmp(data)

        else:
            raise Exception("%s: Unknown or unsupported image file format."
                            % filename)

        # Check that we found the required data.
        if not height or not width:
            raise Exception("%s: no size data found in image file." % filename)

        # Store image data to copy it into file container.
        self.images.append([filename, image_type])

        fh.close()
        return image_type, width, height, image_name

    def _process_png(self, data):
        # Extract width and height information from a PNG file.
        width = (unpack('>I', data[16:20]))[0]
        height = (unpack('>I', data[20:24]))[0]

        return 'png', width, height

    def _process_jpg(self, data):
        # Extract width and height information from a JPEG file.
        offset = 2
        data_length = len(data)

        # Search through the image data to find the 0xFFC0 marker.
        # The height and width are contained in the data for that
        # sub-element.
        found = 0
        width = 0
        height = 0
        while not found and offset < data_length:

            marker = (unpack('>H', data[offset + 0:offset + 2]))[0]
            length = (unpack('>H', data[offset + 2:offset + 4]))[0]

            if marker == 0xFFC0 or marker == 0xFFC2:
                height = (unpack('>H', data[offset + 5:offset + 7]))[0]
                width = (unpack('>H', data[offset + 7:offset + 9]))[0]
                found = 1
                continue

            offset = offset + length + 2

            if marker == 0xFFDA:
                found = 1
                continue

        return 'jpeg', width, height

    def _process_bmp(self, data):
        # Extract width and height information from a BMP file.
        width = (unpack('<L', data[18:22]))[0]
        height = (unpack('<L', data[22:26]))[0]
        return 'bmp', width, height

    def _extract_named_ranges(self, defined_names):
        # Extract the named ranges from the sorted list of defined names.
        # These are used in the App.xml file.
        named_ranges = []

        for defined_name in defined_names:

            name = defined_name[0]
            index = defined_name[1]
            sheet_range = defined_name[2]

            # Skip autoFilter ranges.
            if name == '_xlnm._FilterDatabase':
                continue

            # We are only interested in defined names with ranges.
            if '!' in sheet_range:
                sheet_name, _ = sheet_range.split('!', 1)

                # Match Print_Area and Print_Titles xlnm types.
                if name.startswith('_xlnm.'):
                    xlnm_type = name.replace('_xlnm.', '')
                    name = sheet_name + '!' + xlnm_type
                elif index != -1:
                    name = sheet_name + '!' + name

                named_ranges.append(name)

        return named_ranges

    def _get_sheet_index(self, sheetname):
        # Convert a sheet name to its index. Return None otherwise.
        sheetname = sheetname.strip("'")

        if sheetname in self.sheetnames:
            return self.sheetnames.index(sheetname)
        else:
            return None

    def _prepare_vml(self):
        # Iterate through the worksheets and set up the VML objects.
        comment_id = 0
        vml_data_id = 1
        vml_shape_id = 1024
        vml_files = 0
        comment_files = 0

        for sheet in self.worksheets():
            if not sheet.has_vml:
                continue

            vml_files += 1

            if sheet.has_comments:
                comment_files += 1

            comment_id += 1
            count = sheet._prepare_vml_objects(vml_data_id,
                                               vml_shape_id,
                                               comment_id)

            # Each VML file should start with a shape id incremented by 1024.
            vml_data_id += 1 * int((1024 + count) / 1024)
            vml_shape_id += 1024 * int((1024 + count) / 1024)

        self.num_vml_files = vml_files
        self.num_comment_files = comment_files

        # Add a font format for cell comments.
        if comment_files > 0:
            xf = self.add_format({'font_name': 'Tahoma', 'font_size': 8,
                                  'color_indexed': 81, 'font_only': True})
            xf._get_xf_index()

    def _prepare_tables(self):
        # Set the table ids for the worksheet tables.
        table_id = 0

        for sheet in self.worksheets():
            table_count = len(sheet.tables)

            if not table_count:
                continue

            sheet._prepare_tables(table_id + 1)
            table_id += table_count

    def _add_chart_data(self):
        # Add "cached" data to charts to provide the numCache and strCacher
        # data for series and title/axis ranges.
        worksheets = {}
        seen_ranges = {}

        # Map worksheet names to worksheet objects.
        for worksheet in self.worksheets():
            worksheets[worksheet.name] = worksheet

        for chart in self.charts:

            for c_range in chart.formula_ids.keys():
                r_id = chart.formula_ids[c_range]

                # Skip if the series has user defined data.
                if chart.formula_data[r_id] is not None:
                    if (not c_range in seen_ranges
                            or seen_ranges[c_range] is None):
                        data = chart.formula_data[r_id]
                        seen_ranges[c_range] = data
                    continue

                # Check to see if the data is already cached locally.
                if c_range in seen_ranges:
                    chart.formula_data[r_id] = seen_ranges[c_range]
                    continue

                # Convert the range formula to a sheet name and cell range.
                (sheetname, cells) = self._get_chart_range(c_range)

                # Skip if we couldn't parse the formula.
                if sheetname is None:
                    continue

                # Die if the name is unknown since it indicates a user error in
                # a chart series formula.
                if not sheetname in worksheets:
                    warn("Unknown worksheet reference '%s' in range "
                         "'%s' passed to add_series()" % (sheetname, c_range))

                # Find the worksheet object based on the sheet name.
                worksheet = worksheets[sheetname]

                # Get the data from the worksheet table.
                data = worksheet._get_range_data(*cells)

                # TODO
                #   # Ignore rich strings for now. Deparse later if necessary.
                #        if token =~ m{^<r>} and token =~ m{</r>$}:
                #            token = ''

                # Add the data to the chart.
                chart.formula_data[r_id] = data

                # Store range data locally to avoid lookup if seen again.
                seen_ranges[c_range] = data

    def _get_chart_range(self, c_range):
        # Convert a range formula such as Sheet1!$B$1:$B$5 into a sheet name
        # and cell range such as ( 'Sheet1', 0, 1, 4, 1 ).

        # Split the range formula into sheetname and cells at the last '!'.
        # TODO. Fix this to match from right.
        pos = c_range.find('!')
        if pos > 0:
            sheetname, cells = c_range.split('!')
        else:
            return None

        # Split the cell range into 2 cells or else use single cell for both.
        if cells.find(':') > 0:
            (cell_1, cell_2) = cells.split(':')
        else:
            (cell_1, cell_2) = (cells, cells)

        # Remove leading/trailing quotes and convert escaped quotes to single.
        sheetname = sheetname.strip("'")
        sheetname = sheetname.replace("''", "'")

        (row_start, col_start) = xl_cell_to_rowcol(cell_1)
        (row_end, col_end) = xl_cell_to_rowcol(cell_2)

        # Check that we have a 1D range only.
        if row_start != row_end and col_start != col_end:
            return None

        return sheetname, [row_start, col_start, row_end, col_end]

    def _prepare_sst_string_data(self):
        # Convert the SST string data from a dict to a list.
        self.str_table._sort_string_data()

    ###########################################################################
    #
    # XML methods.
    #
    ###########################################################################

    def _write_workbook(self):
        # Write <workbook> element.

        schema = 'http://schemas.openxmlformats.org'
        xmlns = schema + '/spreadsheetml/2006/main'
        xmlns_r = schema + '/officeDocument/2006/relationships'

        attributes = [
            ('xmlns', xmlns),
            ('xmlns:r', xmlns_r),
        ]

        self._xml_start_tag('workbook', attributes)

    def _write_file_version(self):
        # Write the <fileVersion> element.

        app_name = 'xl'
        last_edited = 4
        lowest_edited = 4
        rup_build = 4505

        attributes = [
            ('appName', app_name),
            ('lastEdited', last_edited),
            ('lowestEdited', lowest_edited),
            ('rupBuild', rup_build),
        ]

        if self.vba_project:
            attributes.append(
                ('codeName', '{37E998C4-C9E5-D4B9-71C8-EB1FF731991C}'))

        self._xml_empty_tag('fileVersion', attributes)

    def _write_workbook_pr(self):
        # Write <workbookPr> element.
        default_theme_version = 124226
        attributes = []

        if self.vba_codename:
            attributes.append(('codeName', self.vba_codename))
        if self.date_1904:
            attributes.append(('date1904', 1))

        attributes.append(('defaultThemeVersion', default_theme_version))

        self._xml_empty_tag('workbookPr', attributes)

    def _write_book_views(self):
        # Write <bookViews> element.
        self._xml_start_tag('bookViews')
        self._write_workbook_view()
        self._xml_end_tag('bookViews')

    def _write_workbook_view(self):
        # Write <workbookView> element.
        attributes = [
            ('xWindow', self.x_window),
            ('yWindow', self.y_window),
            ('windowWidth', self.window_width),
            ('windowHeight', self.window_height),
        ]

        # Store the tabRatio attribute when it isn't the default.
        if self.tab_ratio != 500:
            attributes.append(('tabRatio', self.tab_ratio))

        # Store the firstSheet attribute when it isn't the default.
        if self.worksheet_meta.firstsheet > 0:
            firstsheet = self.worksheet_meta.firstsheet + 1
            attributes.append(('firstSheet', firstsheet))

        # Store the activeTab attribute when it isn't the first sheet.
        if self.worksheet_meta.activesheet > 0:
            attributes.append(('activeTab', self.worksheet_meta.activesheet))

        self._xml_empty_tag('workbookView', attributes)

    def _write_sheets(self):
        # Write <sheets> element.
        self._xml_start_tag('sheets')

        id_num = 1
        for worksheet in self.worksheets():
            self._write_sheet(worksheet.name, id_num, worksheet.hidden)
            id_num += 1

        self._xml_end_tag('sheets')

    def _write_sheet(self, name, sheet_id, hidden):
        # Write <sheet> element.
        attributes = [
            ('name', name),
            ('sheetId', sheet_id),
        ]

        if hidden:
            attributes.append(('state', 'hidden'))

        attributes.append(('r:id', 'rId' + str(sheet_id)))

        self._xml_empty_tag('sheet', attributes)

    def _write_calc_pr(self):
        # Write the <calcPr> element.
        attributes = [('calcId', '124519'),
                      ('fullCalcOnLoad', '1')]

        self._xml_empty_tag('calcPr', attributes)

    def _write_defined_names(self):
        # Write the <definedNames> element.
        if not self.defined_names:
            return

        self._xml_start_tag('definedNames')

        for defined_name in self.defined_names:
            self._write_defined_name(defined_name)

        self._xml_end_tag('definedNames')

    def _write_defined_name(self, defined_name):
        # Write the <definedName> element.
        name = defined_name[0]
        sheet_id = defined_name[1]
        sheet_range = defined_name[2]
        hidden = defined_name[3]

        attributes = [('name', name)]

        if sheet_id != -1:
            attributes.append(('localSheetId', sheet_id))
        if hidden:
            attributes.append(('hidden', 1))

        self._xml_data_element('definedName', sheet_range, attributes)
Ejemplo n.º 4
0
    def __init__(self, filename=None, options=None):
        """
        Constructor.

        Args:
            filename: The name of the new Excel file to create.
            options:  Optional dictionary of workbook options. The keys that
                      are read, with their defaults, are: 'tmpdir' (None),
                      'date_1904' (False), 'strings_to_numbers' (False),
                      'strings_to_formulas' (True), 'strings_to_urls' (True),
                      'default_date_format' (None), 'constant_memory' (False),
                      'in_memory' (False), 'excel2003_style' (False) and
                      'default_format_properties' ({}).

        """

        super(Workbook, self).__init__()

        # Use a new dict per call instead of a shared mutable default.
        if options is None:
            options = {}

        self.filename = filename

        self.tmpdir = options.get('tmpdir', None)
        self.date_1904 = options.get('date_1904', False)
        self.strings_to_numbers = options.get('strings_to_numbers', False)
        self.strings_to_formulas = options.get('strings_to_formulas', True)
        self.strings_to_urls = options.get('strings_to_urls', True)
        self.default_date_format = options.get('default_date_format', None)
        self.optimization = options.get('constant_memory', False)
        self.in_memory = options.get('in_memory', False)
        self.excel2003_style = options.get('excel2003_style', False)
        self.default_format_properties = \
            options.get('default_format_properties', {})

        self.worksheet_meta = WorksheetMeta()
        self.selected = 0
        self.fileclosed = 0
        self.filehandle = None
        self.internal_fh = 0
        self.sheet_name = 'Sheet'
        self.chart_name = 'Chart'
        self.sheetname_count = 0
        self.chartname_count = 0
        self.worksheets_objs = []
        self.charts = []
        self.drawings = []
        self.sheetnames = []
        self.formats = []
        self.xf_formats = []
        self.xf_format_indices = {}
        self.dxf_formats = []
        self.dxf_format_indices = {}
        self.palette = []
        self.font_count = 0
        self.num_format_count = 0
        self.defined_names = []
        self.named_ranges = []
        self.custom_colors = []
        self.doc_properties = {}
        self.localtime = datetime.now()
        self.num_vml_files = 0
        self.num_comment_files = 0
        self.x_window = 240
        self.y_window = 15
        self.window_width = 16095
        self.window_height = 9660
        self.tab_ratio = 500
        self.str_table = SharedStringTable()
        self.vba_project = None
        self.vba_is_stream = False
        self.vba_codename = None
        self.image_types = {}
        self.images = []
        self.border_count = 0
        self.fill_count = 0
        self.drawing_count = 0
        self.calc_mode = "auto"
        self.calc_on_load = True
        self.allow_zip64 = False
        self.calc_id = 124519

        # We can't do 'constant_memory' mode while doing 'in_memory' mode.
        if self.in_memory:
            self.optimization = False

        # Add the default cell format.
        if self.excel2003_style:
            self.add_format({'xf_index': 0, 'font_family': 0})
        else:
            self.add_format({'xf_index': 0})

        # Add a default URL format.
        self.default_url_format = self.add_format({
            'color': 'blue',
            'underline': 1
        })

        # Add the default date format. The option value is replaced by the
        # Format object created from it.
        if self.default_date_format is not None:
            self.default_date_format = \
                self.add_format({'num_format': self.default_date_format})
Ejemplo n.º 5
0
class Workbook(xmlwriter.XMLwriter):
    """
    A class for writing the Excel XLSX Workbook file.


    """

    ###########################################################################
    #
    # Public API.
    #
    ###########################################################################

    def __init__(self, filename=None, options=None):
        """
        Constructor.

        Args:
            filename: The name of the new Excel file to create.
            options:  Optional dictionary of workbook options. The keys that
                      are read, with their defaults, are: 'tmpdir' (None),
                      'date_1904' (False), 'strings_to_numbers' (False),
                      'strings_to_formulas' (True), 'strings_to_urls' (True),
                      'default_date_format' (None), 'constant_memory' (False),
                      'in_memory' (False), 'excel2003_style' (False) and
                      'default_format_properties' ({}).

        """

        super(Workbook, self).__init__()

        # Use a new dict per call instead of a shared mutable default.
        if options is None:
            options = {}

        self.filename = filename

        self.tmpdir = options.get('tmpdir', None)
        self.date_1904 = options.get('date_1904', False)
        self.strings_to_numbers = options.get('strings_to_numbers', False)
        self.strings_to_formulas = options.get('strings_to_formulas', True)
        self.strings_to_urls = options.get('strings_to_urls', True)
        self.default_date_format = options.get('default_date_format', None)
        self.optimization = options.get('constant_memory', False)
        self.in_memory = options.get('in_memory', False)
        self.excel2003_style = options.get('excel2003_style', False)
        self.default_format_properties = \
            options.get('default_format_properties', {})

        self.worksheet_meta = WorksheetMeta()
        self.selected = 0
        self.fileclosed = 0
        self.filehandle = None
        self.internal_fh = 0
        self.sheet_name = 'Sheet'
        self.chart_name = 'Chart'
        self.sheetname_count = 0
        self.chartname_count = 0
        self.worksheets_objs = []
        self.charts = []
        self.drawings = []
        self.sheetnames = []
        self.formats = []
        self.xf_formats = []
        self.xf_format_indices = {}
        self.dxf_formats = []
        self.dxf_format_indices = {}
        self.palette = []
        self.font_count = 0
        self.num_format_count = 0
        self.defined_names = []
        self.named_ranges = []
        self.custom_colors = []
        self.doc_properties = {}
        self.localtime = datetime.now()
        self.num_vml_files = 0
        self.num_comment_files = 0
        self.x_window = 240
        self.y_window = 15
        self.window_width = 16095
        self.window_height = 9660
        self.tab_ratio = 500
        self.str_table = SharedStringTable()
        self.vba_project = None
        self.vba_is_stream = False
        self.vba_codename = None
        self.image_types = {}
        self.images = []
        self.border_count = 0
        self.fill_count = 0
        self.drawing_count = 0
        self.calc_mode = "auto"
        self.calc_on_load = True
        self.allow_zip64 = False
        self.calc_id = 124519

        # We can't do 'constant_memory' mode while doing 'in_memory' mode.
        if self.in_memory:
            self.optimization = False

        # Add the default cell format.
        if self.excel2003_style:
            self.add_format({'xf_index': 0, 'font_family': 0})
        else:
            self.add_format({'xf_index': 0})

        # Add a default URL format.
        self.default_url_format = self.add_format({
            'color': 'blue',
            'underline': 1
        })

        # Add the default date format. The option value is replaced by the
        # Format object created from it.
        if self.default_date_format is not None:
            self.default_date_format = \
                self.add_format({'num_format': self.default_date_format})

    def __del__(self):
        """Close file in destructor if it hasn't been closed explicitly."""
        try:
            if not self.fileclosed:
                self.close()
        except:
            raise Exception("Exception caught in workbook destructor. "
                            "Explicit close() may be required for workbook.")

    def add_worksheet(self, name=None):
        """
        Create a worksheet and append it to the workbook.

        Args:
            name: The worksheet name. If omitted a default name such as
                  'Sheet1' is generated.

        Returns:
            Reference to the new worksheet object.

        """
        worksheet = self._add_sheet(name, is_chartsheet=False)

        return worksheet

    def add_chartsheet(self, name=None):
        """
        Create a chartsheet and append it to the workbook.

        Args:
            name: The chartsheet name. If omitted a default name such as
                  'Chart1' is generated.

        Returns:
            Reference to the new chartsheet object.

        """
        chartsheet = self._add_sheet(name, is_chartsheet=True)

        return chartsheet

    def add_format(self, properties=None):
        """
        Add a new Format to the Excel Workbook.

        The format is built from the workbook's default format properties,
        or from fixed Arial 10 properties when 'excel2003_style' is on, and
        then updated with the user supplied properties.

        Args:
            properties: Optional dictionary of format properties.

        Returns:
            Reference to a Format object.

        """
        # Use a new dict per call instead of a shared mutable default.
        if properties is None:
            properties = {}

        format_properties = self.default_format_properties.copy()

        # The Excel 2003 style replaces the workbook defaults entirely.
        if self.excel2003_style:
            format_properties = {
                'font_name': 'Arial',
                'font_size': 10,
                'theme': 1 * -1
            }

        format_properties.update(properties)

        xf_format = Format(format_properties, self.xf_format_indices,
                           self.dxf_format_indices)

        # Store the format reference.
        self.formats.append(xf_format)

        return xf_format

    def add_chart(self, options):
        """
        Create an embedded chart object of the requested type.

        Args:
            options: The chart options. The 'type' key is required and the
                     optional 'name' key sets the chart name.

        Returns:
            Reference to a Chart object, or None (after a warning) if the
            chart type is missing or unknown.

        """

        # The type is required to select the chart class to instantiate.
        chart_type = options.get('type')
        if chart_type is None:
            warn("Chart type must be defined in add_chart()")
            return

        chart_classes = (
            ('area', ChartArea),
            ('bar', ChartBar),
            ('column', ChartColumn),
            ('doughnut', ChartDoughnut),
            ('line', ChartLine),
            ('pie', ChartPie),
            ('radar', ChartRadar),
            ('scatter', ChartScatter),
            ('stock', ChartStock),
        )

        for type_name, chart_class in chart_classes:
            if chart_type == type_name:
                chart = chart_class(options)
                break
        else:
            warn("Unknown chart type '%s' in add_chart()" % chart_type)
            return

        # Use the user defined chart name, if any.
        if 'name' in options:
            chart.chart_name = options['name']

        chart.embedded = True
        chart.date_1904 = self.date_1904

        self.charts.append(chart)

        return chart

    def add_vba_project(self, vba_project, is_stream=False):
        """
        Store a vbaProject binary to be added to the Excel workbook.

        Args:
            vba_project: The vbaProject binary file name, or the binary data
                         itself when is_stream is True.
            is_stream:   vba_project is an in memory byte stream.

        Returns:
            Nothing on success. -1, after a warning, if vba_project is a
            file name that doesn't exist.

        """
        file_missing = not is_stream and not os.path.exists(vba_project)

        if file_missing:
            warn("VBA project binary file '%s' not found." % vba_project)
            return -1

        self.vba_is_stream = is_stream
        self.vba_project = vba_project

    def close(self):
        """
        Finalise the workbook and write the file, unless already closed.

        The workbook is flagged as closed before it is stored so further
        calls to close() do nothing.

        Args:
            None.

        Returns:
            Nothing.

        """
        if self.fileclosed:
            return

        self.fileclosed = 1
        self._store_workbook()

    def set_properties(self, properties):
        """
        Store the document properties, such as Title or Author.

        The dictionary is stored as given and replaces any properties that
        were set previously.

        Args:
            properties: Dictionary of document properties.

        Returns:
            Nothing.

        """
        self.doc_properties = properties

    def set_calc_mode(self, mode, calc_id=None):
        """
        Set the Excel calculation mode for the workbook.

        Args:
            mode:    String containing one of:
                         * manual
                         * auto_except_tables
                         * auto
                     The 'manual' mode also turns off calculation on load.
            calc_id: Optional calculation id to store. Leave undocumented
                     for now. Rarely required.

        Returns:
            Nothing.

        """
        if mode == 'auto_except_tables':
            # Stored using the 'autoNoTable' value.
            self.calc_mode = 'autoNoTable'
        else:
            self.calc_mode = mode

            if mode == 'manual':
                self.calc_on_load = False

        if calc_id:
            self.calc_id = calc_id

    def define_name(self, name, formula):
        """
        Create a defined name in the workbook.

        Both global/workbook level names and local/worksheet level names,
        which are written like "Sheet1!name", are handled.

        Args:
            name:    The defined name.
            formula: The cell or range that the defined name refers to.

        Returns:
            Nothing on success. -1, after a warning, if the sheet of a local
            name is unknown or if the name is invalid.

        """
        # Any leading = signs aren't stored with the formula.
        formula = formula.lstrip('=')

        # Local defined names are formatted like "Sheet1!name".
        local_name = re.match(r'^(.*)!(.*)$', name)

        if local_name is None:
            # Use -1 to indicate global names.
            sheet_index = -1
        else:
            (sheetname, name) = local_name.groups()
            sheet_index = self._get_sheet_index(sheetname)

            # Warn if the sheet index wasn't found.
            if sheet_index is None:
                warn("Unknown sheet name '%s' in defined_name()" % sheetname)
                return -1

        # Warn if the defined name contains invalid chars as defined by Excel.
        valid_chars = re.match(r'^[\w\\][\w.]*$', name, re.UNICODE)
        if not valid_chars or re.match(r'^\d', name):
            warn("Invalid Excel characters in defined_name(): '%s'" % name)
            return -1

        # Warn if the defined name looks like a cell name.
        if re.match(r'^[a-zA-Z][a-zA-Z]?[a-dA-D]?[0-9]+$', name):
            warn("Name looks like a cell name in defined_name(): '%s'" % name)
            return -1

        # Warn if the name looks like a R1C1 cell reference.
        is_rc_letter = re.match(r'^[rcRC]$', name)
        if is_rc_letter or re.match(r'^[rcRC]\d+[rcRC]\d+$', name):
            warn("Invalid name '%s' like a RC cell ref in defined_name()" %
                 name)
            return -1

        self.defined_names.append([name, sheet_index, formula, False])

    def worksheets(self):
        """
        Get the worksheet objects in the workbook.

        The workbook's own list is returned, not a copy.

        Args:
            None.

        Returns:
            A list of worksheet objects.

        """
        sheet_objects = self.worksheets_objs

        return sheet_objects

    def use_zip64(self):
        """
        Turn on ZIP64 extensions for the xlsx file zip container.

        The flag is passed to the zip file writer when the workbook is
        stored.

        Args:
            None.

        Returns:
            Nothing.

        """
        self.allow_zip64 = True

    def set_vba_name(self, name=None):
        """
        Set the VBA name for the workbook.

        Args:
            name: The VBA name for the workbook. If it is omitted the name
                  'ThisWorkbook' is used.

        Returns:
            Nothing.

        """
        if name is None:
            name = 'ThisWorkbook'

        self.vba_codename = name

    ###########################################################################
    #
    # Private API.
    #
    ###########################################################################

    def _assemble_xml_file(self):
        # Assemble and write the XML file.

        # Prepare format object for passing to Style.pm.
        self._prepare_format_properties()

        # Write the XML declaration.
        self._xml_declaration()

        # Write the workbook element.
        self._write_workbook()

        # Write the fileVersion element.
        self._write_file_version()

        # Write the workbookPr element.
        self._write_workbook_pr()

        # Write the bookViews element.
        self._write_book_views()

        # Write the sheets element.
        self._write_sheets()

        # Write the workbook defined names.
        self._write_defined_names()

        # Write the calcPr element.
        self._write_calc_pr()

        # Close the workbook tag.
        self._xml_end_tag('workbook')

        # Close the file.
        self._xml_close()

    def _store_workbook(self):
        # Assemble the worksheets into a workbook and write the xlsx file.
        #
        # The workbook data is prepared, the XML sub-files are created by
        # the Packager and then added to a zip file written to
        # self.filename.
        packager = Packager()

        # Add a default worksheet if none have been added.
        if not self.worksheets():
            self.add_worksheet()

        # Ensure that at least one worksheet has been selected.
        if self.worksheet_meta.activesheet == 0:
            self.worksheets_objs[0].selected = 1
            self.worksheets_objs[0].hidden = 0

        # Set the active sheet.
        for sheet in self.worksheets():
            if sheet.index == self.worksheet_meta.activesheet:
                sheet.active = 1

        # Convert the SST strings data structure.
        self._prepare_sst_string_data()

        # Prepare the worksheet VML elements such as comments and buttons.
        self._prepare_vml()

        # Set the defined names for the worksheets such as Print Titles.
        self._prepare_defined_names()

        # Prepare the drawings, charts and images.
        self._prepare_drawings()

        # Add cached data to charts.
        self._add_chart_data()

        # Prepare the worksheet tables.
        self._prepare_tables()

        # Package the workbook.
        packager._add_workbook(self)
        packager._set_tmpdir(self.tmpdir)
        packager._set_in_memory(self.in_memory)
        xml_files = packager._create_package()

        # Free up the Packager object.
        packager = None

        xlsx_file = ZipFile(self.filename,
                            "w",
                            compression=ZIP_DEFLATED,
                            allowZip64=self.allow_zip64)

        # The try/finally ensures that the zip file handle is released even
        # if adding one of the sub-files fails.
        try:
            # Add XML sub-files to the Zip file with their Excel filename.
            for os_filename, xml_filename, is_binary in xml_files:
                if self.in_memory:
                    # The files are in-memory StringIOs.
                    if is_binary:
                        xlsx_file.writestr(xml_filename,
                                           os_filename.getvalue())
                    else:
                        xlsx_file.writestr(
                            xml_filename,
                            os_filename.getvalue().encode('utf-8'))
                else:
                    # The files are tempfiles. Remove each one once it has
                    # been added to the zip file.
                    xlsx_file.write(os_filename, xml_filename)
                    os.remove(os_filename)
        finally:
            xlsx_file.close()

    def _add_sheet(self, name, is_chartsheet):
        # Shared implementation of add_worksheet() and add_chartsheet().
        #
        # The sheet name is checked, or generated if it is None, and a
        # Chartsheet or Worksheet is created, initialised with the workbook
        # settings and stored. Returns the new sheet object.
        sheet_index = len(self.worksheets_objs)
        name = self._check_sheetname(name, is_chartsheet)

        if is_chartsheet:
            sheet_class = Chartsheet
        else:
            sheet_class = Worksheet

        worksheet = sheet_class()

        # Pass the workbook level data and options on to the new sheet.
        worksheet._initialize(dict(
            name=name,
            index=sheet_index,
            str_table=self.str_table,
            worksheet_meta=self.worksheet_meta,
            optimization=self.optimization,
            tmpdir=self.tmpdir,
            date_1904=self.date_1904,
            strings_to_numbers=self.strings_to_numbers,
            strings_to_formulas=self.strings_to_formulas,
            strings_to_urls=self.strings_to_urls,
            default_date_format=self.default_date_format,
            default_url_format=self.default_url_format,
            excel2003_style=self.excel2003_style,
        ))

        self.worksheets_objs.append(worksheet)
        self.sheetnames.append(name)

        return worksheet

    def _check_sheetname(self, sheetname, is_chartsheet=False):
        # Check for valid worksheet names. We check the length, if it contains
        # any invalid chars and if the sheetname is unique in the workbook.
        invalid_char = re.compile(r'[\[\]:*?/\\]')

        # Increment the Sheet/Chart number used for default sheet names below.
        if is_chartsheet:
            self.chartname_count += 1
        else:
            self.sheetname_count += 1

        # Supply default Sheet/Chart sheetname if none has been defined.
        if sheetname is None:
            if is_chartsheet:
                sheetname = self.chart_name + str(self.chartname_count)
            else:
                sheetname = self.sheet_name + str(self.sheetname_count)

        # Check that sheet sheetname is <= 31. Excel limit.
        if len(sheetname) > 31:
            raise Exception("Excel worksheet name '%s' must be <= 31 chars." %
                            sheetname)

        # Check that sheetname doesn't contain any invalid characters
        if invalid_char.search(sheetname):
            raise Exception(
                "Invalid Excel character '[]:*?/\\' in sheetname '%s'" %
                sheetname)

        # Check that the worksheet name doesn't already exist since this is a
        # fatal Excel error. The check must be case insensitive like Excel.
        for worksheet in self.worksheets():
            if sheetname.lower() == worksheet.name.lower():
                raise Exception(
                    "Sheetname '%s', with case ignored, is already in use." %
                    sheetname)

        return sheetname

    def _prepare_format_properties(self):
        # Prepare all Format properties prior to passing them to styles.py.

        # Separate format objects into XF and DXF formats.
        self._prepare_formats()

        # Set the font index for the format objects.
        self._prepare_fonts()

        # Set the number format index for the format objects.
        self._prepare_num_formats()

        # Set the border index for the format objects.
        self._prepare_borders()

        # Set the fill index for the format objects.
        self._prepare_fills()

    def _prepare_formats(self):
        # Iterate through the XF Format objects and separate them into
        # XF and DXF formats. The XF and DF formats then need to be sorted
        # back into index order rather than creation order.
        xf_formats = []
        dxf_formats = []

        # Sort into XF and DXF formats.
        for xf_format in self.formats:
            if xf_format.xf_index is not None:
                xf_formats.append(xf_format)

            if xf_format.dxf_index is not None:
                dxf_formats.append(xf_format)

        # Pre-extend the format lists.
        self.xf_formats = [None] * len(xf_formats)
        self.dxf_formats = [None] * len(dxf_formats)

        # Rearrange formats into index order.
        for xf_format in xf_formats:
            index = xf_format.xf_index
            self.xf_formats[index] = xf_format

        for dxf_format in dxf_formats:
            index = dxf_format.dxf_index
            self.dxf_formats[index] = dxf_format

    def _set_default_xf_indices(self):
        # Set the default index for each format. Only used for testing.

        formats = list(self.formats)

        # Delete the default url format.
        del formats[1]

        # Skip the default date format if set.
        if self.default_date_format is not None:
            del formats[1]

        # Set the remaining formats.
        for xf_format in formats:
            xf_format._get_xf_index()

    def _prepare_fonts(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default font elements.
        fonts = {}
        index = 0

        for xf_format in self.xf_formats:
            key = xf_format._get_font_key()
            if key in fonts:
                # Font has already been used.
                xf_format.font_index = fonts[key]
                xf_format.has_font = 0
            else:
                # This is a new font.
                fonts[key] = index
                xf_format.font_index = index
                xf_format.has_font = 1
                index += 1

        self.font_count = index

        # For DXF formats we only need to check if the properties have changed.
        for xf_format in self.dxf_formats:
            # The only font properties that can change for a DXF format are:
            # color, bold, italic, underline and strikethrough.
            if (xf_format.font_color or xf_format.bold or xf_format.italic
                    or xf_format.underline or xf_format.font_strikeout):
                xf_format.has_dxf_font = 1

    def _prepare_num_formats(self):
        # Assign a number format index to every XF and DXF format.
        # User defined records in Excel start from index 0xA4 (164).
        custom_indices = {}
        next_index = 164
        xf_custom_total = 0

        for fmt in (self.xf_formats + self.dxf_formats):
            code = fmt.num_format

            # A non-string value is already the index of a built-in number
            # format, so it is used as-is.
            if not isinstance(code, str_types):
                fmt.num_format_index = int(code)
                continue

            if code not in custom_indices:
                # First time this format string is seen: allocate an index.
                custom_indices[code] = next_index
                next_index += 1

                # Only count formats with a truthy xf_index, so DXF-only
                # formats (and an xf_index of 0) are not counted.
                if fmt.xf_index:
                    xf_custom_total += 1

            fmt.num_format_index = custom_indices[code]

        self.num_format_count = xf_custom_total

    def _prepare_borders(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default border elements.
        borders = {}
        index = 0

        for xf_format in self.xf_formats:
            key = xf_format._get_border_key()

            if key in borders:
                # Border has already been used.
                xf_format.border_index = borders[key]
                xf_format.has_border = 0
            else:
                # This is a new border.
                borders[key] = index
                xf_format.border_index = index
                xf_format.has_border = 1
                index += 1

        self.border_count = index

        # For DXF formats we only need to check if the properties have changed.
        has_border = re.compile(r'[^0:]')

        for xf_format in self.dxf_formats:
            key = xf_format._get_border_key()

            if has_border.search(key):
                xf_format.has_dxf_border = 1

    def _prepare_fills(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default fill elements.
        # The user defined fill properties start from 2 since there are 2
        # default fills: patternType="none" and patternType="gray125".
        fills = {}
        index = 2  # Start from 2. See above.

        # Add the default fills.
        fills['0:0:0'] = 0
        fills['17:0:0'] = 1

        # Store the DXF colours separately since them may be reversed below.
        for xf_format in self.dxf_formats:
            if xf_format.pattern or xf_format.bg_color or xf_format.fg_color:
                xf_format.has_dxf_fill = 1
                xf_format.dxf_bg_color = xf_format.bg_color
                xf_format.dxf_fg_color = xf_format.fg_color

        for xf_format in self.xf_formats:
            # The following logical statements jointly take care of special
            # cases in relation to cell colours and patterns:
            # 1. For a solid fill (_pattern == 1) Excel reverses the role of
            # foreground and background colours, and
            # 2. If the user specifies a foreground or background colour
            # without a pattern they probably wanted a solid fill, so we fill
            # in the defaults.
            if (xf_format.pattern == 1 and xf_format.bg_color != 0
                    and xf_format.fg_color != 0):
                tmp = xf_format.fg_color
                xf_format.fg_color = xf_format.bg_color
                xf_format.bg_color = tmp

            if (xf_format.pattern <= 1 and xf_format.bg_color != 0
                    and xf_format.fg_color == 0):
                xf_format.fg_color = xf_format.bg_color
                xf_format.bg_color = 0
                xf_format.pattern = 1

            if (xf_format.pattern <= 1 and xf_format.bg_color == 0
                    and xf_format.fg_color != 0):
                xf_format.bg_color = 0
                xf_format.pattern = 1

            key = xf_format._get_fill_key()

            if key in fills:
                # Fill has already been used.
                xf_format.fill_index = fills[key]
                xf_format.has_fill = 0
            else:
                # This is a new fill.
                fills[key] = index
                xf_format.fill_index = index
                xf_format.has_fill = 1
                index += 1

        self.fill_count = index

    def _prepare_defined_names(self):
        # Iterate through the worksheets and store any defined names in
        # addition to any user defined names. Stores the defined names
        # for the Workbook.xml and the named ranges for App.xml.
        defined_names = self.defined_names

        for sheet in self.worksheets():
            # Check for Print Area settings.
            if sheet.autofilter_area:
                hidden = 1
                sheet_range = sheet.autofilter_area
                # Store the defined names.
                defined_names.append([
                    '_xlnm._FilterDatabase', sheet.index, sheet_range, hidden
                ])

            # Check for Print Area settings.
            if sheet.print_area_range:
                hidden = 0
                sheet_range = sheet.print_area_range
                # Store the defined names.
                defined_names.append(
                    ['_xlnm.Print_Area', sheet.index, sheet_range, hidden])

            # Check for repeat rows/cols referred to as Print Titles.
            if sheet.repeat_col_range or sheet.repeat_row_range:
                hidden = 0
                sheet_range = ''
                if sheet.repeat_col_range and sheet.repeat_row_range:
                    sheet_range = (sheet.repeat_col_range + ',' +
                                   sheet.repeat_row_range)
                else:
                    sheet_range = (sheet.repeat_col_range +
                                   sheet.repeat_row_range)
                # Store the defined names.
                defined_names.append(
                    ['_xlnm.Print_Titles', sheet.index, sheet_range, hidden])

        defined_names = self._sort_defined_names(defined_names)
        self.defined_names = defined_names
        self.named_ranges = self._extract_named_ranges(defined_names)

    def _sort_defined_names(self, names):
        # Sort the list of list of internal and user defined names in
        # the same order as used by Excel.

        # Add a normalise name string to each list for sorting.
        for name_list in names:
            (defined_name, _, sheet_name, _) = name_list

            # Normalise the defined name by removing any leading '_xmln.'
            # from internal names and lowercasing the string.
            defined_name = defined_name.replace('_xlnm.', '').lower()

            # Normalise the sheetname by removing the leading quote and
            # lowercasing the string.
            sheet_name = sheet_name.lstrip("'").lower()

            name_list.append(defined_name + "::" + sheet_name)

        # Sort based on the normalised key.
        names.sort(key=operator.itemgetter(4))

        # Remove the extra key used for sorting.
        for name_list in names:
            name_list.pop()

        return names

    def _prepare_drawings(self):
        # Iterate through the worksheets and set up chart and image drawings.
        #
        # chart_ref_id and image_ref_id are running counters shared by all
        # worksheets. drawing_id advances once per worksheet that has at
        # least one chart, image or shape. On completion self.drawings holds
        # the drawing of each such worksheet, self.charts is sorted by chart
        # id and self.drawing_count holds the final drawing_id.
        chart_ref_id = 0
        image_ref_id = 0
        drawing_id = 0
        # These defaults are always replaced by the values returned from
        # _get_image_properties() before they are used below.
        x_dpi = 96
        y_dpi = 96

        for sheet in self.worksheets():
            chart_count = len(sheet.charts)
            image_count = len(sheet.images)
            shape_count = len(sheet.shapes)

            header_image_count = len(sheet.header_images)
            footer_image_count = len(sheet.footer_images)
            has_drawing = False

            # Nothing to prepare for this worksheet.
            if not (chart_count or image_count or shape_count
                    or header_image_count or footer_image_count):
                continue

            # Don't increase the drawing_id for header/footer images: only
            # charts, images and shapes give the worksheet a drawing.
            if chart_count or image_count or shape_count:
                drawing_id += 1
                has_drawing = True

            # Prepare the worksheet charts.
            for index in range(chart_count):
                chart_ref_id += 1
                sheet._prepare_chart(index, chart_ref_id, drawing_id)

            # Prepare the worksheet images.
            for index in range(image_count):
                # Presumably items 2 and 10 of the image record are the
                # filename and the optional in-memory image data; confirm
                # against the code that fills sheet.images.
                filename = sheet.images[index][2]
                image_data = sheet.images[index][10]
                (image_type, width, height, name, x_dpi, y_dpi) = \
                    self._get_image_properties(filename, image_data)
                image_ref_id += 1

                sheet._prepare_image(index, image_ref_id, drawing_id, width,
                                     height, name, image_type, x_dpi, y_dpi)

            # Prepare the header images.
            for index in range(header_image_count):

                filename = sheet.header_images[index][0]
                image_data = sheet.header_images[index][1]
                position = sheet.header_images[index][2]

                (image_type, width, height, name, x_dpi, y_dpi) = \
                    self._get_image_properties(filename, image_data)

                image_ref_id += 1

                sheet._prepare_header_image(image_ref_id, width, height, name,
                                            image_type, position, x_dpi, y_dpi)

            # Prepare the footer images.
            # NOTE(review): footer images are also passed to
            # _prepare_header_image(); presumably `position` is what tells
            # header and footer apart. Confirm against the worksheet code.
            for index in range(footer_image_count):

                filename = sheet.footer_images[index][0]
                image_data = sheet.footer_images[index][1]
                position = sheet.footer_images[index][2]

                (image_type, width, height, name, x_dpi, y_dpi) = \
                    self._get_image_properties(filename, image_data)

                image_ref_id += 1

                sheet._prepare_header_image(image_ref_id, width, height, name,
                                            image_type, position, x_dpi, y_dpi)

            # Only worksheets with charts, images or shapes have a drawing.
            if has_drawing:
                drawing = sheet.drawing
                self.drawings.append(drawing)

        # Sort the workbook chart references into the order that they were
        # written from the worksheets above.
        self.charts = sorted(self.charts, key=lambda chart: chart.id)

        self.drawing_count = drawing_id

    def _get_image_properties(self, filename, image_data):
        # Extract dimension information from the image file.
        height = 0
        width = 0
        x_dpi = 96
        y_dpi = 96

        if not image_data:
            # Open the image file and read in the data.
            fh = open(filename, "rb")
            data = fh.read()
        else:
            # Read the image data from the user supplied byte stream.
            data = image_data.getvalue()

        # Get the image filename without the path.
        image_name = os.path.basename(filename)

        # Look for some common image file markers.
        marker1 = (unpack('3s', data[1:4]))[0]
        marker2 = (unpack('>H', data[:2]))[0]
        marker3 = (unpack('2s', data[:2]))[0]

        if sys.version_info < (2, 6, 0):
            # Python 2.5/Jython.
            png_marker = 'PNG'
            bmp_marker = 'BM'
        else:
            # Eval the binary literals for Python 2.5/Jython compatibility.
            png_marker = eval("b'PNG'")
            bmp_marker = eval("b'BM'")

        if marker1 == png_marker:
            self.image_types['png'] = 1
            (image_type, width, height, x_dpi, y_dpi) = self._process_png(data)

        elif marker2 == 0xFFD8:
            self.image_types['jpeg'] = 1
            (image_type, width, height, x_dpi, y_dpi) = self._process_jpg(data)

        elif marker3 == bmp_marker:
            self.image_types['bmp'] = 1
            (image_type, width, height) = self._process_bmp(data)

        else:
            raise Exception("%s: Unknown or unsupported image file format." %
                            filename)

        # Check that we found the required data.
        if not height or not width:
            raise Exception("%s: no size data found in image file." % filename)

        # Store image data to copy it into file container.
        self.images.append([filename, image_type, image_data])

        if not image_data:
            fh.close()

        return image_type, width, height, image_name, x_dpi, y_dpi

    def _process_png(self, data):
        # Extract width and height information from a PNG file.
        offset = 8
        data_length = len(data)
        end_marker = False
        width = 0
        height = 0
        x_dpi = 96
        y_dpi = 96

        # Look for numbers rather than strings for Python 2.6/3 compatibility.
        marker_ihdr = 0x49484452  # IHDR
        marker_phys = 0x70485973  # pHYs
        marker_iend = 0X49454E44  # IEND

        # Search through the image data to read the height and width in the
        # IHDR element. Also read the DPI in the pHYs element.
        while not end_marker and offset < data_length:

            length = (unpack('>I', data[offset + 0:offset + 4]))[0]
            marker = (unpack('>I', data[offset + 4:offset + 8]))[0]

            # Read the image dimensions.
            if marker == marker_ihdr:
                width = (unpack('>I', data[offset + 8:offset + 12]))[0]
                height = (unpack('>I', data[offset + 12:offset + 16]))[0]

            # Read the image DPI.
            if marker == marker_phys:
                x_density = (unpack('>I', data[offset + 8:offset + 12]))[0]
                y_density = (unpack('>I', data[offset + 12:offset + 16]))[0]
                units = (unpack('b', data[offset + 16:offset + 17]))[0]

                if units == 1:
                    x_dpi = x_density * 0.0254
                    y_dpi = y_density * 0.0254

            if marker == marker_iend:
                end_marker = True
                continue

            offset = offset + length + 12

        return 'png', width, height, x_dpi, y_dpi

    def _process_jpg(self, data):
        # Extract width and height information from a JPEG file.
        offset = 2
        data_length = len(data)
        end_marker = False
        width = 0
        height = 0
        x_dpi = 96
        y_dpi = 96

        # Search through the image data to read the height and width in the
        # 0xFFC0/C2 element. Also read the DPI in the 0xFFE0 element.
        while not end_marker and offset < data_length:

            marker = (unpack('>H', data[offset + 0:offset + 2]))[0]
            length = (unpack('>H', data[offset + 2:offset + 4]))[0]

            # Read the image dimensions.
            if marker == 0xFFC0 or marker == 0xFFC2:
                height = (unpack('>H', data[offset + 5:offset + 7]))[0]
                width = (unpack('>H', data[offset + 7:offset + 9]))[0]

            # Read the image DPI.
            if marker == 0xFFE0:
                units = (unpack('b', data[offset + 11:offset + 12]))[0]
                x_density = (unpack('>H', data[offset + 12:offset + 14]))[0]
                y_density = (unpack('>H', data[offset + 14:offset + 16]))[0]

                if units == 1:
                    x_dpi = x_density
                    y_dpi = y_density

                if units == 2:
                    x_dpi = x_density * 2.54
                    y_dpi = y_density * 2.54

            if marker == 0xFFDA:
                end_marker = True
                continue

            offset = offset + length + 2

        return 'jpeg', width, height, x_dpi, y_dpi

    def _process_bmp(self, data):
        # Extract width and height information from a BMP file.
        width = (unpack('<L', data[18:22]))[0]
        height = (unpack('<L', data[22:26]))[0]
        return 'bmp', width, height

    def _extract_named_ranges(self, defined_names):
        # Extract the named ranges from the sorted list of defined names.
        # These are used in the App.xml file.
        named_ranges = []

        for defined_name in defined_names:

            name = defined_name[0]
            index = defined_name[1]
            sheet_range = defined_name[2]

            # Skip autoFilter ranges.
            if name == '_xlnm._FilterDatabase':
                continue

            # We are only interested in defined names with ranges.
            if '!' in sheet_range:
                sheet_name, _ = sheet_range.split('!', 1)

                # Match Print_Area and Print_Titles xlnm types.
                if name.startswith('_xlnm.'):
                    xlnm_type = name.replace('_xlnm.', '')
                    name = sheet_name + '!' + xlnm_type
                elif index != -1:
                    name = sheet_name + '!' + name

                named_ranges.append(name)

        return named_ranges

    def _get_sheet_index(self, sheetname):
        # Convert a sheet name to its index. Return None otherwise.
        sheetname = sheetname.strip("'")

        if sheetname in self.sheetnames:
            return self.sheetnames.index(sheetname)
        else:
            return None

    def _prepare_vml(self):
        # Iterate through the worksheets and set up the VML objects.
        #
        # The ids below are running counters shared by all worksheets. On
        # completion self.num_vml_files and self.num_comment_files hold the
        # number of worksheets that need a VML file and a comments file.
        comment_id = 0
        vml_drawing_id = 0
        vml_data_id = 1
        vml_header_id = 0
        vml_shape_id = 1024
        vml_files = 0
        comment_files = 0
        has_button = False

        for sheet in self.worksheets():
            # Skip worksheets that have no VML objects of either kind. Note
            # that this also skips the button check at the end of the loop.
            if not sheet.has_vml and not sheet.has_header_vml:
                continue

            vml_files += 1

            if sheet.has_vml:
                if sheet.has_comments:
                    comment_files += 1
                    comment_id += 1

                vml_drawing_id += 1

                count = sheet._prepare_vml_objects(vml_data_id, vml_shape_id,
                                                   vml_drawing_id, comment_id)

                # Each VML should start with a shape id incremented by 1024.
                # Both ids advance by the number of whole 1024 blocks in
                # (1024 + count), which is at least one.
                vml_data_id += 1 * int((1024 + count) / 1024)
                vml_shape_id += 1024 * int((1024 + count) / 1024)

            if sheet.has_header_vml:
                vml_header_id += 1
                vml_drawing_id += 1
                sheet._prepare_header_vml_objects(vml_header_id,
                                                  vml_drawing_id)

            # Store the running totals. This is only reached for worksheets
            # that passed the check at the top of the loop.
            self.num_vml_files = vml_files
            self.num_comment_files = comment_files

            if len(sheet.buttons_list):
                has_button = True

                # Set the sheet vba_codename if it has a button and the
                # workbook has a vbaProject binary.
                if self.vba_project and sheet.vba_codename is None:
                    sheet.set_vba_name()

        # Add a font format for cell comments.
        if comment_files > 0:
            xf = self.add_format({
                'font_name': 'Tahoma',
                'font_size': 8,
                'color_indexed': 81,
                'font_only': True
            })
            # Request the XF index for the new format.
            xf._get_xf_index()

        # Set the workbook vba_codename if one of the sheets has a button and
        # the workbook has a vbaProject binary.
        if has_button and self.vba_project and self.vba_codename is None:
            self.set_vba_name()

    def _prepare_tables(self):
        # Set the table ids for the worksheet tables.
        table_id = 0

        for sheet in self.worksheets():
            table_count = len(sheet.tables)

            if not table_count:
                continue

            sheet._prepare_tables(table_id + 1)
            table_id += table_count

    def _add_chart_data(self):
        # Add "cached" data to charts to provide the numCache and strCacher
        # data for series and title/axis ranges.
        worksheets = {}
        seen_ranges = {}

        # Map worksheet names to worksheet objects.
        for worksheet in self.worksheets():
            worksheets[worksheet.name] = worksheet

        for chart in self.charts:

            for c_range in chart.formula_ids.keys():
                r_id = chart.formula_ids[c_range]

                # Skip if the series has user defined data.
                if chart.formula_data[r_id] is not None:
                    if (c_range not in seen_ranges
                            or seen_ranges[c_range] is None):
                        data = chart.formula_data[r_id]
                        seen_ranges[c_range] = data
                    continue

                # Check to see if the data is already cached locally.
                if c_range in seen_ranges:
                    chart.formula_data[r_id] = seen_ranges[c_range]
                    continue

                # Convert the range formula to a sheet name and cell range.
                (sheetname, cells) = self._get_chart_range(c_range)

                # Skip if we couldn't parse the formula.
                if sheetname is None:
                    continue

                # Handle non-contiguous ranges like:
                #     (Sheet1!$A$1:$A$2,Sheet1!$A$4:$A$5).
                # We don't try to parse them. We just return an empty list.
                if sheetname.startswith('('):
                    chart.formula_data[r_id] = []
                    seen_ranges[c_range] = []
                    continue

                # Warn if the name is unknown since it indicates a user error
                # in a chart series formula.
                if sheetname not in worksheets:
                    warn("Unknown worksheet reference '%s' in range "
                         "'%s' passed to add_series()" % (sheetname, c_range))
                    chart.formula_data[r_id] = []
                    seen_ranges[c_range] = []
                    continue

                # Find the worksheet object based on the sheet name.
                worksheet = worksheets[sheetname]

                # Get the data from the worksheet table.
                data = worksheet._get_range_data(*cells)

                # TODO. Handle SST string ids if required.

                # Add the data to the chart.
                chart.formula_data[r_id] = data

                # Store range data locally to avoid lookup if seen again.
                seen_ranges[c_range] = data

    def _get_chart_range(self, c_range):
        # Convert a range formula such as Sheet1!$B$1:$B$5 into a sheet name
        # and cell range such as ( 'Sheet1', 0, 1, 4, 1 ).

        # Split the range formula into sheetname and cells at the last '!'.
        pos = c_range.rfind('!')
        if pos > 0:
            sheetname = c_range[:pos]
            cells = c_range[pos + 1:]
        else:
            return None

        # Split the cell range into 2 cells or else use single cell for both.
        if cells.find(':') > 0:
            (cell_1, cell_2) = cells.split(':', 1)
        else:
            (cell_1, cell_2) = (cells, cells)

        # Remove leading/trailing quotes and convert escaped quotes to single.
        sheetname = sheetname.strip("'")
        sheetname = sheetname.replace("''", "'")

        (row_start, col_start) = xl_cell_to_rowcol(cell_1)
        (row_end, col_end) = xl_cell_to_rowcol(cell_2)

        # Check that we have a 1D range only.
        if row_start != row_end and col_start != col_end:
            return None

        return sheetname, [row_start, col_start, row_end, col_end]

    def _prepare_sst_string_data(self):
        # Convert the SST string data from a dict to a list.
        self.str_table._sort_string_data()

    ###########################################################################
    #
    # XML methods.
    #
    ###########################################################################

    def _write_workbook(self):
        # Write <workbook> element.

        schema = 'http://schemas.openxmlformats.org'
        xmlns = schema + '/spreadsheetml/2006/main'
        xmlns_r = schema + '/officeDocument/2006/relationships'

        attributes = [
            ('xmlns', xmlns),
            ('xmlns:r', xmlns_r),
        ]

        self._xml_start_tag('workbook', attributes)

    def _write_file_version(self):
        # Write the <fileVersion> element.

        app_name = 'xl'
        last_edited = 4
        lowest_edited = 4
        rup_build = 4505

        attributes = [
            ('appName', app_name),
            ('lastEdited', last_edited),
            ('lowestEdited', lowest_edited),
            ('rupBuild', rup_build),
        ]

        if self.vba_project:
            attributes.append(
                ('codeName', '{37E998C4-C9E5-D4B9-71C8-EB1FF731991C}'))

        self._xml_empty_tag('fileVersion', attributes)

    def _write_workbook_pr(self):
        # Write <workbookPr> element.
        default_theme_version = 124226
        attributes = []

        if self.vba_codename:
            attributes.append(('codeName', self.vba_codename))
        if self.date_1904:
            attributes.append(('date1904', 1))

        attributes.append(('defaultThemeVersion', default_theme_version))

        self._xml_empty_tag('workbookPr', attributes)

    def _write_book_views(self):
        # Write <bookViews> element.
        self._xml_start_tag('bookViews')
        self._write_workbook_view()
        self._xml_end_tag('bookViews')

    def _write_workbook_view(self):
        # Write <workbookView> element.
        attributes = [
            ('xWindow', self.x_window),
            ('yWindow', self.y_window),
            ('windowWidth', self.window_width),
            ('windowHeight', self.window_height),
        ]

        # Store the tabRatio attribute when it isn't the default.
        if self.tab_ratio != 500:
            attributes.append(('tabRatio', self.tab_ratio))

        # Store the firstSheet attribute when it isn't the default.
        if self.worksheet_meta.firstsheet > 0:
            firstsheet = self.worksheet_meta.firstsheet + 1
            attributes.append(('firstSheet', firstsheet))

        # Store the activeTab attribute when it isn't the first sheet.
        if self.worksheet_meta.activesheet > 0:
            attributes.append(('activeTab', self.worksheet_meta.activesheet))

        self._xml_empty_tag('workbookView', attributes)

    def _write_sheets(self):
        # Write <sheets> element.
        self._xml_start_tag('sheets')

        id_num = 1
        for worksheet in self.worksheets():
            self._write_sheet(worksheet.name, id_num, worksheet.hidden)
            id_num += 1

        self._xml_end_tag('sheets')

    def _write_sheet(self, name, sheet_id, hidden):
        # Write <sheet> element.
        attributes = [
            ('name', name),
            ('sheetId', sheet_id),
        ]

        if hidden:
            attributes.append(('state', 'hidden'))

        attributes.append(('r:id', 'rId' + str(sheet_id)))

        self._xml_empty_tag('sheet', attributes)

    def _write_calc_pr(self):
        # Write the <calcPr> element.
        attributes = [('calcId', self.calc_id)]

        if self.calc_mode == 'manual':
            attributes.append(('calcMode', self.calc_mode))
            attributes.append(('calcOnSave', "0"))
        elif self.calc_mode == 'autoNoTable':
            attributes.append(('calcMode', self.calc_mode))

        if self.calc_on_load:
            attributes.append(('fullCalcOnLoad', '1'))

        self._xml_empty_tag('calcPr', attributes)

    def _write_defined_names(self):
        # Write the <definedNames> element.
        if not self.defined_names:
            return

        self._xml_start_tag('definedNames')

        for defined_name in self.defined_names:
            self._write_defined_name(defined_name)

        self._xml_end_tag('definedNames')

    def _write_defined_name(self, defined_name):
        # Write the <definedName> element.
        name = defined_name[0]
        sheet_id = defined_name[1]
        sheet_range = defined_name[2]
        hidden = defined_name[3]

        attributes = [('name', name)]

        if sheet_id != -1:
            attributes.append(('localSheetId', sheet_id))
        if hidden:
            attributes.append(('hidden', 1))

        self._xml_data_element('definedName', sheet_range, attributes)
Ejemplo n.º 6
0
class Workbook(xmlwriter.XMLwriter):
    """
    A class for writing the Excel XLSX Workbook file.


    """

    ###########################################################################
    #
    # Public API.
    #
    ###########################################################################

    def __init__(self, filename=None, options=None):
        """
        Constructor.

        Args:
            filename: The name stored as the workbook's output file.
            options:  Optional dict of workbook options. The keys read here
                      are 'tmpdir', 'date_1904' and 'constant_memory'.

        """

        super(Workbook, self).__init__()

        # Use a fresh dict per call instead of a shared mutable default.
        if options is None:
            options = {}

        self.filename = filename
        self.tmpdir = options.get('tmpdir', None)
        self.date_1904 = options.get('date_1904', False)
        self.worksheet_meta = WorksheetMeta()
        self.selected = 0
        self.fileclosed = 0
        self.filehandle = None
        self.internal_fh = 0

        # Prefixes and counters used to build default sheet/chart names.
        self.sheet_name = 'Sheet'
        self.chart_name = 'Chart'
        self.sheetname_count = 0
        self.chartname_count = 0

        # Containers for the objects that make up the workbook.
        self.worksheets_objs = []
        self.charts = []
        self.drawings = []
        self.sheetnames = []
        self.formats = []
        self.xf_formats = []
        self.xf_format_indices = {}
        self.dxf_formats = []
        self.dxf_format_indices = {}
        self.palette = []
        self.font_count = 0
        self.num_format_count = 0
        self.defined_names = []
        self.named_ranges = []
        self.custom_colors = []
        self.doc_properties = {}
        self.localtime = datetime.now()
        self.num_vml_files = 0
        self.num_comment_files = 0
        self.optimization = options.get('constant_memory', 0)

        # Window geometry values.
        self.x_window = 240
        self.y_window = 15
        self.window_width = 16095
        self.window_height = 9660
        self.tab_ratio = 500

        self.str_table = SharedStringTable()
        self.vba_project = None
        self.vba_codename = None
        self.image_types = {}
        self.images = []

        # Add the default cell format.
        self.add_format({'xf_index': 0})

    def __del__(self):
        """Close file in destructor if it hasn't been closed explicitly."""
        if not self.fileclosed:
            self.close()

    def add_worksheet(self, name=None):
        """
        Create a worksheet, register it with the workbook and return it.

        Args:
            name: The worksheet name. Defaults to 'Sheet1', etc.

        Returns:
            Reference to a worksheet object.

        """
        # The new sheet's index is its position in the list before appending.
        index = len(self.worksheets_objs)

        # Validate (or generate) the name, then encode the user string.
        sheetname = encode_utf8(self._check_sheetname(name))

        # Create the worksheet and hand it the shared workbook state.
        worksheet = Worksheet()
        worksheet._initialize({
            'name': sheetname,
            'index': index,
            'str_table': self.str_table,
            'worksheet_meta': self.worksheet_meta,
            'optimization': self.optimization,
            'tmpdir': self.tmpdir,
            'date_1904': self.date_1904,
        })

        self.worksheets_objs.append(worksheet)
        self.sheetnames.append(sheetname)

        return worksheet

    def add_format(self, properties=None):
        """
        Add a new Format to the Excel Workbook.

        Args:
            properties: The format properties. Defaults to an empty dict.

        Returns:
            Reference to a Format object.

        """
        # Use a fresh dict for each call. A mutable default would be a single
        # dict object shared by every Format created without properties.
        if properties is None:
            properties = {}

        xf_format = Format(properties,
                           self.xf_format_indices,
                           self.dxf_format_indices)

        # Store the format reference.
        self.formats.append(xf_format)

        return xf_format

    def add_chart(self, options):
        """
        Create a chart object.

        Args:
            options: The chart type and subtype options. The 'type' key is
                     required; an optional 'name' key sets the chart name.

        Returns:
            Reference to a Chart object, or None (after a warning) if the
            chart type is missing or unknown.

        """

        # Type must be specified so we can create the required chart instance.
        # Note: the lookup must default to None (not the string 'None') for
        # the missing-type warning below to be reachable.
        chart_type = options.get('type')
        if chart_type is None:
            warn("Chart type must be defined in add_chart()")
            return

        if chart_type == 'area':
            chart = ChartArea(options)
        elif chart_type == 'bar':
            chart = ChartBar(options)
        elif chart_type == 'column':
            chart = ChartColumn(options)
        elif chart_type == 'line':
            chart = ChartLine(options)
        elif chart_type == 'pie':
            chart = ChartPie(options)
        elif chart_type == 'radar':
            chart = ChartRadar(options)
        elif chart_type == 'scatter':
            chart = ChartScatter(options)
        elif chart_type == 'stock':
            chart = ChartStock(options)
        else:
            warn("Unknown chart type '%s' in add_chart()" % chart_type)
            return

        # Set the embedded chart name if present.
        if 'name' in options:
            chart.chart_name = options['name']

        chart._set_embedded_config_data()
        self.charts.append(chart)

        return chart

    def close(self):
        """
        Finalise and write the workbook. Repeated calls are no-ops.

        Args:
            None.

        Returns:
            Nothing.

        """
        if self.fileclosed:
            return

        # Mark the workbook as closed before storing it.
        self.fileclosed = 1
        self._store_workbook()

    def set_properties(self, properties):
        """
        Store the document properties (Title, Author etc.) for the workbook.

        The dictionary is stored by reference and replaces any properties
        that were set previously.

        Args:
            properties: Dictionary of document properties.

        Returns:
            Nothing.

        """
        self.doc_properties = properties

    def define_name(self, name, formula):
        """
        Create a defined name in the workbook.

        Both global/workbook level names ("name") and local/worksheet level
        names ("Sheet1!name") are handled.

        Args:
            name:    The defined name.
            formula: The cell or range that the defined name refers to.

        Returns:
            Nothing on success. -1, after a warning, if the name is
            rejected.

        """
        # Remove the = sign from the formula if it exists.
        if formula.startswith('='):
            formula = formula.lstrip('=')

        # Local defined names are formatted like "Sheet1!name".
        match = re.match(r'^(.*)!(.*)$', name)

        if match:
            sheetname = match.group(1)
            name = match.group(2)
            sheet_index = self._get_sheet_index(sheetname)

            # Warn if the sheet index wasn't found.
            if sheet_index is None:
                warn("Unknown sheet name '%s' in defined_name()" % sheetname)
                return -1
        else:
            # Use -1 to indicate global names.
            sheet_index = -1

        # Warn if the defined name contains invalid chars as defined by
        # Excel. The whole name is validated (anchored at both ends): it must
        # start with a letter, underscore or backslash and continue with word
        # characters, backslashes or periods. Single character names are
        # allowed.
        if (not re.match(r'^[\w\\][\w\\.]*$', name, re.UNICODE)
                or re.match(r'^\d', name)):
            warn("Invalid Excel characters in defined_name(): '%s'" % name)
            return -1

        # Warn if the defined name looks like a cell name.
        if re.match(r'^[a-zA-Z][a-zA-Z]?[a-dA-D]?[0-9]+$', name):
            warn("Name looks like a cell name in defined_name(): '%s'" % name)
            return -1

        # Encode any string options passed by the user.
        name = encode_utf8(name)
        formula = encode_utf8(formula)

        # Stored as [name, sheet index, formula, hidden flag].
        self.defined_names.append([name, sheet_index, formula, False])

    def worksheets(self):
        """
        Return the workbook's worksheet objects.

        The internal list itself is returned, not a copy.

        Args:
            None.

        Returns:
            A list of worksheet objects.

        """
        return self.worksheets_objs

    ###########################################################################
    #
    # Private API.
    #
    ###########################################################################

    def _assemble_xml_file(self):
        # Assemble and write the XML file.

        # Prepare format object for passing to Style.pm.
        self._prepare_format_properties()

        # Write the XML declaration.
        self._xml_declaration()

        # Write the workbook element.
        self._write_workbook()

        # Write the fileVersion element.
        self._write_file_version()

        # Write the workbookPr element.
        self._write_workbook_pr()

        # Write the bookViews element.
        self._write_book_views()

        # Write the sheets element.
        self._write_sheets()

        # Write the workbook defined names.
        self._write_defined_names()

        # Write the calcPr element.
        self._write_calc_pr()

        # Close the workbook tag.
        self._xml_end_tag('workbook')

        # Close the file.
        self._xml_close()

    def _store_workbook(self):
        # Assemble worksheets into a workbook: run the preparation steps,
        # let the Packager write the package files into a temporary
        # directory and then zip that directory into self.filename.
        #
        # The temporary directory is removed and the zip file is closed even
        # if one of the steps raises.
        temp_dir = tempfile.mkdtemp(dir=self.tmpdir)

        try:
            packager = Packager()

            # Add a default worksheet if none have been added.
            if not self.worksheets():
                self.add_worksheet()

            # Ensure that at least one worksheet has been selected.
            if self.worksheet_meta.activesheet == 0:
                self.worksheets_objs[0].selected = 1
                self.worksheets_objs[0].hidden = 0

            # Set the active sheet.
            for sheet in self.worksheets():
                if sheet.index == self.worksheet_meta.activesheet:
                    sheet.active = 1

            # Convert the SST strings data structure.
            self._prepare_sst_string_data()

            # Prepare the worksheet VML elements such as comments and buttons.
            self._prepare_vml()

            # Set the defined names for the worksheets such as Print Titles.
            self._prepare_defined_names()

            # Prepare the drawings, charts and images.
            self._prepare_drawings()

            # Add cached data to charts.
            self._add_chart_data()

            # Package the workbook.
            packager._add_workbook(self)
            packager._set_package_dir(temp_dir)
            packager._create_package()

            # Free up the Packager object.
            packager = None

            xlsx_file = ZipFile(self.filename, "w", compression=ZIP_DEFLATED)
            try:
                # Iterate through files in the temp dir and add them to the
                # xlsx file under their path relative to the temp dir.
                for dirpath, _, filenames in os.walk(temp_dir):
                    for name in filenames:
                        abs_filename = os.path.join(dirpath, name)
                        rel_filename = os.path.relpath(abs_filename, temp_dir)
                        xlsx_file.write(abs_filename, rel_filename)
            finally:
                xlsx_file.close()
        finally:
            shutil.rmtree(temp_dir)

    def _check_sheetname(self, sheetname, is_chart=False):
        # Check for valid worksheet names. We check the length, if it contains
        # any invalid chars and if the sheetname is unique in the workbook.
        invalid_char = re.compile(r'[\[\]:*?/\\]')

        # Increment the Sheet/Chart number used for default sheet names below.
        if is_chart:
            self.chartname_count += 1
        else:
            self.sheetname_count += 1

        # Supply default Sheet/Chart sheetname if none has been defined.
        if sheetname is None:
            if is_chart:
                sheetname = self.chart_name + str(self.chartname_count)
            else:
                sheetname = self.sheet_name + str(self.sheetname_count)

        # Check that sheet sheetname is <= 31. Excel limit.
        if len(sheetname) > 31:
            raise Exception("Excel worksheet name '%s' must be <= 31 chars." %
                            sheetname)

        # Check that sheetname doesn't contain any invalid characters
        if invalid_char.search(sheetname):
            raise Exception(
                "Invalid Excel character '[]:*?/\\' in sheetname '%s'" %
                sheetname)

        # Check that the worksheet name doesn't already exist since this is a
        # fatal Excel error. The check must be case insensitive like Excel.
        for worksheet in self.worksheets():
            if sheetname.lower() == worksheet.name.lower():
                raise Exception(
                    "Sheetname '%s', with case ignored, is already in use." %
                    sheetname)

        return sheetname

    def _prepare_format_properties(self):
        # Prepare all Format properties prior to passing them to styles.py.

        # Separate format objects into XF and DXF formats.
        self._prepare_formats()

        # Set the font index for the format objects.
        self._prepare_fonts()

        # Set the number format index for the format objects.
        self._prepare_num_formats()

        # Set the border index for the format objects.
        self._prepare_borders()

        # Set the fill index for the format objects.
        self._prepare_fills()

    def _prepare_formats(self):
        # Iterate through the XF Format objects and separate them into
        # XF and DXF formats. The XF and DF formats then need to be sorted
        # back into index order rather than creation order.
        xf_formats = []
        dxf_formats = []

        # Sort into XF and DXF formats.
        for xf_format in self.formats:
            if xf_format.xf_index is not None:
                xf_formats.append(xf_format)

            if xf_format.dxf_index is not None:
                dxf_formats.append(xf_format)

        # Pre-extend the format lists.
        self.xf_formats = [None] * len(xf_formats)
        self.dxf_formats = [None] * len(dxf_formats)

        # Rearrange formats into index order.
        for xf_format in xf_formats:
            index = xf_format.xf_index
            self.xf_formats[index] = xf_format

        for dxf_format in dxf_formats:
            index = dxf_format.dxf_index
            self.dxf_formats[index] = dxf_format

    def _set_default_xf_indices(self):
        # Set the default index for each format. Mainly used for testing.
        for xf_format in self.formats:
            xf_format._get_xf_index()

    def _prepare_fonts(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default font elements.
        fonts = {}
        index = 0

        for xf_format in self.xf_formats:
            key = xf_format._get_font_key()
            if key in fonts:
                # Font has already been used.
                xf_format.font_index = fonts[key]
                xf_format.has_font = 0
            else:
                # This is a new font.
                fonts[key] = index
                xf_format.font_index = index
                xf_format.has_font = 1
                index += 1

        self.font_count = index

        # For DXF formats we only need to check if the properties have changed.
        for xf_format in self.dxf_formats:
            # The only font properties that can change for a DXF format are:
            # color, bold, italic, underline and strikethrough.
            if (xf_format.font_color or xf_format.bold or xf_format.italic
                    or xf_format.underline or xf_format.font_strikeout):
                xf_format.has_dxf_font = 1

    def _prepare_num_formats(self):
        # User records is not None start from index 0xA4.
        num_formats = {}
        index = 164
        num_format_count = 0

        is_number = re.compile(r'^\d+$')
        is_zeroes = re.compile(r'^0+\d')

        for xf_format in (self.xf_formats + self.dxf_formats):
            num_format = xf_format.num_format
            # Check if num_format is an index to a built-in number format.
            # Also check for a string of zeros, which is a valid number
            # format string but would evaluate to zero.
            if (is_number.match(str(num_format))
                    and not is_zeroes.match(str(num_format))):
                # Index to a built-in number xf_format.
                xf_format.num_format_index = num_format
                continue

            if num_format in num_formats:
                # Number xf_format has already been used.
                xf_format.num_format_index = num_formats[num_format]
            else:
                # Add a new number xf_format.
                num_formats[num_format] = index
                xf_format.num_format_index = index
                index += 1

                # Only increase font count for XF formats (not DXF formats).
                if xf_format.xf_index:
                    num_format_count += 1

        self.num_format_count = num_format_count

    def _prepare_borders(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default border elements.
        borders = {}
        index = 0

        for xf_format in self.xf_formats:
            key = xf_format._get_border_key()

            if key in borders:
                # Border has already been used.
                xf_format.border_index = borders[key]
                xf_format.has_border = 0
            else:
                # This is a new border.
                borders[key] = index
                xf_format.border_index = index
                xf_format.has_border = 1
                index += 1

        self.border_count = index

        # For DXF formats we only need to check if the properties have changed.
        has_border = re.compile(r'[^0:]')

        for xf_format in self.dxf_formats:
            key = xf_format._get_border_key()

            if has_border.search(key):
                xf_format.has_dxf_border = 1

    def _prepare_fills(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default fill elements.
        # The user defined fill properties start from 2 since there are 2
        # default fills: patternType="none" and patternType="gray125".
        fills = {}
        index = 2  # Start from 2. See above.

        # Add the default fills.
        fills['0:0:0'] = 0
        fills['17:0:0'] = 1

        # Store the DXF colours separately since them may be reversed below.
        for xf_format in self.dxf_formats:
            if (xf_format.pattern or xf_format.bg_color or xf_format.fg_color):
                xf_format.has_dxf_fill = 1
                xf_format.dxf_bg_color = xf_format.bg_color
                xf_format.dxf_fg_color = xf_format.fg_color

        for xf_format in self.xf_formats:
            # The following logical statements jointly take care of special
            # cases in relation to cell colours and patterns:
            # 1. For a solid fill (_pattern == 1) Excel reverses the role of
            # foreground and background colours, and
            # 2. If the user specifies a foreground or background colour
            # without a pattern they probably wanted a solid fill, so we fill
            # in the defaults.
            if (xf_format.pattern == 1 and xf_format.bg_color != 0
                    and xf_format.fg_color != 0):
                tmp = xf_format.fg_color
                xf_format.fg_color = xf_format.bg_color
                xf_format.bg_color = tmp

            if (xf_format.pattern <= 1 and xf_format.bg_color != 0
                    and xf_format.fg_color == 0):
                xf_format.fg_color = xf_format.bg_color
                xf_format.bg_color = 0
                xf_format.pattern = 1

            if (xf_format.pattern <= 1 and xf_format.bg_color == 0
                    and xf_format.fg_color != 0):
                xf_format.bg_color = 0
                xf_format.pattern = 1

            key = xf_format._get_fill_key()

            if key in fills:
                # Fill has already been used.
                xf_format.fill_index = fills[key]
                xf_format.has_fill = 0
            else:
                # This is a new fill.
                fills[key] = index
                xf_format.fill_index = index
                xf_format.has_fill = 1
                index += 1

        self.fill_count = index

    def _prepare_defined_names(self):
        # Iterate through the worksheets and store any defined names in
        # addition to any user defined names. Stores the defined names
        # for the Workbook.xml and the named ranges for App.xml.
        defined_names = self.defined_names

        for sheet in self.worksheets():
            # Check for Print Area settings.
            if sheet.autofilter_area:
                hidden = 1
                sheet_range = sheet.autofilter_area
                # Store the defined names.
                defined_names.append(['_xlnm._FilterDatabase',
                                      sheet.index, sheet_range, hidden])

            # Check for Print Area settings.
            if sheet.print_area_range:
                hidden = 0
                sheet_range = sheet.print_area_range
                # Store the defined names.
                defined_names.append(['_xlnm.Print_Area',
                                      sheet.index, sheet_range, hidden])

            # Check for repeat rows/cols referred to as Print Titles.
            if sheet.repeat_col_range or sheet.repeat_row_range:
                hidden = 0
                sheet_range = ''
                if sheet.repeat_col_range and sheet.repeat_row_range:
                    sheet_range = (sheet.repeat_col_range + ',' +
                                   sheet.repeat_row_range)
                else:
                    sheet_range = (sheet.repeat_col_range +
                                   sheet.repeat_row_range)
                # Store the defined names.
                defined_names.append(['_xlnm.Print_Titles',
                                      sheet.index, sheet_range, hidden])

        defined_names = self._sort_defined_names(defined_names)
        self.defined_names = defined_names
        self.named_ranges = self._extract_named_ranges(defined_names)

    def _sort_defined_names(self, names):
        # Sort the list of list of internal and user defined names in
        # the same order as used by Excel.

        # Add a normalise name string to each list for sorting.
        for name_list in names:
            (defined_name, _, sheet_name, _) = name_list

            # Normalise the defined name by removing any leading '_xmln.'
            # from internal names and lowercasing the string.
            defined_name = defined_name.replace('_xlnm.', '').lower()

            # Normalise the sheetname by removing the leading quote and
            # lowercasing the string.
            sheet_name = sheet_name.lstrip("'").lower()

            name_list.append(defined_name + "::" + sheet_name)

        # Remove the extra key for sorting.
        names.sort(key=operator.itemgetter(4))

        for name_list in names:
            name_list.pop()

        return names

    def _prepare_drawings(self):
        # Iterate through the worksheets and set up chart and image drawings.
        chart_ref_id = 0
        image_ref_id = 0
        drawing_id = 0

        for sheet in self.worksheets():
            chart_count = len(sheet.charts)
            image_count = len(sheet.images)
            shape_count = len(sheet.shapes)

            if not (chart_count + image_count + shape_count):
                continue

            drawing_id += 1

            for index in range(chart_count):
                chart_ref_id += 1
                sheet._prepare_chart(index, chart_ref_id, drawing_id)

            for index in range(image_count):
                filename = sheet.images[index][2]
                (image_type, width, height, name) = \
                    self._get_image_properties(filename)
                image_ref_id += 1

                sheet._prepare_image(index, image_ref_id, drawing_id, width,
                                     height, name, image_type)

            # for index in range(shape_count):
            #    sheet._prepare_shape(index, drawing_id)

            drawing = sheet.drawing
            self.drawings.append(drawing)

        # Sort the workbook charts references into the order that the were
        # written from the worksheets above.
        self.charts = sorted(self.charts, key=lambda chart: chart.id)

        self.drawing_count = drawing_id

    def _get_image_properties(self, filename):
        # Extract dimension information from the image file.
        height = 0
        width = 0

        # Open the image file and read in the data.
        fh = open(filename, "rb")
        data = fh.read()

        # Get the image filename without the path.
        image_name = os.path.basename(filename)

        # Look for some common image file markers.
        marker1 = (unpack('3s', data[1:4]))[0]
        marker2 = (unpack('>H', data[:2]))[0]
        marker3 = (unpack('4s', data[6:10]))[0]
        marker4 = (unpack('2s', data[:2]))[0]

        if marker1 == b'PNG':
            self.image_types['png'] = 1
            (image_type, width, height) = self._process_png(data)

        elif (marker2 == 0xFFD8 and
              (marker3 == b'JFIF' or marker3 == b'EXIF')):
            self.image_types['jpeg'] = 1
            (image_type, width, height) = self._process_jpg(data)

        elif (marker4 == b'BM'):
            self.image_types['bmp'] = 1
            (image_type, width, height) = self._process_bmp(data)

        else:
            raise Exception("%s: Unknown or unsupported file type." % filename)

        # Check that we found the required data.
        if not height or not width:
            raise Exception("%s: no size data found in image file." % filename)

        # Store image data to copy it into file container.
        self.images.append([filename, image_type])

        fh.close()
        return (image_type, width, height, image_name)

    def _process_png(self, data):
        # Extract width and height information from a PNG file.
        width = (unpack('>I', data[16:20]))[0]
        height = (unpack('>I', data[20:24]))[0]

        return ('png', width, height)

    def _process_jpg(self, data):
        # Extract width and height information from a JPEG file.
        offset = 2
        data_length = len(data)

        # Search through the image data to find the 0xFFC0 marker.
        # The height and width are contained in the data for that
        # sub-element.
        found = 0
        while not found and offset < data_length:

            marker = (unpack('>H', data[offset + 0:offset + 2]))[0]
            length = (unpack('>H', data[offset + 2:offset + 4]))[0]

            if marker == 0xFFC0 or marker == 0xFFC2:
                height = (unpack('>H', data[offset + 5:offset + 7]))[0]
                width = (unpack('>H', data[offset + 7:offset + 9]))[0]
                found = 1
                continue

            offset = offset + length + 2

            if marker == 0xFFDA:
                found = 1
                continue

        return ('jpeg', width, height)

    def _process_bmp(self, data):
        # Extract width and height information from a BMP file.
        width = (unpack('<L', data[18:22]))[0]
        height = (unpack('<L', data[22:26]))[0]
        return ('bmp', width, height)

    def _extract_named_ranges(self, defined_names):
        # Extract the named ranges from the sorted list of defined names.
        # These are used in the App.xml file.
        named_ranges = []

        for defined_name in (defined_names):

            name = defined_name[0]
            index = defined_name[1]
            sheet_range = defined_name[2]

            # Skip autoFilter ranges.
            if name == '_xlnm._FilterDatabase':
                continue

            # We are only interested in defined names with ranges.
            if '!' in sheet_range:
                sheet_name, _ = sheet_range.split('!', 1)

                # Match Print_Area and Print_Titles xlnm types.
                if name.startswith('_xlnm.'):
                    xlnm_type = name.replace('_xlnm.', '')
                    name = sheet_name + '!' + xlnm_type
                elif index != -1:
                    name = sheet_name + '!' + name

                named_ranges.append(name)

        return named_ranges

    def _get_sheet_index(self, sheetname):
        # Convert a sheet name to its index. Return None otherwise.
        sheetname = sheetname.strip("'")

        if sheetname in self.sheetnames:
            return self.sheetnames.index(sheetname)
        else:
            return None

    #
    # Iterate through the worksheets and set up the VML objects.
    #
    def _prepare_vml(self):
        comment_id = 0
        vml_data_id = 1
        vml_shape_id = 1024
        vml_files = 0
        comment_files = 0

        for sheet in self.worksheets():
            if not sheet.has_vml:
                continue

            vml_files += 1

            if sheet.has_comments:
                comment_files += 1

            comment_id += 1
            count = sheet._prepare_vml_objects(vml_data_id,
                                               vml_shape_id,
                                               comment_id)

            # Each VML file should start with a shape id incremented by 1024.
            vml_data_id += 1 * int((1024 + count) / 1024)
            vml_shape_id += 1024 * int((1024 + count) / 1024)

        self.num_vml_files = vml_files
        self.num_comment_files = comment_files

        # Add a font format for cell comments.
        if comment_files > 0:
            xf = self.add_format({'font_name': 'Tahoma', 'font_size': 8,
                                  'color_indexed': 81, 'font_only': True})
            xf._get_xf_index()

    def _add_chart_data(self):
        # Add "cached" data to charts to provide the numCache and strCacher
        # data for series and title/axis ranges.
        worksheets = {}
        seen_ranges = {}

        # Map worksheet names to worksheet objects.
        for worksheet in self.worksheets():
            worksheets[worksheet.name] = worksheet

        for chart in self.charts:

            for c_range in chart.formula_ids.keys():
                r_id = chart.formula_ids[c_range]

                # Skip if the series has user defined data.
                if chart.formula_data[r_id] is not None:
                    if (not c_range in seen_ranges
                            or seen_ranges[c_range] is None):
                        data = chart.formula_data[r_id]
                        seen_ranges[c_range] = data
                    continue

                # Check to see if the data is already cached locally.
                if c_range in seen_ranges:
                    chart.formula_data[r_id] = seen_ranges[c_range]
                    continue

                # Convert the range formula to a sheet name and cell range.
                (sheetname, cells) = self._get_chart_range(c_range)

                # Skip if we couldn't parse the formula.
                if sheetname is None:
                    continue

                # Die if the name is unknown since it indicates a user error in
                # a chart series formula.
                if not sheetname in worksheets:
                    warn("Unknown worksheet reference '%s' in range "
                         "'%s' passed to add_series()" % (sheetname, c_range))

                # Find the worksheet object based on the sheet name.
                worksheet = worksheets[sheetname]

                # Get the data from the worksheet table.
                data = worksheet._get_range_data(*cells)

                # TODO
                #   # Ignore rich strings for now. Deparse later if necessary.
                #        if token =~ m{^<r>} and token =~ m{</r>$}:
                #            token = ''

                # Add the data to the chart.
                chart.formula_data[r_id] = data

                # Store range data locally to avoid lookup if seen again.
                seen_ranges[c_range] = data

    def _get_chart_range(self, c_range):
        # Convert a range formula such as Sheet1!$B$1:$B$5 into a sheet name
        # and cell range such as ( 'Sheet1', 0, 1, 4, 1 ).

        # Split the range formula into sheetname and cells at the last '!'.
        # TODO. Fix this to match from right.
        pos = c_range.find('!')
        if pos > 0:
            sheetname, cells = c_range.split('!')
        else:
            return None

        # Split the cell range into 2 cells or else use single cell for both.
        if cells.find(':') > 0:
            (cell_1, cell_2) = cells.split(':')
        else:
            (cell_1, cell_2) = (cells, cells)

        # Remove leading/trailing quotes and convert escaped quotes to single.
        sheetname = sheetname.strip("'")
        sheetname = sheetname.replace("''", "'")

        (row_start, col_start) = xl_cell_to_rowcol(cell_1)
        (row_end, col_end) = xl_cell_to_rowcol(cell_2)

        # Check that we have a 1D range only.
        if row_start != row_end and col_start != col_end:
            return None

        return (sheetname, [row_start, col_start, row_end, col_end])

    def _prepare_sst_string_data(self):
        # Convert the SST (shared string table) data from a dict to a list.
        # The work is delegated to the table object held in self.str_table.
        self.str_table._sort_string_data()

    ###########################################################################
    #
    # XML methods.
    #
    ###########################################################################

    def _write_workbook(self):
        # Write <workbook> element.

        schema = 'http://schemas.openxmlformats.org'
        xmlns = schema + '/spreadsheetml/2006/main'
        xmlns_r = schema + '/officeDocument/2006/relationships'

        attributes = [
            ('xmlns', xmlns),
            ('xmlns:r', xmlns_r),
        ]

        self._xml_start_tag('workbook', attributes)

    def _write_file_version(self):
        # Write the <fileVersion> element.

        app_name = 'xl'
        last_edited = 4
        lowest_edited = 4
        rup_build = 4505

        attributes = [
            ('appName', app_name),
            ('lastEdited', last_edited),
            ('lowestEdited', lowest_edited),
            ('rupBuild', rup_build),
        ]

        if self.vba_project:
            attributes.append(
                ('codeName', '{37E998C4-C9E5-D4B9-71C8-EB1FF731991C}'))

        self._xml_empty_tag('fileVersion', attributes)

    def _write_workbook_pr(self):
        # Write <workbookPr> element.
        default_theme_version = 124226
        attributes = []

        if self.vba_codename:
            attributes.append(('codeName', self.vba_codename))
        if self.date_1904:
            attributes.append(('date1904', 1))

        attributes.append(('defaultThemeVersion', default_theme_version))

        self._xml_empty_tag('workbookPr', attributes)

    def _write_book_views(self):
        # Write the <bookViews> element, which wraps the single
        # <workbookView> child written by _write_workbook_view().
        self._xml_start_tag('bookViews')
        self._write_workbook_view()
        self._xml_end_tag('bookViews')

    def _write_workbook_view(self):
        # Write <workbookView> element.
        attributes = [
            ('xWindow', self.x_window),
            ('yWindow', self.y_window),
            ('windowWidth', self.window_width),
            ('windowHeight', self.window_height),
        ]

        # Store the tabRatio attribute when it isn't the default.
        if self.tab_ratio != 500:
            attributes.append(('tabRatio', self.tab_ratio))

        # Store the firstSheet attribute when it isn't the default.
        if self.worksheet_meta.firstsheet > 0:
            firstsheet = self.worksheet_meta.firstsheet + 1
            attributes.append(('firstSheet', firstsheet))

        # Store the activeTab attribute when it isn't the first sheet.
        if self.worksheet_meta.activesheet > 0:
            attributes.append(('activeTab', self.worksheet_meta.activesheet))

        self._xml_empty_tag('workbookView', attributes)

    def _write_sheets(self):
        # Write <sheets> element.
        self._xml_start_tag('sheets')

        id_num = 1
        for worksheet in self.worksheets():
            self._write_sheet(worksheet.name, id_num, worksheet.hidden)
            id_num += 1

        self._xml_end_tag('sheets')

    def _write_sheet(self, name, sheet_id, hidden):
        # Write <sheet> element.
        attributes = [
            ('name', name),
            ('sheetId', sheet_id),
        ]

        if hidden:
            attributes.append(('state', 'hidden'))

        attributes.append(('r:id', 'rId' + str(sheet_id)))

        self._xml_empty_tag('sheet', attributes)

    def _write_calc_pr(self):
        # Write the <calcPr> element.

        calc_id = '124519'

        attributes = [('calcId', calc_id)]

        self._xml_empty_tag('calcPr', attributes)

    def _write_defined_names(self):
        # Write the <definedNames> element.
        if not self.defined_names:
            return

        self._xml_start_tag('definedNames')

        for defined_name in (self.defined_names):
            self._write_defined_name(defined_name)

        self._xml_end_tag('definedNames')

    def _write_defined_name(self, defined_name):
        # Write the <definedName> element.
        name = defined_name[0]
        sheet_id = defined_name[1]
        sheet_range = defined_name[2]
        hidden = defined_name[3]

        attributes = [('name', name)]

        if sheet_id != -1:
            attributes.append(('localSheetId', sheet_id))
        if hidden:
            attributes.append(('hidden', 1))

        self._xml_data_element('definedName', sheet_range, attributes)
Ejemplo n.º 7
0
    def __init__(self, filename=None, options=None):
        """
        Constructor.

        Args:
            filename: The workbook file name. Stored as self.filename.
            options:  Optional dict of workbook options. The keys read here
                      are 'tmpdir', 'date_1904', 'strings_to_numbers',
                      'strings_to_formulas', 'strings_to_urls',
                      'default_date_format', 'constant_memory', 'in_memory',
                      'excel2003_style' and 'default_format_properties'.
                      None is treated as an empty dict.

        """
        # Use None rather than a shared mutable {} as the default argument.
        if options is None:
            options = {}

        super(Workbook, self).__init__()

        self.filename = filename

        self.tmpdir = options.get('tmpdir', None)
        self.date_1904 = options.get('date_1904', False)
        self.strings_to_numbers = options.get('strings_to_numbers', False)
        self.strings_to_formulas = options.get('strings_to_formulas', True)
        self.strings_to_urls = options.get('strings_to_urls', True)
        self.default_date_format = options.get('default_date_format', None)
        self.optimization = options.get('constant_memory', False)
        self.in_memory = options.get('in_memory', False)
        self.excel2003_style = options.get('excel2003_style', False)
        self.default_format_properties = \
            options.get('default_format_properties', {})

        self.worksheet_meta = WorksheetMeta()
        self.selected = 0
        self.fileclosed = 0
        self.filehandle = None
        self.internal_fh = 0
        self.sheet_name = 'Sheet'
        self.chart_name = 'Chart'
        self.sheetname_count = 0
        self.chartname_count = 0
        self.worksheets_objs = []
        self.charts = []
        self.drawings = []
        self.sheetnames = []
        self.formats = []
        self.xf_formats = []
        self.xf_format_indices = {}
        self.dxf_formats = []
        self.dxf_format_indices = {}
        self.palette = []
        self.font_count = 0
        self.num_format_count = 0
        self.defined_names = []
        self.named_ranges = []
        self.custom_colors = []
        self.doc_properties = {}
        self.localtime = datetime.now()
        self.num_vml_files = 0
        self.num_comment_files = 0
        self.x_window = 240
        self.y_window = 15
        self.window_width = 16095
        self.window_height = 9660
        self.tab_ratio = 500
        self.str_table = SharedStringTable()
        self.vba_project = None
        self.vba_codename = None
        self.image_types = {}
        self.images = []
        self.border_count = 0
        self.fill_count = 0
        self.drawing_count = 0
        self.calc_mode = "auto"
        self.calc_on_load = True
        self.allow_zip64 = False
        self.calc_id = 124519

        # We can't do 'constant_memory' mode while doing 'in_memory' mode.
        if self.in_memory:
            self.optimization = False

        # Add the default cell format. This must be the first format added
        # since it is created with an explicit xf_index of 0.
        if self.excel2003_style:
            self.add_format({'xf_index': 0, 'font_family': 0})
        else:
            self.add_format({'xf_index': 0})

        # Add a default URL format.
        self.default_url_format = self.add_format({'color': 'blue',
                                                   'underline': 1})

        # Add the default date format. The user supplied number format
        # string is replaced by the Format object created from it.
        if self.default_date_format is not None:
            self.default_date_format = \
                self.add_format({'num_format': self.default_date_format})
Ejemplo n.º 8
0
class Workbook(xmlwriter.XMLwriter):
    """
    A class for writing the Excel XLSX Workbook file.


    """

    ###########################################################################
    #
    # Public API.
    #
    ###########################################################################

    def __init__(self, filename=None, options=None):
        """
        Constructor.

        Args:
            filename: The name of the xlsx file. It is stored as
                      self.filename and used as the zip file target when
                      the workbook is stored.
            options:  Optional dict of workbook options. The keys read here
                      are "tmpdir", "date_1904", "strings_to_numbers",
                      "strings_to_formulas", "strings_to_urls",
                      "nan_inf_to_errors", "default_date_format",
                      "constant_memory", "in_memory", "excel2003_style" and
                      "default_format_properties". None is treated as an
                      empty dict.

        """
        # Use None rather than a shared mutable {} as the default argument.
        if options is None:
            options = {}

        super(Workbook, self).__init__()

        self.filename = filename

        self.tmpdir = options.get("tmpdir", None)
        self.date_1904 = options.get("date_1904", False)
        self.strings_to_numbers = options.get("strings_to_numbers", False)
        self.strings_to_formulas = options.get("strings_to_formulas", True)
        self.strings_to_urls = options.get("strings_to_urls", True)
        self.nan_inf_to_errors = options.get("nan_inf_to_errors", False)
        self.default_date_format = options.get("default_date_format", None)
        self.optimization = options.get("constant_memory", False)
        self.in_memory = options.get("in_memory", False)
        self.excel2003_style = options.get("excel2003_style", False)
        self.default_format_properties = options.get("default_format_properties", {})

        self.worksheet_meta = WorksheetMeta()
        self.selected = 0
        self.fileclosed = 0
        self.filehandle = None
        self.internal_fh = 0
        self.sheet_name = "Sheet"
        self.chart_name = "Chart"
        self.sheetname_count = 0
        self.chartname_count = 0
        self.worksheets_objs = []
        self.charts = []
        self.drawings = []
        self.sheetnames = []
        self.formats = []
        self.xf_formats = []
        self.xf_format_indices = {}
        self.dxf_formats = []
        self.dxf_format_indices = {}
        self.palette = []
        self.font_count = 0
        self.num_format_count = 0
        self.defined_names = []
        self.named_ranges = []
        self.custom_colors = []
        self.doc_properties = {}
        self.localtime = datetime.now()
        self.num_vml_files = 0
        self.num_comment_files = 0
        self.x_window = 240
        self.y_window = 15
        self.window_width = 16095
        self.window_height = 9660
        self.tab_ratio = 500
        self.str_table = SharedStringTable()
        self.vba_project = None
        self.vba_is_stream = False
        self.vba_codename = None
        self.image_types = {}
        self.images = []
        self.border_count = 0
        self.fill_count = 0
        self.drawing_count = 0
        self.calc_mode = "auto"
        self.calc_on_load = True
        self.allow_zip64 = False
        self.calc_id = 124519

        # We can't do 'constant_memory' mode while doing 'in_memory' mode.
        if self.in_memory:
            self.optimization = False

        # Add the default cell format. This must be the first format added
        # since it is created with an explicit xf_index of 0.
        if self.excel2003_style:
            self.add_format({"xf_index": 0, "font_family": 0})
        else:
            self.add_format({"xf_index": 0})

        # Add a default URL format. _set_default_xf_indices() relies on this
        # being the second entry in self.formats.
        self.default_url_format = self.add_format({"color": "blue", "underline": 1})

        # Add the default date format. The user supplied number format
        # string is replaced by the Format object created from it.
        if self.default_date_format is not None:
            self.default_date_format = self.add_format({"num_format": self.default_date_format})

    def __del__(self):
        """Close file in destructor if it hasn't been closed explicitly."""
        try:
            if not self.fileclosed:
                self.close()
        except:
            raise Exception(
                "Exception caught in workbook destructor. " "Explicit close() may be required for workbook."
            )

    def __enter__(self):
        """Return this workbook so it can be bound by a "with" statement."""
        return self

    def __exit__(self, type, value, traceback):
        """
        Close the workbook when exiting a "with" statement.

        The exception details passed in are not inspected: close() is
        called whether or not the block raised. Nothing is returned, so an
        exception raised inside the block is not suppressed.

        """
        self.close()

    def add_worksheet(self, name=None):
        """
        Add a new worksheet to the Excel workbook.

        Args:
            name: The worksheet name. Defaults to 'Sheet1', etc.

        Returns:
            Reference to a worksheet object.

        Raises:
            Exception: If the name is longer than 31 characters, contains
                one of the characters []:*?/\\ or, ignoring case, is
                already used by another sheet in the workbook.

        """
        return self._add_sheet(name, is_chartsheet=False)

    def add_chartsheet(self, name=None):
        """
        Add a new chartsheet to the Excel workbook.

        Args:
            name: The chartsheet name. Defaults to 'Chart1', etc.

        Returns:
            Reference to a chartsheet object.

        Raises:
            Exception: If the name is longer than 31 characters, contains
                one of the characters []:*?/\\ or, ignoring case, is
                already used by another sheet in the workbook.

        """
        return self._add_sheet(name, is_chartsheet=True)

    def add_format(self, properties=None):
        """
        Add a new Format to the Excel Workbook.

        Args:
            properties: Optional dict of format properties. They are
                applied on top of the workbook's default format properties.
                None is treated as an empty dict.

        Returns:
            Reference to a Format object.

        """
        # Use None rather than a shared mutable {} as the default argument.
        if properties is None:
            properties = {}

        format_properties = self.default_format_properties.copy()

        # In Excel 2003 style mode the fixed Arial 10 defaults replace the
        # workbook's default_format_properties rather than extending them.
        if self.excel2003_style:
            format_properties = {"font_name": "Arial", "font_size": 10, "theme": -1}

        format_properties.update(properties)

        xf_format = Format(format_properties, self.xf_format_indices, self.dxf_format_indices)

        # Store the format reference.
        self.formats.append(xf_format)

        return xf_format

    def add_chart(self, options):
        """
        Create a chart object.

        Args:
            options: The chart type and subtype options. The "type" key is
                required and an optional "name" key sets the chart name.

        Returns:
            Reference to a Chart object, or None (after a warning) if the
            chart type is missing or unknown.

        """

        # Type must be specified so we can create the required chart instance.
        chart_type = options.get("type")
        if chart_type is None:
            warn("Chart type must be defined in add_chart()")
            return

        # Supported chart type names and the classes that implement them.
        chart_classes = (
            ("area", ChartArea),
            ("bar", ChartBar),
            ("column", ChartColumn),
            ("doughnut", ChartDoughnut),
            ("line", ChartLine),
            ("pie", ChartPie),
            ("radar", ChartRadar),
            ("scatter", ChartScatter),
            ("stock", ChartStock),
        )

        for type_name, chart_class in chart_classes:
            if chart_type == type_name:
                chart = chart_class(options)
                break
        else:
            warn("Unknown chart type '%s' in add_chart()" % chart_type)
            return

        # Set the embedded chart name if present.
        if "name" in options:
            chart.chart_name = options["name"]

        chart.embedded = True
        chart.date_1904 = self.date_1904

        self.charts.append(chart)

        return chart

    def add_vba_project(self, vba_project, is_stream=False):
        """
        Add a vbaProject binary to the Excel workbook.

        Args:
            vba_project: The vbaProject binary file name.
            is_stream:   vba_project is an in memory byte stream.

        Returns:
            Nothing on success. -1 (after a warning) if vba_project is a
            file name that does not exist.

        """
        # The file system is only consulted for file names, not streams.
        file_missing = not (is_stream or os.path.exists(vba_project))

        if file_missing:
            warn("VBA project binary file '%s' not found." % force_unicode(vba_project))
            return -1

        self.vba_is_stream = is_stream
        self.vba_project = vba_project

    def close(self):
        """
        Call finalization code and close file.

        The workbook is only stored the first time this is called. Later
        calls do nothing.

        Args:
            None.

        Returns:
            Nothing.

        """
        if self.fileclosed:
            return

        # Mark the workbook as closed before storing it.
        self.fileclosed = 1
        self._store_workbook()

    def set_properties(self, properties):
        """
        Set the document properties such as Title, Author etc.

        The dictionary replaces any previously set properties and is stored
        by reference rather than copied.

        Args:
            properties: Dictionary of document properties.

        Returns:
            Nothing.

        """
        self.doc_properties = properties

    def set_calc_mode(self, mode, calc_id=None):
        """
        Set the Excel calculation mode for the workbook.

        Args:
            mode: String containing one of:
                * manual
                * auto_except_tables
                * auto
            calc_id: Optional replacement for the workbook calc id. Falsy
                values leave the current id unchanged.

        Returns:
            Nothing.

        """
        if mode == "manual":
            # Manual mode also turns off calculation on load.
            self.calc_mode, self.calc_on_load = mode, False
        elif mode == "auto_except_tables":
            # Stored under the name "autoNoTable".
            self.calc_mode = "autoNoTable"
        else:
            self.calc_mode = mode

        # Leave undocumented for now. Rarely required.
        if calc_id:
            self.calc_id = calc_id

    def define_name(self, name, formula):
        """
        Create a defined name in the workbook.

        Both global (workbook level) names and local (worksheet level)
        names written as "Sheet1!name" are handled.

        Args:
            name:    The defined name.
            formula: The cell or range that the defined name refers to.

        Returns:
            Nothing on success. -1 (after a warning) if the sheet is unknown
            or the name is not acceptable.

        """
        # Drop any leading = sign(s) from the formula.
        formula = formula.lstrip("=")

        # Local defined names are formatted like "Sheet1!name".
        local_match = re.match(r"^(.*)!(.*)$", name)

        if local_match is None:
            # Use -1 to indicate global names.
            sheet_index = -1
        else:
            sheetname, name = local_match.groups()
            sheet_index = self._get_sheet_index(sheetname)

            # Warn if the sheet index wasn't found.
            if sheet_index is None:
                warn("Unknown sheet name '%s' in defined_name()" % force_unicode(sheetname))
                return -1

        # Warn if the defined name contains invalid chars as defined by Excel.
        has_valid_chars = re.match(r"^[\w\\][\w\\.]*$", name, re.UNICODE)
        if not has_valid_chars or re.match(r"^\d", name):
            warn("Invalid Excel characters in defined_name(): '%s'" % force_unicode(name))
            return -1

        # Warn if the defined name looks like a cell name.
        if re.match(r"^[a-zA-Z][a-zA-Z]?[a-dA-D]?[0-9]+$", name):
            warn("Name looks like a cell name in defined_name(): '%s'" % force_unicode(name))
            return -1

        # Warn if the name looks like a R1C1 cell reference.
        if re.match(r"^[rcRC]$", name) or re.match(r"^[rcRC]\d+[rcRC]\d+$", name):
            warn("Invalid name '%s' like a RC cell ref in defined_name()" % force_unicode(name))
            return -1

        # Stored as [name, sheet index, formula, hidden flag].
        self.defined_names.append([name, sheet_index, formula, False])

    def worksheets(self):
        """
        Return a list of the worksheet objects in the workbook.

        The list returned is the workbook's own internal list, not a copy.

        Args:
            None.

        Returns:
            A list of worksheet objects.

        """
        return self.worksheets_objs

    def use_zip64(self):
        """
        Allow ZIP64 extensions when writing xlsx file zip container.

        This sets the flag that is passed to ZipFile as allowZip64 when the
        workbook is stored.

        Args:
            None.

        Returns:
            Nothing.

        """
        self.allow_zip64 = True

    def set_vba_name(self, name=None):
        """
        Set the VBA name for the workbook. By default the workbook is referred
        to as ThisWorkbook in VBA.

        Args:
            name: The VBA name for the workbook. None selects the default
                name "ThisWorkbook".

        Returns:
            Nothing.

        """
        self.vba_codename = "ThisWorkbook" if name is None else name

    ###########################################################################
    #
    # Private API.
    #
    ###########################################################################

    def _assemble_xml_file(self):
        # Assemble and write the XML file. The elements are written in a
        # fixed order: fileVersion, workbookPr, bookViews, sheets,
        # definedNames and calcPr, all inside the <workbook> element.

        # Prepare the Format objects (fonts, number formats, borders and
        # fills) before any XML is written.
        self._prepare_format_properties()

        # Write the XML declaration.
        self._xml_declaration()

        # Write the workbook element.
        self._write_workbook()

        # Write the fileVersion element.
        self._write_file_version()

        # Write the workbookPr element.
        self._write_workbook_pr()

        # Write the bookViews element.
        self._write_book_views()

        # Write the sheets element.
        self._write_sheets()

        # Write the workbook defined names.
        self._write_defined_names()

        # Write the calcPr element.
        self._write_calc_pr()

        # Close the workbook tag.
        self._xml_end_tag("workbook")

        # Close the file.
        self._xml_close()

    def _store_workbook(self):
        # Assemble the worksheets into a workbook and write the result to
        # self.filename as a zip container.
        packager = Packager()

        # Add a default worksheet if none have been added.
        if not self.worksheets():
            self.add_worksheet()

        # Ensure that at least one worksheet has been selected.
        if self.worksheet_meta.activesheet == 0:
            self.worksheets_objs[0].selected = 1
            self.worksheets_objs[0].hidden = 0

        # Set the active sheet.
        for sheet in self.worksheets():
            if sheet.index == self.worksheet_meta.activesheet:
                sheet.active = 1

        # Convert the SST strings data structure.
        self._prepare_sst_string_data()

        # Prepare the worksheet VML elements such as comments and buttons.
        self._prepare_vml()

        # Set the defined names for the worksheets such as Print Titles.
        self._prepare_defined_names()

        # Prepare the drawings, charts and images.
        self._prepare_drawings()

        # Add cached data to charts.
        self._add_chart_data()

        # Prepare the worksheet tables.
        self._prepare_tables()

        # Package the workbook.
        packager._add_workbook(self)
        packager._set_tmpdir(self.tmpdir)
        packager._set_in_memory(self.in_memory)
        xml_files = packager._create_package()

        # Free up the Packager object.
        packager = None

        xlsx_file = ZipFile(self.filename, "w", compression=ZIP_DEFLATED, allowZip64=self.allow_zip64)

        # Release the zip file handle even if adding one of the sub-files
        # fails part way through.
        try:
            # Add XML sub-files to the Zip file with their Excel filename.
            for os_filename, xml_filename, is_binary in xml_files:
                if self.in_memory:
                    # The files are in-memory StringIOs.
                    if is_binary:
                        xlsx_file.writestr(xml_filename, os_filename.getvalue())
                    else:
                        xlsx_file.writestr(xml_filename, os_filename.getvalue().encode("utf-8"))
                else:
                    # The files are tempfiles. Each one is deleted once it
                    # has been added to the zip file.
                    xlsx_file.write(os_filename, xml_filename)
                    os.remove(os_filename)
        finally:
            xlsx_file.close()

    def _add_sheet(self, name, is_chartsheet):
        # Shared implementation of add_worksheet() and add_chartsheet():
        # validate the name, create the sheet object, initialise it with the
        # workbook level settings and register it with the workbook.

        sheet_index = len(self.worksheets_objs)
        name = self._check_sheetname(name, is_chartsheet)

        # Workbook level data handed to the new sheet.
        init_data = dict(
            name=name,
            index=sheet_index,
            str_table=self.str_table,
            worksheet_meta=self.worksheet_meta,
            optimization=self.optimization,
            tmpdir=self.tmpdir,
            date_1904=self.date_1904,
            strings_to_numbers=self.strings_to_numbers,
            strings_to_formulas=self.strings_to_formulas,
            strings_to_urls=self.strings_to_urls,
            nan_inf_to_errors=self.nan_inf_to_errors,
            default_date_format=self.default_date_format,
            default_url_format=self.default_url_format,
            excel2003_style=self.excel2003_style,
        )

        worksheet = Chartsheet() if is_chartsheet else Worksheet()
        worksheet._initialize(init_data)

        self.worksheets_objs.append(worksheet)
        self.sheetnames.append(name)

        return worksheet

    def _check_sheetname(self, sheetname, is_chartsheet=False):
        # Check for valid worksheet names. We check the length, if it contains
        # any invalid chars and if the sheetname is unique in the workbook.
        invalid_char = re.compile(r"[\[\]:*?/\\]")

        # Increment the Sheet/Chart number used for default sheet names below.
        if is_chartsheet:
            self.chartname_count += 1
        else:
            self.sheetname_count += 1

        # Supply default Sheet/Chart sheetname if none has been defined.
        if sheetname is None:
            if is_chartsheet:
                sheetname = self.chart_name + str(self.chartname_count)
            else:
                sheetname = self.sheet_name + str(self.sheetname_count)

        # Check that sheet sheetname is <= 31. Excel limit.
        if len(sheetname) > 31:
            raise Exception("Excel worksheet name '%s' must be <= 31 chars." % sheetname)

        # Check that sheetname doesn't contain any invalid characters
        if invalid_char.search(sheetname):
            raise Exception("Invalid Excel character '[]:*?/\\' in sheetname '%s'" % sheetname)

        # Check that the worksheet name doesn't already exist since this is a
        # fatal Excel error. The check must be case insensitive like Excel.
        for worksheet in self.worksheets():
            if sheetname.lower() == worksheet.name.lower():
                raise Exception("Sheetname '%s', with case ignored, is already in use." % sheetname)

        return sheetname

    def _prepare_format_properties(self):
        # Prepare all Format properties prior to passing them to styles.py.
        # The XF/DXF split is done first since the later steps iterate over
        # the self.xf_formats and self.dxf_formats lists that it builds.

        # Separate format objects into XF and DXF formats.
        self._prepare_formats()

        # Set the font index for the format objects.
        self._prepare_fonts()

        # Set the number format index for the format objects.
        self._prepare_num_formats()

        # Set the border index for the format objects.
        self._prepare_borders()

        # Set the fill index for the format objects.
        self._prepare_fills()

    def _prepare_formats(self):
        # Iterate through the XF Format objects and separate them into
        # XF and DXF formats. The XF and DF formats then need to be sorted
        # back into index order rather than creation order.
        xf_formats = []
        dxf_formats = []

        # Sort into XF and DXF formats.
        for xf_format in self.formats:
            if xf_format.xf_index is not None:
                xf_formats.append(xf_format)

            if xf_format.dxf_index is not None:
                dxf_formats.append(xf_format)

        # Pre-extend the format lists.
        self.xf_formats = [None] * len(xf_formats)
        self.dxf_formats = [None] * len(dxf_formats)

        # Rearrange formats into index order.
        for xf_format in xf_formats:
            index = xf_format.xf_index
            self.xf_formats[index] = xf_format

        for dxf_format in dxf_formats:
            index = dxf_format.dxf_index
            self.dxf_formats[index] = dxf_format

    def _set_default_xf_indices(self):
        # Set the default index for each format. Only used for testing.

        formats = list(self.formats)

        # Delete the default url format.
        del formats[1]

        # Skip the default date format if set.
        if self.default_date_format is not None:
            del formats[1]

        # Set the remaining formats.
        for xf_format in formats:
            xf_format._get_xf_index()

    def _prepare_fonts(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default font elements.
        fonts = {}
        index = 0

        for xf_format in self.xf_formats:
            key = xf_format._get_font_key()
            if key in fonts:
                # Font has already been used.
                xf_format.font_index = fonts[key]
                xf_format.has_font = 0
            else:
                # This is a new font.
                fonts[key] = index
                xf_format.font_index = index
                xf_format.has_font = 1
                index += 1

        self.font_count = index

        # For DXF formats we only need to check if the properties have changed.
        for xf_format in self.dxf_formats:
            # The only font properties that can change for a DXF format are:
            # color, bold, italic, underline and strikethrough.
            if (
                xf_format.font_color
                or xf_format.bold
                or xf_format.italic
                or xf_format.underline
                or xf_format.font_strikeout
            ):
                xf_format.has_dxf_font = 1

    def _prepare_num_formats(self):
        # Give every XF and DXF format a number format index. Number formats
        # given as strings are treated as user defined and share one index
        # per distinct string. Anything else is taken to be the index of a
        # built-in format.
        # User defined records in Excel start from index 0xA4.
        num_formats = {}
        index = 164
        num_format_count = 0

        for xf_format in self.xf_formats + self.dxf_formats:
            num_format = xf_format.num_format

            # Check if num_format is an index to a built-in number format.
            if not isinstance(num_format, str_types):
                xf_format.num_format_index = int(num_format)
                continue

            if num_format in num_formats:
                # Number xf_format has already been used.
                xf_format.num_format_index = num_formats[num_format]
            else:
                # Add a new number xf_format.
                num_formats[num_format] = index
                xf_format.num_format_index = index
                index += 1

                # Only increase the number format count for XF formats (not
                # DXF formats).
                # NOTE(review): this is a truthiness test, so a format whose
                # xf_index is 0 is not counted either - confirm intended.
                if xf_format.xf_index:
                    num_format_count += 1

        self.num_format_count = num_format_count

    def _prepare_borders(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default border elements.
        borders = {}
        index = 0

        for xf_format in self.xf_formats:
            key = xf_format._get_border_key()

            if key in borders:
                # Border has already been used.
                xf_format.border_index = borders[key]
                xf_format.has_border = 0
            else:
                # This is a new border.
                borders[key] = index
                xf_format.border_index = index
                xf_format.has_border = 1
                index += 1

        self.border_count = index

        # For DXF formats we only need to check if the properties have changed.
        has_border = re.compile(r"[^0:]")

        for xf_format in self.dxf_formats:
            key = xf_format._get_border_key()

            if has_border.search(key):
                xf_format.has_dxf_border = 1

    def _prepare_fills(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default fill elements.
        # The user defined fill properties start from 2 since there are 2
        # default fills: patternType="none" and patternType="gray125".
        fills = {}
        index = 2  # Start from 2. See above.

        # Add the default fills.
        fills["0:0:0"] = 0
        fills["17:0:0"] = 1

        # Store the DXF colors separately since them may be reversed below.
        for xf_format in self.dxf_formats:
            if xf_format.pattern or xf_format.bg_color or xf_format.fg_color:
                xf_format.has_dxf_fill = 1
                xf_format.dxf_bg_color = xf_format.bg_color
                xf_format.dxf_fg_color = xf_format.fg_color

        for xf_format in self.xf_formats:
            # The following logical statements jointly take care of special
            # cases in relation to cell colors and patterns:
            # 1. For a solid fill (_pattern == 1) Excel reverses the role of
            # foreground and background colors, and
            # 2. If the user specifies a foreground or background color
            # without a pattern they probably wanted a solid fill, so we fill
            # in the defaults.
            if xf_format.pattern == 1 and xf_format.bg_color != 0 and xf_format.fg_color != 0:
                tmp = xf_format.fg_color
                xf_format.fg_color = xf_format.bg_color
                xf_format.bg_color = tmp

            if xf_format.pattern <= 1 and xf_format.bg_color != 0 and xf_format.fg_color == 0:
                xf_format.fg_color = xf_format.bg_color
                xf_format.bg_color = 0
                xf_format.pattern = 1

            if xf_format.pattern <= 1 and xf_format.bg_color == 0 and xf_format.fg_color != 0:
                xf_format.bg_color = 0
                xf_format.pattern = 1

            key = xf_format._get_fill_key()

            if key in fills:
                # Fill has already been used.
                xf_format.fill_index = fills[key]
                xf_format.has_fill = 0
            else:
                # This is a new fill.
                fills[key] = index
                xf_format.fill_index = index
                xf_format.has_fill = 1
                index += 1

        self.fill_count = index

    def _prepare_defined_names(self):
        # Iterate through the worksheets and store any defined names in
        # addition to any user defined names. Stores the defined names
        # for the Workbook.xml and the named ranges for App.xml.
        defined_names = self.defined_names

        for sheet in self.worksheets():
            # Check for Print Area settings.
            if sheet.autofilter_area:
                hidden = 1
                sheet_range = sheet.autofilter_area
                # Store the defined names.
                defined_names.append(["_xlnm._FilterDatabase", sheet.index, sheet_range, hidden])

            # Check for Print Area settings.
            if sheet.print_area_range:
                hidden = 0
                sheet_range = sheet.print_area_range
                # Store the defined names.
                defined_names.append(["_xlnm.Print_Area", sheet.index, sheet_range, hidden])

            # Check for repeat rows/cols referred to as Print Titles.
            if sheet.repeat_col_range or sheet.repeat_row_range:
                hidden = 0
                sheet_range = ""
                if sheet.repeat_col_range and sheet.repeat_row_range:
                    sheet_range = sheet.repeat_col_range + "," + sheet.repeat_row_range
                else:
                    sheet_range = sheet.repeat_col_range + sheet.repeat_row_range
                # Store the defined names.
                defined_names.append(["_xlnm.Print_Titles", sheet.index, sheet_range, hidden])

        defined_names = self._sort_defined_names(defined_names)
        self.defined_names = defined_names
        self.named_ranges = self._extract_named_ranges(defined_names)

    def _sort_defined_names(self, names):
        # Sort the list of list of internal and user defined names in
        # the same order as used by Excel.

        # Add a normalize name string to each list for sorting.
        for name_list in names:
            (defined_name, _, sheet_name, _) = name_list

            # Normalize the defined name by removing any leading '_xmln.'
            # from internal names and lowercasing the string.
            defined_name = defined_name.replace("_xlnm.", "").lower()

            # Normalize the sheetname by removing the leading quote and
            # lowercasing the string.
            sheet_name = sheet_name.lstrip("'").lower()

            name_list.append(defined_name + "::" + sheet_name)

        # Sort based on the normalized key.
        names.sort(key=operator.itemgetter(4))

        # Remove the extra key used for sorting.
        for name_list in names:
            name_list.pop()

        return names

    def _prepare_drawings(self):
        """
        Iterate through the worksheets and set up chart and image drawings.

        For each worksheet that has charts, images, shapes or header/footer
        images this hands out running reference ids and calls the
        worksheet's ``_prepare_*`` helpers. Afterwards ``self.drawings``
        holds the drawing of every sheet with charts, images or shapes,
        ``self.charts`` is pruned and sorted by chart id, and
        ``self.drawing_count`` holds the number of drawings.
        """
        chart_ref_id = 0
        image_ref_id = 0
        drawing_id = 0
        # Initial DPI values; they are overwritten by each call to
        # _get_image_properties() below.
        x_dpi = 96
        y_dpi = 96

        for sheet in self.worksheets():
            chart_count = len(sheet.charts)
            image_count = len(sheet.images)
            shape_count = len(sheet.shapes)

            header_image_count = len(sheet.header_images)
            footer_image_count = len(sheet.footer_images)
            has_drawing = False

            # Nothing to prepare for this sheet.
            if not (chart_count or image_count or shape_count or header_image_count or footer_image_count):
                continue

            # Don't increase the drawing_id for header/footer images: only
            # charts, images and shapes count as a drawing.
            if chart_count or image_count or shape_count:
                drawing_id += 1
                has_drawing = True

            # Prepare the worksheet charts.
            for index in range(chart_count):
                chart_ref_id += 1
                sheet._prepare_chart(index, chart_ref_id, drawing_id)

            # Prepare the worksheet images. The filename is read from
            # element 2 and the optional image data from element 10 of each
            # image entry.
            for index in range(image_count):
                filename = sheet.images[index][2]
                image_data = sheet.images[index][10]
                (image_type, width, height, name, x_dpi, y_dpi) = self._get_image_properties(filename, image_data)
                image_ref_id += 1

                sheet._prepare_image(index, image_ref_id, drawing_id, width, height, name, image_type, x_dpi, y_dpi)

            # Prepare the worksheet shapes.
            for index in range(shape_count):
                sheet._prepare_shape(index, drawing_id)

            # Prepare the header images. Each entry is read as
            # (filename, image_data, position).
            for index in range(header_image_count):

                filename = sheet.header_images[index][0]
                image_data = sheet.header_images[index][1]
                position = sheet.header_images[index][2]

                (image_type, width, height, name, x_dpi, y_dpi) = self._get_image_properties(filename, image_data)

                image_ref_id += 1

                sheet._prepare_header_image(image_ref_id, width, height, name, image_type, position, x_dpi, y_dpi)

            # Prepare the footer images. These go through the same
            # _prepare_header_image() helper as the header images.
            for index in range(footer_image_count):

                filename = sheet.footer_images[index][0]
                image_data = sheet.footer_images[index][1]
                position = sheet.footer_images[index][2]

                (image_type, width, height, name, x_dpi, y_dpi) = self._get_image_properties(filename, image_data)

                image_ref_id += 1

                sheet._prepare_header_image(image_ref_id, width, height, name, image_type, position, x_dpi, y_dpi)

            if has_drawing:
                drawing = sheet.drawing
                self.drawings.append(drawing)

        # Remove charts that were created but not inserted into worksheets.
        # Iterate over a copy since the list is modified in the loop.
        for chart in self.charts[:]:
            if chart.id == -1:
                self.charts.remove(chart)

        # Sort the workbook chart references into the order that they were
        # written to the worksheets above.
        self.charts = sorted(self.charts, key=lambda chart: chart.id)

        self.drawing_count = drawing_id

    def _get_image_properties(self, filename, image_data):
        # Extract dimension information from the image file.
        height = 0
        width = 0
        x_dpi = 96
        y_dpi = 96

        if not image_data:
            # Open the image file and read in the data.
            fh = open(filename, "rb")
            data = fh.read()
        else:
            # Read the image data from the user supplied byte stream.
            data = image_data.getvalue()

        # Get the image filename without the path.
        image_name = os.path.basename(filename)

        # Look for some common image file markers.
        marker1 = (unpack("3s", data[1:4]))[0]
        marker2 = (unpack(">H", data[:2]))[0]
        marker3 = (unpack("2s", data[:2]))[0]

        if sys.version_info < (2, 6, 0):
            # Python 2.5/Jython.
            png_marker = "PNG"
            bmp_marker = "BM"
        else:
            # Eval the binary literals for Python 2.5/Jython compatibility.
            png_marker = eval("b'PNG'")
            bmp_marker = eval("b'BM'")

        if marker1 == png_marker:
            self.image_types["png"] = 1
            (image_type, width, height, x_dpi, y_dpi) = self._process_png(data)

        elif marker2 == 0xFFD8:
            self.image_types["jpeg"] = 1
            (image_type, width, height, x_dpi, y_dpi) = self._process_jpg(data)

        elif marker3 == bmp_marker:
            self.image_types["bmp"] = 1
            (image_type, width, height) = self._process_bmp(data)

        else:
            raise Exception("%s: Unknown or unsupported image file format." % filename)

        # Check that we found the required data.
        if not height or not width:
            raise Exception("%s: no size data found in image file." % filename)

        # Store image data to copy it into file container.
        self.images.append([filename, image_type, image_data])

        if not image_data:
            fh.close()

        return image_type, width, height, image_name, x_dpi, y_dpi

    def _process_png(self, data):
        # Extract width and height information from a PNG file.
        offset = 8
        data_length = len(data)
        end_marker = False
        width = 0
        height = 0
        x_dpi = 96
        y_dpi = 96

        # Look for numbers rather than strings for Python 2.6/3 compatibility.
        marker_ihdr = 0x49484452  # IHDR
        marker_phys = 0x70485973  # pHYs
        marker_iend = 0x49454E44  # IEND

        # Search through the image data to read the height and width in the
        # IHDR element. Also read the DPI in the pHYs element.
        while not end_marker and offset < data_length:

            length = (unpack(">I", data[offset + 0 : offset + 4]))[0]
            marker = (unpack(">I", data[offset + 4 : offset + 8]))[0]

            # Read the image dimensions.
            if marker == marker_ihdr:
                width = (unpack(">I", data[offset + 8 : offset + 12]))[0]
                height = (unpack(">I", data[offset + 12 : offset + 16]))[0]

            # Read the image DPI.
            if marker == marker_phys:
                x_density = (unpack(">I", data[offset + 8 : offset + 12]))[0]
                y_density = (unpack(">I", data[offset + 12 : offset + 16]))[0]
                units = (unpack("b", data[offset + 16 : offset + 17]))[0]

                if units == 1:
                    x_dpi = x_density * 0.0254
                    y_dpi = y_density * 0.0254

            if marker == marker_iend:
                end_marker = True
                continue

            offset = offset + length + 12

        return "png", width, height, x_dpi, y_dpi

    def _process_jpg(self, data):
        # Extract width and height information from a JPEG file.
        offset = 2
        data_length = len(data)
        end_marker = False
        width = 0
        height = 0
        x_dpi = 96
        y_dpi = 96

        # Search through the image data to read the height and width in the
        # 0xFFC0/C2 element. Also read the DPI in the 0xFFE0 element.
        while not end_marker and offset < data_length:

            marker = (unpack(">H", data[offset + 0 : offset + 2]))[0]
            length = (unpack(">H", data[offset + 2 : offset + 4]))[0]

            # Read the image dimensions.
            if marker == 0xFFC0 or marker == 0xFFC2:
                height = (unpack(">H", data[offset + 5 : offset + 7]))[0]
                width = (unpack(">H", data[offset + 7 : offset + 9]))[0]

            # Read the image DPI.
            if marker == 0xFFE0:
                units = (unpack("b", data[offset + 11 : offset + 12]))[0]
                x_density = (unpack(">H", data[offset + 12 : offset + 14]))[0]
                y_density = (unpack(">H", data[offset + 14 : offset + 16]))[0]

                if units == 1:
                    x_dpi = x_density
                    y_dpi = y_density

                if units == 2:
                    x_dpi = x_density * 2.54
                    y_dpi = y_density * 2.54

                # Workaround for incorrect dpi.
                if x_dpi == 1:
                    x_dpi = 96
                if y_dpi == 1:
                    y_dpi = 96

            if marker == 0xFFDA:
                end_marker = True
                continue

            offset = offset + length + 2

        return "jpeg", width, height, x_dpi, y_dpi

    def _process_bmp(self, data):
        # Extract width and height information from a BMP file.
        width = (unpack("<L", data[18:22]))[0]
        height = (unpack("<L", data[22:26]))[0]
        return "bmp", width, height

    def _extract_named_ranges(self, defined_names):
        # Extract the named ranges from the sorted list of defined names.
        # These are used in the App.xml file.
        named_ranges = []

        for defined_name in defined_names:

            name = defined_name[0]
            index = defined_name[1]
            sheet_range = defined_name[2]

            # Skip autoFilter ranges.
            if name == "_xlnm._FilterDatabase":
                continue

            # We are only interested in defined names with ranges.
            if "!" in sheet_range:
                sheet_name, _ = sheet_range.split("!", 1)

                # Match Print_Area and Print_Titles xlnm types.
                if name.startswith("_xlnm."):
                    xlnm_type = name.replace("_xlnm.", "")
                    name = sheet_name + "!" + xlnm_type
                elif index != -1:
                    name = sheet_name + "!" + name

                named_ranges.append(name)

        return named_ranges

    def _get_sheet_index(self, sheetname):
        # Convert a sheet name to its index. Return None otherwise.
        sheetname = sheetname.strip("'")

        if sheetname in self.sheetnames:
            return self.sheetnames.index(sheetname)
        else:
            return None

    def _prepare_vml(self):
        # Iterate through the worksheets and set up the VML objects.
        comment_id = 0
        vml_drawing_id = 0
        vml_data_id = 1
        vml_header_id = 0
        vml_shape_id = 1024
        vml_files = 0
        comment_files = 0
        has_button = False

        for sheet in self.worksheets():
            if not sheet.has_vml and not sheet.has_header_vml:
                continue

            vml_files += 1

            if sheet.has_vml:
                if sheet.has_comments:
                    comment_files += 1
                    comment_id += 1

                vml_drawing_id += 1

                count = sheet._prepare_vml_objects(vml_data_id, vml_shape_id, vml_drawing_id, comment_id)

                # Each VML should start with a shape id incremented by 1024.
                vml_data_id += 1 * int((1024 + count) / 1024)
                vml_shape_id += 1024 * int((1024 + count) / 1024)

            if sheet.has_header_vml:
                vml_header_id += 1
                vml_drawing_id += 1
                sheet._prepare_header_vml_objects(vml_header_id, vml_drawing_id)

            self.num_vml_files = vml_files
            self.num_comment_files = comment_files

            if len(sheet.buttons_list):
                has_button = True

                # Set the sheet vba_codename if it has a button and the
                # workbook has a vbaProject binary.
                if self.vba_project and sheet.vba_codename is None:
                    sheet.set_vba_name()

        # Add a font format for cell comments.
        if comment_files > 0:
            xf = self.add_format({"font_name": "Tahoma", "font_size": 8, "color_indexed": 81, "font_only": True})
            xf._get_xf_index()

        # Set the workbook vba_codename if one of the sheets has a button and
        # the workbook has a vbaProject binary.
        if has_button and self.vba_project and self.vba_codename is None:
            self.set_vba_name()

    def _prepare_tables(self):
        # Set the table ids for the worksheet tables.
        table_id = 0
        seen = {}

        for sheet in self.worksheets():
            table_count = len(sheet.tables)

            if not table_count:
                continue

            sheet._prepare_tables(table_id + 1, seen)
            table_id += table_count

    def _add_chart_data(self):
        """
        Add "cached" data to charts.

        Provides the numCache and strCache data for series and title/axis
        ranges. For every range formula of every chart (including combined
        charts) that has no data yet, the data is taken from a local cache
        or read from the referenced worksheet and stored in
        ``chart.formula_data``.
        """
        worksheets = {}
        # Data already resolved per range formula, shared across all charts.
        seen_ranges = {}
        charts = []

        # Map worksheet names to worksheet objects.
        for worksheet in self.worksheets():
            worksheets[worksheet.name] = worksheet

        # Build a list of the worksheet charts including any combined charts.
        for chart in self.charts:
            charts.append(chart)
            if chart.combined:
                charts.append(chart.combined)

        for chart in charts:

            # formula_ids maps each range formula to the id that is used to
            # index chart.formula_data.
            for c_range in chart.formula_ids.keys():
                r_id = chart.formula_ids[c_range]

                # Skip if the series has user defined data, but remember that
                # data for later charts if nothing is cached for the range.
                if chart.formula_data[r_id] is not None:
                    if c_range not in seen_ranges or seen_ranges[c_range] is None:
                        data = chart.formula_data[r_id]
                        seen_ranges[c_range] = data
                    continue

                # Check to see if the data is already cached locally.
                if c_range in seen_ranges:
                    chart.formula_data[r_id] = seen_ranges[c_range]
                    continue

                # Convert the range formula to a sheet name and cell range.
                (sheetname, cells) = self._get_chart_range(c_range)

                # Skip if we couldn't parse the formula. The data for this
                # range is left as None.
                if sheetname is None:
                    continue

                # Handle non-contiguous ranges like:
                #     (Sheet1!$A$1:$A$2,Sheet1!$A$4:$A$5).
                # We don't try to parse them. We just return an empty list.
                if sheetname.startswith("("):
                    chart.formula_data[r_id] = []
                    seen_ranges[c_range] = []
                    continue

                # Warn if the name is unknown since it indicates a user error
                # in a chart series formula. An empty list is stored so the
                # warning isn't repeated for the same range.
                if sheetname not in worksheets:
                    warn(
                        "Unknown worksheet reference '%s' in range "
                        "'%s' passed to add_series()" % (force_unicode(sheetname), force_unicode(c_range))
                    )
                    chart.formula_data[r_id] = []
                    seen_ranges[c_range] = []
                    continue

                # Find the worksheet object based on the sheet name.
                worksheet = worksheets[sheetname]

                # Get the data from the worksheet table.
                data = worksheet._get_range_data(*cells)

                # TODO. Handle SST string ids if required.

                # Add the data to the chart.
                chart.formula_data[r_id] = data

                # Store range data locally to avoid lookup if seen again.
                seen_ranges[c_range] = data

    def _get_chart_range(self, c_range):
        # Convert a range formula such as Sheet1!$B$1:$B$5 into a sheet name
        # and cell range such as ( 'Sheet1', 0, 1, 4, 1 ).

        # Split the range formula into sheetname and cells at the last '!'.
        pos = c_range.rfind("!")
        if pos > 0:
            sheetname = c_range[:pos]
            cells = c_range[pos + 1 :]
        else:
            return None, None

        # Split the cell range into 2 cells or else use single cell for both.
        if cells.find(":") > 0:
            (cell_1, cell_2) = cells.split(":", 1)
        else:
            (cell_1, cell_2) = (cells, cells)

        # Remove leading/trailing quotes and convert escaped quotes to single.
        sheetname = sheetname.strip("'")
        sheetname = sheetname.replace("''", "'")

        try:
            # Get the row, col values from the Excel ranges. We do this in a
            # try block for ranges that can't be parsed such as defined names.
            (row_start, col_start) = xl_cell_to_rowcol(cell_1)
            (row_end, col_end) = xl_cell_to_rowcol(cell_2)
        except:
            return None, None

        # We only handle 1D ranges.
        if row_start != row_end and col_start != col_end:
            return None, None

        return sheetname, [row_start, col_start, row_end, col_end]

    def _prepare_sst_string_data(self):
        # Convert the SST string data from a dict to a list.
        self.str_table._sort_string_data()

    ###########################################################################
    #
    # XML methods.
    #
    ###########################################################################

    def _write_workbook(self):
        # Write <workbook> element.

        schema = "http://schemas.openxmlformats.org"
        xmlns = schema + "/spreadsheetml/2006/main"
        xmlns_r = schema + "/officeDocument/2006/relationships"

        attributes = [("xmlns", xmlns), ("xmlns:r", xmlns_r)]

        self._xml_start_tag("workbook", attributes)

    def _write_file_version(self):
        # Write the <fileVersion> element.

        app_name = "xl"
        last_edited = 4
        lowest_edited = 4
        rup_build = 4505

        attributes = [
            ("appName", app_name),
            ("lastEdited", last_edited),
            ("lowestEdited", lowest_edited),
            ("rupBuild", rup_build),
        ]

        if self.vba_project:
            attributes.append(("codeName", "{37E998C4-C9E5-D4B9-71C8-EB1FF731991C}"))

        self._xml_empty_tag("fileVersion", attributes)

    def _write_workbook_pr(self):
        # Write <workbookPr> element.
        default_theme_version = 124226
        attributes = []

        if self.vba_codename:
            attributes.append(("codeName", self.vba_codename))
        if self.date_1904:
            attributes.append(("date1904", 1))

        attributes.append(("defaultThemeVersion", default_theme_version))

        self._xml_empty_tag("workbookPr", attributes)

    def _write_book_views(self):
        # Write <bookViews> element.
        self._xml_start_tag("bookViews")
        self._write_workbook_view()
        self._xml_end_tag("bookViews")

    def _write_workbook_view(self):
        # Write <workbookView> element.
        attributes = [
            ("xWindow", self.x_window),
            ("yWindow", self.y_window),
            ("windowWidth", self.window_width),
            ("windowHeight", self.window_height),
        ]

        # Store the tabRatio attribute when it isn't the default.
        if self.tab_ratio != 500:
            attributes.append(("tabRatio", self.tab_ratio))

        # Store the firstSheet attribute when it isn't the default.
        if self.worksheet_meta.firstsheet > 0:
            firstsheet = self.worksheet_meta.firstsheet + 1
            attributes.append(("firstSheet", firstsheet))

        # Store the activeTab attribute when it isn't the first sheet.
        if self.worksheet_meta.activesheet > 0:
            attributes.append(("activeTab", self.worksheet_meta.activesheet))

        self._xml_empty_tag("workbookView", attributes)

    def _write_sheets(self):
        # Write <sheets> element.
        self._xml_start_tag("sheets")

        id_num = 1
        for worksheet in self.worksheets():
            self._write_sheet(worksheet.name, id_num, worksheet.hidden)
            id_num += 1

        self._xml_end_tag("sheets")

    def _write_sheet(self, name, sheet_id, hidden):
        # Write <sheet> element.
        attributes = [("name", name), ("sheetId", sheet_id)]

        if hidden:
            attributes.append(("state", "hidden"))

        attributes.append(("r:id", "rId" + str(sheet_id)))

        self._xml_empty_tag("sheet", attributes)

    def _write_calc_pr(self):
        # Write the <calcPr> element.
        attributes = [("calcId", self.calc_id)]

        if self.calc_mode == "manual":
            attributes.append(("calcMode", self.calc_mode))
            attributes.append(("calcOnSave", "0"))
        elif self.calc_mode == "autoNoTable":
            attributes.append(("calcMode", self.calc_mode))

        if self.calc_on_load:
            attributes.append(("fullCalcOnLoad", "1"))

        self._xml_empty_tag("calcPr", attributes)

    def _write_defined_names(self):
        # Write the <definedNames> element.
        if not self.defined_names:
            return

        self._xml_start_tag("definedNames")

        for defined_name in self.defined_names:
            self._write_defined_name(defined_name)

        self._xml_end_tag("definedNames")

    def _write_defined_name(self, defined_name):
        # Write the <definedName> element.
        name = defined_name[0]
        sheet_id = defined_name[1]
        sheet_range = defined_name[2]
        hidden = defined_name[3]

        attributes = [("name", name)]

        if sheet_id != -1:
            attributes.append(("localSheetId", sheet_id))
        if hidden:
            attributes.append(("hidden", 1))

        self._xml_data_element("definedName", sheet_range, attributes)
Ejemplo n.º 9
0
class Workbook(xmlwriter.XMLwriter):
    """
    A class for writing the Excel XLSX Workbook file.


    """

    ###########################################################################
    #
    # Public API.
    #
    ###########################################################################

    def __init__(self, filename=None, options=None):
        """
        Constructor.

        Args:
            filename: The workbook filename, stored on the instance.
            options:  Optional dict of workbook options. The keys read here
                      are 'tmpdir', 'date_1904', 'strings_to_numbers',
                      'default_date_format' and 'constant_memory'.

        """

        super(Workbook, self).__init__()

        # Use a None default instead of a dict literal so that no dict
        # object is shared between calls.
        if options is None:
            options = {}

        self.filename = filename
        self.tmpdir = options.get("tmpdir", None)
        self.date_1904 = options.get("date_1904", False)
        self.strings_to_numbers = options.get("strings_to_numbers", True)
        self.default_date_format = options.get("default_date_format", None)
        self.worksheet_meta = WorksheetMeta()
        self.selected = 0
        self.fileclosed = 0
        self.filehandle = None
        self.internal_fh = 0
        self.sheet_name = "Sheet"
        self.chart_name = "Chart"
        self.sheetname_count = 0
        self.chartname_count = 0
        self.worksheets_objs = []
        self.charts = []
        self.drawings = []
        self.sheetnames = []
        self.formats = []
        self.xf_formats = []
        self.xf_format_indices = {}
        self.dxf_formats = []
        self.dxf_format_indices = {}
        self.palette = []
        self.font_count = 0
        self.num_format_count = 0
        self.defined_names = []
        self.named_ranges = []
        self.custom_colors = []
        self.doc_properties = {}
        self.localtime = datetime.now()
        self.num_vml_files = 0
        self.num_comment_files = 0
        self.optimization = options.get("constant_memory", 0)
        self.x_window = 240
        self.y_window = 15
        self.window_width = 16095
        self.window_height = 9660
        self.tab_ratio = 500
        self.str_table = SharedStringTable()
        self.vba_project = None
        self.vba_codename = None
        self.image_types = {}
        self.images = []

        # Add the default cell format.
        self.add_format({"xf_index": 0})

        # Add the default date format. The option string is replaced by the
        # format object created from it.
        if self.default_date_format is not None:
            self.default_date_format = self.add_format({"num_format": self.default_date_format})

    def __del__(self):
        """Close file in destructor if it hasn't been closed explicitly."""
        if not self.fileclosed:
            self.close()

    def add_worksheet(self, name=None):
        """
        Add a new worksheet to the Excel workbook.

        Args:
            name: The worksheet name. Defaults to 'Sheet1', etc.

        Returns:
            Reference to a worksheet object.

        """
        # The new sheet goes at the end of the existing worksheets.
        position = len(self.worksheets_objs)
        name = self._check_sheetname(name)

        worksheet = Worksheet()

        # Pass the workbook level settings and shared objects to the sheet.
        worksheet._initialize(
            {
                "name": name,
                "index": position,
                "str_table": self.str_table,
                "worksheet_meta": self.worksheet_meta,
                "optimization": self.optimization,
                "tmpdir": self.tmpdir,
                "date_1904": self.date_1904,
                "strings_to_numbers": self.strings_to_numbers,
                "default_date_format": self.default_date_format,
            }
        )

        self.worksheets_objs.append(worksheet)
        self.sheetnames.append(name)

        return worksheet

    def add_format(self, properties=None):
        """
        Add a new Format to the Excel Workbook.

        Args:
            properties: The format properties. Defaults to no properties.

        Returns:
            Reference to a Format object.

        """
        # Use a None default and a fresh dict per call so that the Format
        # objects never share one default dict.
        if properties is None:
            properties = {}

        xf_format = Format(properties, self.xf_format_indices, self.dxf_format_indices)

        # Store the format reference.
        self.formats.append(xf_format)

        return xf_format

    def add_chart(self, options):
        """
        Create a chart object.

        Args:
            options: The chart type and subtype options. The 'type' key is
                     required; an optional 'name' key sets the chart name.

        Returns:
            Reference to a Chart object, or None (after a warning) if the
            chart type is missing or unknown.

        """

        # Type must be specified so we can create the required chart instance.
        # No string default is given to get() so that a missing type is
        # reported as missing and not as the unknown type 'None'.
        chart_type = options.get("type")
        if chart_type is None:
            warn("Chart type must be defined in add_chart()")
            return

        if chart_type == "area":
            chart = ChartArea(options)
        elif chart_type == "bar":
            chart = ChartBar(options)
        elif chart_type == "column":
            chart = ChartColumn(options)
        elif chart_type == "line":
            chart = ChartLine(options)
        elif chart_type == "pie":
            chart = ChartPie(options)
        elif chart_type == "radar":
            chart = ChartRadar(options)
        elif chart_type == "scatter":
            chart = ChartScatter(options)
        elif chart_type == "stock":
            chart = ChartStock(options)
        else:
            warn("Unknown chart type '%s' in add_chart()" % chart_type)
            return

        # Set the embedded chart name if present.
        if "name" in options:
            chart.chart_name = options["name"]

        chart._set_embedded_config_data()
        self.charts.append(chart)

        return chart

    def close(self):
        """
        Call finalisation code and close file.

        Calling this again after the workbook has been closed does nothing.

        Args:
            None.

        Returns:
            Nothing.

        """
        if self.fileclosed:
            return

        # Mark the workbook as closed before it is stored.
        self.fileclosed = 1
        self._store_workbook()

    def set_properties(self, properties):
        """
        Set the document properties such as Title, Author etc.

        The dictionary is stored as given; it is not copied.

        Args:
            properties: Dictionary of document properties.

        Returns:
            Nothing.

        """
        setattr(self, "doc_properties", properties)

    def define_name(self, name, formula):
        """
        Create a defined name in the workbook.

        Both global/workbook level names and local/worksheet level names,
        written like "Sheet1!name", are handled.

        Args:
            name:    The defined name.
            formula: The cell or range that the defined name refers to.

        Returns:
            Nothing on success, or -1 after a warning if the sheet name is
            unknown or the defined name is invalid.

        """
        # Drop any leading = signs; this is a no-op when there are none.
        formula = formula.lstrip("=")

        # Local defined names are formatted like "Sheet1!name".
        local_name = re.match(r"^(.*)!(.*)$", name)

        if local_name is None:
            # Use -1 to indicate global names.
            sheet_index = -1
        else:
            sheetname, name = local_name.group(1), local_name.group(2)
            sheet_index = self._get_sheet_index(sheetname)

            # Warn if the sheet index wasn't found.
            if sheet_index is None:
                warn("Unknown sheet name '%s' in defined_name()" % sheetname)
                return -1

        # Warn if the name doesn't start with the characters that are
        # accepted here for an Excel defined name.
        if re.match(r"^[a-zA-Z_\\][a-zA-Z_.]+", name) is None:
            warn("Invalid Excel characters in defined_name(): '%s'" % name)
            return -1

        # Warn if the name looks like a cell name.
        if re.match(r"^[a-zA-Z][a-zA-Z]?[a-dA-D]?[0-9]+$", name) is not None:
            warn("Name looks like a cell name in defined_name(): '%s'" % name)
            return -1

        self.defined_names.append([name, sheet_index, formula, False])

    def worksheets(self):
        """
        Return the worksheet objects held by the workbook.

        The internal list itself is returned, not a copy.

        Args:
            None.

        Returns:
            A list of worksheet objects.

        """
        return self.worksheets_objs

    ###########################################################################
    #
    # Private API.
    #
    ###########################################################################

    def _assemble_xml_file(self):
        # Assemble and write the XML file.

        # Prepare format object for passing to Style.pm.
        self._prepare_format_properties()

        # Write the XML declaration.
        self._xml_declaration()

        # Write the workbook element.
        self._write_workbook()

        # Write the fileVersion element.
        self._write_file_version()

        # Write the workbookPr element.
        self._write_workbook_pr()

        # Write the bookViews element.
        self._write_book_views()

        # Write the sheets element.
        self._write_sheets()

        # Write the workbook defined names.
        self._write_defined_names()

        # Write the calcPr element.
        self._write_calc_pr()

        # Close the workbook tag.
        self._xml_end_tag("workbook")

        # Close the file.
        self._xml_close()

    def _store_workbook(self):
        # Assemble the worksheets into a workbook: prepare the workbook
        # data, let the Packager write the package files into a temporary
        # directory and then zip that directory into self.filename.
        #
        # The temporary directory is removed, and the zip file handle is
        # closed, even if one of the steps raises.
        temp_dir = tempfile.mkdtemp(dir=self.tmpdir)

        try:
            packager = Packager()

            # Add a default worksheet if none have been added.
            if not self.worksheets():
                self.add_worksheet()

            # Ensure that at least one worksheet has been selected.
            if self.worksheet_meta.activesheet == 0:
                self.worksheets_objs[0].selected = 1
                self.worksheets_objs[0].hidden = 0

            # Set the active sheet.
            for sheet in self.worksheets():
                if sheet.index == self.worksheet_meta.activesheet:
                    sheet.active = 1

            # Convert the SST strings data structure.
            self._prepare_sst_string_data()

            # Prepare the worksheet VML elements such as comments and buttons.
            self._prepare_vml()

            # Set the defined names for the worksheets such as Print Titles.
            self._prepare_defined_names()

            # Prepare the drawings, charts and images.
            self._prepare_drawings()

            # Add cached data to charts.
            self._add_chart_data()

            # Package the workbook.
            packager._add_workbook(self)
            packager._set_package_dir(temp_dir)
            packager._create_package()

            # Free up the Packager object.
            packager = None

            xlsx_file = ZipFile(self.filename, "w", compression=ZIP_DEFLATED)

            try:
                # Iterate through files in the temp dir and add them to the
                # xlsx file, named relative to the temp dir root.
                for dirpath, _, filenames in os.walk(temp_dir):
                    for name in filenames:
                        abs_filename = os.path.join(dirpath, name)
                        rel_filename = os.path.relpath(abs_filename, temp_dir)
                        xlsx_file.write(abs_filename, rel_filename)
            finally:
                xlsx_file.close()
        finally:
            shutil.rmtree(temp_dir)

    def _check_sheetname(self, sheetname, is_chart=False):
        # Check for valid worksheet names. We check the length, if it contains
        # any invalid chars and if the sheetname is unique in the workbook.
        invalid_char = re.compile(r"[\[\]:*?/\\]")

        # Increment the Sheet/Chart number used for default sheet names below.
        if is_chart:
            self.chartname_count += 1
        else:
            self.sheetname_count += 1

        # Supply default Sheet/Chart sheetname if none has been defined.
        if sheetname is None:
            if is_chart:
                sheetname = self.chart_name + str(self.chartname_count)
            else:
                sheetname = self.sheet_name + str(self.sheetname_count)

        # Check that sheet sheetname is <= 31. Excel limit.
        if len(sheetname) > 31:
            raise Exception("Excel worksheet name '%s' must be <= 31 chars." % sheetname)

        # Check that sheetname doesn't contain any invalid characters
        if invalid_char.search(sheetname):
            raise Exception("Invalid Excel character '[]:*?/\\' in sheetname '%s'" % sheetname)

        # Check that the worksheet name doesn't already exist since this is a
        # fatal Excel error. The check must be case insensitive like Excel.
        for worksheet in self.worksheets():
            if sheetname.lower() == worksheet.name.lower():
                raise Exception("Sheetname '%s', with case ignored, is already in use." % sheetname)

        return sheetname

    def _prepare_format_properties(self):
        # Prepare all Format properties prior to passing them to styles.py.

        # Separate format objects into XF and DXF formats.
        self._prepare_formats()

        # Set the font index for the format objects.
        self._prepare_fonts()

        # Set the number format index for the format objects.
        self._prepare_num_formats()

        # Set the border index for the format objects.
        self._prepare_borders()

        # Set the fill index for the format objects.
        self._prepare_fills()

    def _prepare_formats(self):
        # Iterate through the XF Format objects and separate them into
        # XF and DXF formats. The XF and DF formats then need to be sorted
        # back into index order rather than creation order.
        xf_formats = []
        dxf_formats = []

        # Sort into XF and DXF formats.
        for xf_format in self.formats:
            if xf_format.xf_index is not None:
                xf_formats.append(xf_format)

            if xf_format.dxf_index is not None:
                dxf_formats.append(xf_format)

        # Pre-extend the format lists.
        self.xf_formats = [None] * len(xf_formats)
        self.dxf_formats = [None] * len(dxf_formats)

        # Rearrange formats into index order.
        for xf_format in xf_formats:
            index = xf_format.xf_index
            self.xf_formats[index] = xf_format

        for dxf_format in dxf_formats:
            index = dxf_format.dxf_index
            self.dxf_formats[index] = dxf_format

    def _set_default_xf_indices(self):
        # Set the default index for each format. Only used for testing.
        if self.default_date_format is not None:
            # Skip initialising format[1] if there is a default date format.
            for xf_format in self.formats[:1] + self.formats[2:]:
                xf_format._get_xf_index()
        else:
            for xf_format in self.formats:
                xf_format._get_xf_index()

    def _prepare_fonts(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default font elements.
        fonts = {}
        index = 0

        for xf_format in self.xf_formats:
            key = xf_format._get_font_key()
            if key in fonts:
                # Font has already been used.
                xf_format.font_index = fonts[key]
                xf_format.has_font = 0
            else:
                # This is a new font.
                fonts[key] = index
                xf_format.font_index = index
                xf_format.has_font = 1
                index += 1

        self.font_count = index

        # For DXF formats we only need to check if the properties have changed.
        for xf_format in self.dxf_formats:
            # The only font properties that can change for a DXF format are:
            # color, bold, italic, underline and strikethrough.
            if (
                xf_format.font_color
                or xf_format.bold
                or xf_format.italic
                or xf_format.underline
                or xf_format.font_strikeout
            ):
                xf_format.has_dxf_font = 1

    def _prepare_num_formats(self):
        # User records is not None start from index 0xA4.
        num_formats = {}
        index = 164
        num_format_count = 0

        is_number = re.compile(r"^\d+$")
        is_zeroes = re.compile(r"^0+\d")

        for xf_format in self.xf_formats + self.dxf_formats:
            num_format = xf_format.num_format
            # Check if num_format is an index to a built-in number format.
            # Also check for a string of zeros, which is a valid number
            # format string but would evaluate to zero.

            try:
                if is_number.match(str(num_format)) and not is_zeroes.match(str(num_format)):
                    # Index to a built-in number xf_format.
                    xf_format.num_format_index = int(num_format)
                    continue
            except (TypeError, UnicodeEncodeError):
                pass

            if num_format in num_formats:
                # Number xf_format has already been used.
                xf_format.num_format_index = num_formats[num_format]
            else:
                # Add a new number xf_format.
                num_formats[num_format] = index
                xf_format.num_format_index = index
                index += 1

                # Only increase font count for XF formats (not DXF formats).
                if xf_format.xf_index:
                    num_format_count += 1

        self.num_format_count = num_format_count

    def _prepare_borders(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default border elements.
        borders = {}
        index = 0

        for xf_format in self.xf_formats:
            key = xf_format._get_border_key()

            if key in borders:
                # Border has already been used.
                xf_format.border_index = borders[key]
                xf_format.has_border = 0
            else:
                # This is a new border.
                borders[key] = index
                xf_format.border_index = index
                xf_format.has_border = 1
                index += 1

        self.border_count = index

        # For DXF formats we only need to check if the properties have changed.
        has_border = re.compile(r"[^0:]")

        for xf_format in self.dxf_formats:
            key = xf_format._get_border_key()

            if has_border.search(key):
                xf_format.has_dxf_border = 1

    def _prepare_fills(self):
        # Iterate through the XF Format objects and give them an index to
        # non-default fill elements.
        # The user defined fill properties start from 2 since there are 2
        # default fills: patternType="none" and patternType="gray125".
        fills = {}
        index = 2  # Start from 2. See above.

        # Add the default fills.
        fills["0:0:0"] = 0
        fills["17:0:0"] = 1

        # Store the DXF colours separately since them may be reversed below.
        for xf_format in self.dxf_formats:
            if xf_format.pattern or xf_format.bg_color or xf_format.fg_color:
                xf_format.has_dxf_fill = 1
                xf_format.dxf_bg_color = xf_format.bg_color
                xf_format.dxf_fg_color = xf_format.fg_color

        for xf_format in self.xf_formats:
            # The following logical statements jointly take care of special
            # cases in relation to cell colours and patterns:
            # 1. For a solid fill (_pattern == 1) Excel reverses the role of
            # foreground and background colours, and
            # 2. If the user specifies a foreground or background colour
            # without a pattern they probably wanted a solid fill, so we fill
            # in the defaults.
            if xf_format.pattern == 1 and xf_format.bg_color != 0 and xf_format.fg_color != 0:
                tmp = xf_format.fg_color
                xf_format.fg_color = xf_format.bg_color
                xf_format.bg_color = tmp

            if xf_format.pattern <= 1 and xf_format.bg_color != 0 and xf_format.fg_color == 0:
                xf_format.fg_color = xf_format.bg_color
                xf_format.bg_color = 0
                xf_format.pattern = 1

            if xf_format.pattern <= 1 and xf_format.bg_color == 0 and xf_format.fg_color != 0:
                xf_format.bg_color = 0
                xf_format.pattern = 1

            key = xf_format._get_fill_key()

            if key in fills:
                # Fill has already been used.
                xf_format.fill_index = fills[key]
                xf_format.has_fill = 0
            else:
                # This is a new fill.
                fills[key] = index
                xf_format.fill_index = index
                xf_format.has_fill = 1
                index += 1

        self.fill_count = index

    def _prepare_defined_names(self):
        # Iterate through the worksheets and store any defined names in
        # addition to any user defined names. Stores the defined names
        # for the Workbook.xml and the named ranges for App.xml.
        defined_names = self.defined_names

        for sheet in self.worksheets():
            # Check for Print Area settings.
            if sheet.autofilter_area:
                hidden = 1
                sheet_range = sheet.autofilter_area
                # Store the defined names.
                defined_names.append(["_xlnm._FilterDatabase", sheet.index, sheet_range, hidden])

            # Check for Print Area settings.
            if sheet.print_area_range:
                hidden = 0
                sheet_range = sheet.print_area_range
                # Store the defined names.
                defined_names.append(["_xlnm.Print_Area", sheet.index, sheet_range, hidden])

            # Check for repeat rows/cols referred to as Print Titles.
            if sheet.repeat_col_range or sheet.repeat_row_range:
                hidden = 0
                sheet_range = ""
                if sheet.repeat_col_range and sheet.repeat_row_range:
                    sheet_range = sheet.repeat_col_range + "," + sheet.repeat_row_range
                else:
                    sheet_range = sheet.repeat_col_range + sheet.repeat_row_range
                # Store the defined names.
                defined_names.append(["_xlnm.Print_Titles", sheet.index, sheet_range, hidden])

        defined_names = self._sort_defined_names(defined_names)
        self.defined_names = defined_names
        self.named_ranges = self._extract_named_ranges(defined_names)

    def _sort_defined_names(self, names):
        # Sort the list of list of internal and user defined names in
        # the same order as used by Excel.

        # Add a normalise name string to each list for sorting.
        for name_list in names:
            (defined_name, _, sheet_name, _) = name_list

            # Normalise the defined name by removing any leading '_xmln.'
            # from internal names and lowercasing the string.
            defined_name = defined_name.replace("_xlnm.", "").lower()

            # Normalise the sheetname by removing the leading quote and
            # lowercasing the string.
            sheet_name = sheet_name.lstrip("'").lower()

            name_list.append(defined_name + "::" + sheet_name)

        # Remove the extra key for sorting.
        names.sort(key=operator.itemgetter(4))

        for name_list in names:
            name_list.pop()

        return names

    def _prepare_drawings(self):
        # Iterate through the worksheets and set up chart and image drawings.
        chart_ref_id = 0
        image_ref_id = 0
        drawing_id = 0

        for sheet in self.worksheets():
            chart_count = len(sheet.charts)
            image_count = len(sheet.images)
            shape_count = len(sheet.shapes)

            if not (chart_count + image_count + shape_count):
                continue

            drawing_id += 1

            for index in range(chart_count):
                chart_ref_id += 1
                sheet._prepare_chart(index, chart_ref_id, drawing_id)

            for index in range(image_count):
                filename = sheet.images[index][2]
                (image_type, width, height, name) = self._get_image_properties(filename)
                image_ref_id += 1

                sheet._prepare_image(index, image_ref_id, drawing_id, width, height, name, image_type)

            # for index in range(shape_count):
            #    sheet._prepare_shape(index, drawing_id)

            drawing = sheet.drawing
            self.drawings.append(drawing)

        # Sort the workbook charts references into the order that the were
        # written from the worksheets above.
        self.charts = sorted(self.charts, key=lambda chart: chart.id)

        self.drawing_count = drawing_id

    def _get_image_properties(self, filename):
        # Extract dimension information from the image file.
        height = 0
        width = 0

        # Open the image file and read in the data.
        fh = open(filename, "rb")
        data = fh.read()

        # Get the image filename without the path.
        image_name = os.path.basename(filename)

        # Look for some common image file markers.
        marker1 = (unpack("3s", data[1:4]))[0]
        marker2 = (unpack(">H", data[:2]))[0]
        marker3 = (unpack("4s", data[6:10]))[0]
        marker4 = (unpack("2s", data[:2]))[0]

        if marker1 == b"PNG":
            self.image_types["png"] = 1
            (image_type, width, height) = self._process_png(data)

        elif marker2 == 0xFFD8 and (marker3 == b"JFIF" or marker3 == b"EXIF"):
            self.image_types["jpeg"] = 1
            (image_type, width, height) = self._process_jpg(data)

        elif marker4 == b"BM":
            self.image_types["bmp"] = 1
            (image_type, width, height) = self._process_bmp(data)

        else:
            raise Exception("%s: Unknown or unsupported file type." % filename)

        # Check that we found the required data.
        if not height or not width:
            raise Exception("%s: no size data found in image file." % filename)

        # Store image data to copy it into file container.
        self.images.append([filename, image_type])

        fh.close()
        return (image_type, width, height, image_name)

    def _process_png(self, data):
        # Extract width and height information from a PNG file.
        width = (unpack(">I", data[16:20]))[0]
        height = (unpack(">I", data[20:24]))[0]

        return ("png", width, height)

    def _process_jpg(self, data):
        # Extract width and height information from a JPEG file.
        offset = 2
        data_length = len(data)

        # Search through the image data to find the 0xFFC0 marker.
        # The height and width are contained in the data for that
        # sub-element.
        found = 0
        while not found and offset < data_length:

            marker = (unpack(">H", data[offset + 0 : offset + 2]))[0]
            length = (unpack(">H", data[offset + 2 : offset + 4]))[0]

            if marker == 0xFFC0 or marker == 0xFFC2:
                height = (unpack(">H", data[offset + 5 : offset + 7]))[0]
                width = (unpack(">H", data[offset + 7 : offset + 9]))[0]
                found = 1
                continue

            offset = offset + length + 2

            if marker == 0xFFDA:
                found = 1
                continue

        return ("jpeg", width, height)

    def _process_bmp(self, data):
        # Extract width and height information from a BMP file.
        width = (unpack("<L", data[18:22]))[0]
        height = (unpack("<L", data[22:26]))[0]
        return ("bmp", width, height)

    def _extract_named_ranges(self, defined_names):
        # Extract the named ranges from the sorted list of defined names.
        # These are used in the App.xml file.
        named_ranges = []

        for defined_name in defined_names:

            name = defined_name[0]
            index = defined_name[1]
            sheet_range = defined_name[2]

            # Skip autoFilter ranges.
            if name == "_xlnm._FilterDatabase":
                continue

            # We are only interested in defined names with ranges.
            if "!" in sheet_range:
                sheet_name, _ = sheet_range.split("!", 1)

                # Match Print_Area and Print_Titles xlnm types.
                if name.startswith("_xlnm."):
                    xlnm_type = name.replace("_xlnm.", "")
                    name = sheet_name + "!" + xlnm_type
                elif index != -1:
                    name = sheet_name + "!" + name

                named_ranges.append(name)

        return named_ranges

    def _get_sheet_index(self, sheetname):
        # Convert a sheet name to its index. Return None otherwise.
        sheetname = sheetname.strip("'")

        if sheetname in self.sheetnames:
            return self.sheetnames.index(sheetname)
        else:
            return None

    #
    # Iterate through the worksheets and set up the VML objects.
    #
    def _prepare_vml(self):
        comment_id = 0
        vml_data_id = 1
        vml_shape_id = 1024
        vml_files = 0
        comment_files = 0

        for sheet in self.worksheets():
            if not sheet.has_vml:
                continue

            vml_files += 1

            if sheet.has_comments:
                comment_files += 1

            comment_id += 1
            count = sheet._prepare_vml_objects(vml_data_id, vml_shape_id, comment_id)

            # Each VML file should start with a shape id incremented by 1024.
            vml_data_id += 1 * int((1024 + count) / 1024)
            vml_shape_id += 1024 * int((1024 + count) / 1024)

        self.num_vml_files = vml_files
        self.num_comment_files = comment_files

        # Add a font format for cell comments.
        if comment_files > 0:
            xf = self.add_format({"font_name": "Tahoma", "font_size": 8, "color_indexed": 81, "font_only": True})
            xf._get_xf_index()

    def _add_chart_data(self):
        # Add "cached" data to charts to provide the numCache and strCacher
        # data for series and title/axis ranges.
        worksheets = {}
        seen_ranges = {}

        # Map worksheet names to worksheet objects.
        for worksheet in self.worksheets():
            worksheets[worksheet.name] = worksheet

        for chart in self.charts:

            for c_range in chart.formula_ids.keys():
                r_id = chart.formula_ids[c_range]

                # Skip if the series has user defined data.
                if chart.formula_data[r_id] is not None:
                    if not c_range in seen_ranges or seen_ranges[c_range] is None:
                        data = chart.formula_data[r_id]
                        seen_ranges[c_range] = data
                    continue

                # Check to see if the data is already cached locally.
                if c_range in seen_ranges:
                    chart.formula_data[r_id] = seen_ranges[c_range]
                    continue

                # Convert the range formula to a sheet name and cell range.
                (sheetname, cells) = self._get_chart_range(c_range)

                # Skip if we couldn't parse the formula.
                if sheetname is None:
                    continue

                # Die if the name is unknown since it indicates a user error in
                # a chart series formula.
                if not sheetname in worksheets:
                    warn(
                        "Unknown worksheet reference '%s' in range "
                        "'%s' passed to add_series()" % (sheetname, c_range)
                    )

                # Find the worksheet object based on the sheet name.
                worksheet = worksheets[sheetname]

                # Get the data from the worksheet table.
                data = worksheet._get_range_data(*cells)

                # TODO
                #   # Ignore rich strings for now. Deparse later if necessary.
                #        if token =~ m{^<r>} and token =~ m{</r>$}:
                #            token = ''

                # Add the data to the chart.
                chart.formula_data[r_id] = data

                # Store range data locally to avoid lookup if seen again.
                seen_ranges[c_range] = data

    def _get_chart_range(self, c_range):
        # Convert a range formula such as Sheet1!$B$1:$B$5 into a sheet name
        # and cell range such as ( 'Sheet1', 0, 1, 4, 1 ).

        # Split the range formula into sheetname and cells at the last '!'.
        # TODO. Fix this to match from right.
        pos = c_range.find("!")
        if pos > 0:
            sheetname, cells = c_range.split("!")
        else:
            return None

        # Split the cell range into 2 cells or else use single cell for both.
        if cells.find(":") > 0:
            (cell_1, cell_2) = cells.split(":")
        else:
            (cell_1, cell_2) = (cells, cells)

        # Remove leading/trailing quotes and convert escaped quotes to single.
        sheetname = sheetname.strip("'")
        sheetname = sheetname.replace("''", "'")

        (row_start, col_start) = xl_cell_to_rowcol(cell_1)
        (row_end, col_end) = xl_cell_to_rowcol(cell_2)

        # Check that we have a 1D range only.
        if row_start != row_end and col_start != col_end:
            return None

        return (sheetname, [row_start, col_start, row_end, col_end])

    def _prepare_sst_string_data(self):
        # Convert the SST string data from a dict to a list.
        self.str_table._sort_string_data()

    ###########################################################################
    #
    # XML methods.
    #
    ###########################################################################

    def _write_workbook(self):
        # Write <workbook> element.

        schema = "http://schemas.openxmlformats.org"
        xmlns = schema + "/spreadsheetml/2006/main"
        xmlns_r = schema + "/officeDocument/2006/relationships"

        attributes = [("xmlns", xmlns), ("xmlns:r", xmlns_r)]

        self._xml_start_tag("workbook", attributes)

    def _write_file_version(self):
        # Write the <fileVersion> element.

        app_name = "xl"
        last_edited = 4
        lowest_edited = 4
        rup_build = 4505

        attributes = [
            ("appName", app_name),
            ("lastEdited", last_edited),
            ("lowestEdited", lowest_edited),
            ("rupBuild", rup_build),
        ]

        if self.vba_project:
            attributes.append(("codeName", "{37E998C4-C9E5-D4B9-71C8-EB1FF731991C}"))

        self._xml_empty_tag("fileVersion", attributes)

    def _write_workbook_pr(self):
        # Write <workbookPr> element.
        default_theme_version = 124226
        attributes = []

        if self.vba_codename:
            attributes.append(("codeName", self.vba_codename))
        if self.date_1904:
            attributes.append(("date1904", 1))

        attributes.append(("defaultThemeVersion", default_theme_version))

        self._xml_empty_tag("workbookPr", attributes)

    def _write_book_views(self):
        # Write <bookViews> element.
        self._xml_start_tag("bookViews")
        self._write_workbook_view()
        self._xml_end_tag("bookViews")

    def _write_workbook_view(self):
        # Write <workbookView> element.
        attributes = [
            ("xWindow", self.x_window),
            ("yWindow", self.y_window),
            ("windowWidth", self.window_width),
            ("windowHeight", self.window_height),
        ]

        # Store the tabRatio attribute when it isn't the default.
        if self.tab_ratio != 500:
            attributes.append(("tabRatio", self.tab_ratio))

        # Store the firstSheet attribute when it isn't the default.
        if self.worksheet_meta.firstsheet > 0:
            firstsheet = self.worksheet_meta.firstsheet + 1
            attributes.append(("firstSheet", firstsheet))

        # Store the activeTab attribute when it isn't the first sheet.
        if self.worksheet_meta.activesheet > 0:
            attributes.append(("activeTab", self.worksheet_meta.activesheet))

        self._xml_empty_tag("workbookView", attributes)

    def _write_sheets(self):
        # Write <sheets> element.
        self._xml_start_tag("sheets")

        id_num = 1
        for worksheet in self.worksheets():
            self._write_sheet(worksheet.name, id_num, worksheet.hidden)
            id_num += 1

        self._xml_end_tag("sheets")

    def _write_sheet(self, name, sheet_id, hidden):
        # Write <sheet> element.
        attributes = [("name", name), ("sheetId", sheet_id)]

        if hidden:
            attributes.append(("state", "hidden"))

        attributes.append(("r:id", "rId" + str(sheet_id)))

        self._xml_empty_tag("sheet", attributes)

    def _write_calc_pr(self):
        # Write the <calcPr> element.

        calc_id = "124519"

        attributes = [("calcId", calc_id)]

        self._xml_empty_tag("calcPr", attributes)

    def _write_defined_names(self):
        # Write the <definedNames> element.
        if not self.defined_names:
            return

        self._xml_start_tag("definedNames")

        for defined_name in self.defined_names:
            self._write_defined_name(defined_name)

        self._xml_end_tag("definedNames")

    def _write_defined_name(self, defined_name):
        # Write the <definedName> element.
        name = defined_name[0]
        sheet_id = defined_name[1]
        sheet_range = defined_name[2]
        hidden = defined_name[3]

        attributes = [("name", name)]

        if sheet_id != -1:
            attributes.append(("localSheetId", sheet_id))
        if hidden:
            attributes.append(("hidden", 1))

        self._xml_data_element("definedName", sheet_range, attributes)
Ejemplo n.º 10
0
    def __init__(self, filename=None, options=None):
        """
        Constructor.

        Args:
            filename: Stored unchanged on ``self.filename``. Defaults to None.
            options:  Optional dict of settings. The keys read here, with the
                value used when a key is absent, are: ``tmpdir`` (None),
                ``date_1904`` (False), ``strings_to_numbers`` (False),
                ``strings_to_formulas`` (True), ``strings_to_urls`` (True),
                ``default_date_format`` (None), ``constant_memory`` (False)
                and ``in_memory`` (False). Passing None (the default) is the
                same as passing an empty dict.

        """
        # Use a fresh dict per call rather than a mutable ``{}`` default,
        # which would be a single object shared by every instance.
        if options is None:
            options = {}

        super(Workbook, self).__init__()

        # User-supplied settings.
        self.filename = filename
        self.tmpdir = options.get("tmpdir", None)
        self.date_1904 = options.get("date_1904", False)
        self.strings_to_numbers = options.get("strings_to_numbers", False)
        self.strings_to_formulas = options.get("strings_to_formulas", True)
        self.strings_to_urls = options.get("strings_to_urls", True)
        self.default_date_format = options.get("default_date_format", None)
        self.optimization = options.get("constant_memory", False)
        self.in_memory = options.get("in_memory", False)

        # Internal bookkeeping, all starting from an empty/zero state.
        self.worksheet_meta = WorksheetMeta()
        self.selected = 0
        self.fileclosed = 0
        self.filehandle = None
        self.internal_fh = 0
        self.sheet_name = "Sheet"
        self.chart_name = "Chart"
        self.sheetname_count = 0
        self.chartname_count = 0
        self.worksheets_objs = []
        self.charts = []
        self.drawings = []
        self.sheetnames = []
        self.formats = []
        self.xf_formats = []
        self.xf_format_indices = {}
        self.dxf_formats = []
        self.dxf_format_indices = {}
        self.palette = []
        self.font_count = 0
        self.num_format_count = 0
        self.defined_names = []
        self.named_ranges = []
        self.custom_colors = []
        self.doc_properties = {}
        self.localtime = datetime.now()
        self.num_vml_files = 0
        self.num_comment_files = 0
        self.x_window = 240
        self.y_window = 15
        self.window_width = 16095
        self.window_height = 9660
        self.tab_ratio = 500
        self.str_table = SharedStringTable()
        self.vba_project = None
        self.vba_codename = None
        self.image_types = {}
        self.images = []
        self.border_count = 0
        self.fill_count = 0
        self.drawing_count = 0
        self.calc_mode = "auto"
        self.calc_on_load = True

        # We can't do 'constant_memory' mode while doing 'in_memory' mode.
        if self.in_memory:
            self.optimization = False

        # Add the default cell format.
        self.add_format({"xf_index": 0})

        # Add a default URL format.
        self.default_url_format = self.add_format({"color": "blue", "underline": 1})

        # Add the default date format. The raw option value is replaced by
        # whatever add_format() returns for it.
        if self.default_date_format is not None:
            self.default_date_format = self.add_format({"num_format": self.default_date_format})