def get_cells(self, min_row, min_col, max_row, max_col):
    """Yield a ``RawCell`` for each cell element inside the given bounds.

    ``self.xml_source`` is streamed with ``iterparse``.  A cell is yielded
    when its numeric column index and row number both lie within the
    inclusive ``min``/``max`` limits.  When the cell has a formula child
    and ``self.parent.data_only`` is false, the yielded value is the
    formula text prefixed with "=" and the type is ``Cell.TYPE_FORMULA``;
    otherwise the text of the value child is used with the type taken
    from the ``t`` attribute (default ``'n'``).
    """
    cell_tag = '{%s}c' % SHEET_MAIN_NS
    value_tag = '{%s}v' % SHEET_MAIN_NS
    formula_tag = '{%s}f' % SHEET_MAIN_NS

    for _event, node in iterparse(self.xml_source):
        tag = node.tag
        if tag == cell_tag:
            coord = node.get('r')
            column_str, row_digits = RE_COORDINATE.match(coord).groups()
            row = int(row_digits)
            column = column_index_from_string(column_str)
            if min_col <= column <= max_col and min_row <= row <= max_row:
                data_type = node.get('t', 'n')
                style_id = node.get('s')
                formula = node.findtext(formula_tag)
                value = node.findtext(value_tag)
                if formula is not None and not self.parent.data_only:
                    data_type = Cell.TYPE_FORMULA
                    value = "=" + formula
                yield RawCell(row, column_str, coord, value, data_type,
                              style_id, None)
        # Value and formula children must survive until their parent cell
        # has been read, so only the other elements are released here.
        if tag != value_tag and tag != formula_tag:
            node.clear()
def fast_parse(ws, xml_source, string_table, style_table):
    """Copy cell values and styles from the sheet XML into ``ws``.

    Only elements accepted by ``filter_cells`` are examined, and only
    those that carry value text are written.  Cells whose ``t`` attribute
    equals ``Cell.TYPE_STRING`` hold an integer index that is resolved
    through ``string_table``; a present ``s`` attribute is resolved
    through ``style_table`` and stored in ``ws._styles``.
    """
    value_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v'
    stream = iterparse(_get_xml_iter(xml_source))

    for _event, cell_node in ifilter(filter_cells, stream):
        raw_value = cell_node.findtext(value_tag)
        if raw_value is not None:
            coordinate = cell_node.get('r')
            data_type = cell_node.get('t', 'n')
            style_id = cell_node.get('s')

            if data_type == Cell.TYPE_STRING:
                raw_value = string_table.get(int(raw_value))
            ws.cell(coordinate).value = raw_value

            if style_id is not None:
                ws._styles[coordinate] = style_table.get(int(style_id))

        # Release the element once handled so memory use stays bounded.
        cell_node.clear()
def iter_rows(workbook_name, sheet_name, xml_source, range_string='', row_offset=0, column_offset=0):
    """Return the result of ``get_squared_range`` for a worksheet stream.

    The bounds come from ``range_string`` (shifted by the two offsets via
    ``get_range_boundaries``) when it is non-empty; otherwise they come
    from ``read_dimension``, with the columns converted to indices, the
    last column increased by 1 and the last row increased by 6.  Shared
    strings are read from the workbook archive, falling back to an empty
    table when the archive lookup raises ``KeyError``.  ``sheet_name`` is
    not used by this function.
    """
    archive = get_archive_file(workbook_name)

    if range_string:
        bounds = get_range_boundaries(range_string, row_offset, column_offset)
        min_col, min_row, max_col, max_row = bounds
    else:
        first_col, min_row, last_col, last_row = read_dimension(xml_source=xml_source)
        min_col = column_index_from_string(first_col)
        max_col = column_index_from_string(last_col) + 1
        max_row = last_row + 6

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}

    style_table = read_style_table(archive.read(ARC_STYLE))

    # Rewind the stream before it is parsed for cell data.
    xml_source.seek(0)
    parser = iterparse(xml_source)
    return get_squared_range(parser, min_col, min_row, max_col, max_row,
                             string_table, style_table)
def iter_rows(workbook_name, sheet_name, xml_source, shared_date, string_table, range_string='', row_offset=0, column_offset=0):
    """Return the result of ``get_squared_range`` for a worksheet stream.

    A non-empty ``range_string`` is converted to bounds with
    ``get_range_boundaries`` (applying both offsets).  Without one, the
    bounds are taken from ``read_dimension``: columns become indices, the
    last column is increased by 1 and the last row by 6.  The style table
    is read from the workbook archive; ``string_table`` and
    ``shared_date`` are passed through unchanged.  ``sheet_name`` is not
    used by this function.
    """
    archive = get_archive_file(workbook_name)

    if not range_string:
        first_col, min_row, last_col, last_row = read_dimension(xml_source=xml_source)
        min_col = column_index_from_string(first_col)
        max_col = column_index_from_string(last_col) + 1
        max_row = last_row + 6
    else:
        min_col, min_row, max_col, max_row = get_range_boundaries(
            range_string, row_offset, column_offset)

    style_table = read_style_table(archive.read(ARC_STYLE))

    # Rewind the stream before it is parsed for cell data.
    xml_source.seek(0)
    return get_squared_range(iterparse(xml_source), min_col, min_row,
                             max_col, max_row, string_table, style_table,
                             shared_date)
def read_dimension(xml_source):
    """Return the sheet extent as ``(min_col, min_row, max_col, max_row)``.

    If a ``dimension`` element is found, its ``ref`` attribute is split
    into start/stop coordinates and the values produced by
    ``coordinate_from_string`` are returned directly.

    Otherwise the extent is estimated from the ``row`` elements: the
    first and last row numbers seen, and the smallest/largest column
    numbers found in the rows' ``spans`` attributes.  In that case a
    warning is issued and the columns are converted with
    ``get_column_letter``.  Rows without a usable ``spans`` attribute
    contribute only their row number, and any bound that could not be
    determined is returned as ``None``.
    """
    dimension_tag = '{%s}dimension' % SHEET_MAIN_NS
    row_tag = '{%s}row' % SHEET_MAIN_NS

    min_row = min_col = max_row = max_col = None
    source = _get_xml_iter(xml_source)
    for _event, el in iterparse(source):
        if el.tag == dimension_tag:
            dim = el.get("ref")
            if ':' in dim:
                start, stop = dim.split(':')
            else:
                start = stop = dim
            min_col, min_row = coordinate_from_string(start)
            max_col, max_row = coordinate_from_string(stop)
            return min_col, min_row, max_col, max_row

        if el.tag == row_tag:
            row = int(el.get("r"))
            if min_row is None:
                min_row = row
            # Track the most recent row inside the loop so the value is
            # always bound, even when the sheet has a single row.
            max_row = row

            # ``spans`` is not always present; skip the column update
            # instead of failing on ``None.split``.
            span = el.get("spans", "")
            if ":" in span:
                start, stop = span.split(":")
                if min_col is None:
                    min_col = int(start)
                    max_col = int(stop)
                else:
                    min_col = min(min_col, int(start))
                    max_col = max(max_col, int(stop))

    warn("Unsized worksheet")
    first_letter = get_column_letter(min_col) if min_col is not None else None
    last_letter = get_column_letter(max_col) if max_col is not None else None
    return first_letter, min_row, last_letter, max_row
def read_dimension(xml_source):
    """Return ``(min_col, min_row, max_col, max_row)`` from the sheet XML.

    The first ``dimension`` element found supplies the answer: its
    ``ref`` attribute is either a single coordinate or two coordinates
    joined by ":", each decoded with ``coordinate_from_string``.  Every
    other element is cleared as it is passed.  ``None`` is returned when
    the stream contains no ``dimension`` element.
    """
    dimension_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}dimension'

    for _event, node in iterparse(_get_xml_iter(xml_source)):
        if node.tag != dimension_tag:
            node.clear()
            continue

        ref = node.get('ref')
        # A single-cell reference acts as both corners of the range.
        corners = ref.split(':') if ':' in ref else (ref, ref)
        min_range, max_range = corners
        min_col, min_row = coordinate_from_string(min_range)
        max_col, max_row = coordinate_from_string(max_range)
        return min_col, min_row, max_col, max_row

    return None
def fast_parse(ws, xml_source, string_table, style_table):
    """Fill ``ws`` with the values and styles found in the sheet XML.

    Elements are pre-selected by ``filter_cells``.  An element without
    value text is skipped (but still cleared).  For the others, the value
    is written to ``ws.cell(<r attribute>)``; values of type
    ``Cell.TYPE_STRING`` are first replaced by the ``string_table`` entry
    at that integer index.  When an ``s`` attribute is present, the
    matching ``style_table`` entry is recorded in ``ws._styles``.
    """
    v_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v'
    source = _get_xml_iter(xml_source)
    cells = ifilter(filter_cells, iterparse(source))

    for _event, el in cells:
        text = el.findtext(v_tag)
        if text is not None:
            coordinate = el.get('r')
            data_type = el.get('t', 'n')
            style_id = el.get('s')

            is_shared_string = data_type == Cell.TYPE_STRING
            if is_shared_string:
                text = string_table.get(int(text))
            ws.cell(coordinate).value = text

            if style_id is not None:
                ws._styles[coordinate] = style_table.get(int(style_id))

        # Free the element after use to avoid memory exhaustion.
        el.clear()
def read_dimension(xml_source):
    """Return ``(min_col, min_row, max_col, max_row)`` from the sheet XML.

    A ``dimension`` element, when present, is authoritative: its ``ref``
    attribute is split into one or two coordinates and decoded with
    ``coordinate_from_string``.

    The dimension is supposedly mandatory but is sometimes left off.  As
    a fallback the extents of all cell (``c``) elements seen are tracked
    and returned instead; all four values are ``None`` when no cell was
    seen either.
    """
    dimension_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}dimension'
    cell_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}c'

    def column_order(letters):
        # Column letters are not ordered alphabetically: "AA" comes after
        # "Z".  Ordering by length first, then alphabetically, matches
        # the numeric column order for upper-case column names.
        return (len(letters), letters)

    source = _get_xml_iter(xml_source)
    smin_col = smin_row = smax_col = smax_row = None

    for _event, element in iterparse(source):
        if element.tag == dimension_tag:
            ref = element.get('ref')
            if ':' in ref:
                min_range, max_range = ref.split(':')
            else:
                min_range = max_range = ref
            min_col, min_row = coordinate_from_string(min_range)
            max_col, max_row = coordinate_from_string(max_range)
            return min_col, min_row, max_col, max_row

        if element.tag == cell_tag:
            col, row = coordinate_from_string(element.get('r'))
            if smin_row is None:
                # First cell: initialise the observed extents.
                smin_col = smax_col = col
                smin_row = smax_row = row
            else:
                # Keep track of the seen max and min (used only when
                # there is no dimension element).
                smin_col = min(smin_col, col, key=column_order)
                smax_col = max(smax_col, col, key=column_order)
                smin_row = min(smin_row, row)
                smax_row = max(smax_row, row)
        else:
            element.clear()

    return smin_col, smin_row, smax_col, smax_row
def parse(self):
    """Stream ``self.source`` and dispatch known elements to handlers.

    Each element produced by ``iterparse`` whose tag is registered below
    is passed to the corresponding bound method; elements with any other
    tag are ignored.
    """
    handlers = (
        ('c', self.parse_cell),
        ('mergeCells', self.parse_merge),
        ('cols', self.parse_column_dimensions),
        ('sheetData', self.parse_row_dimensions),
        ('printOptions', self.parse_print_options),
        ('pageMargins', self.parse_margins),
        ('pageSetup', self.parse_page_setup),
        ('headerFooter', self.parse_header_footer),
        ('conditionalFormatting', self.parser_conditional_formatting),
    )
    # Map fully qualified tag names to their handlers.
    dispatcher = dict(
        ('{%s}%s' % (SHEET_MAIN_NS, local_name), handler)
        for local_name, handler in handlers
    )

    stream = _get_xml_iter(self.source)
    for _event, element in iterparse(stream):
        handler = dispatcher.get(element.tag)
        if handler is not None:
            handler(element)
def iter_rows(workbook_name, sheet_name, xml_source, shared_date, string_table, range_string='', row_offset=0, column_offset=0):
    """Return the result of ``get_squared_range`` for a worksheet stream.

    Bounds are derived from ``range_string`` and the two offsets when a
    range is given.  Otherwise ``read_dimension`` supplies them, after
    which the columns are converted to indices, the last column is
    increased by 1 and the last row by 6.  The style table is the
    ``'table'`` entry popped from the result of ``read_style_table``.
    ``sheet_name`` is not used by this function.
    """
    archive = get_archive_file(workbook_name)
    stream = xml_source

    if range_string:
        limits = get_range_boundaries(range_string, row_offset, column_offset)
        min_col, min_row, max_col, max_row = limits
    else:
        col_from, min_row, col_to, row_to = read_dimension(xml_source=stream)
        min_col = column_index_from_string(col_from)
        max_col = column_index_from_string(col_to) + 1
        max_row = row_to + 6

    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')

    # Rewind the stream before it is parsed for cell data.
    stream.seek(0)
    cell_parser = iterparse(stream)
    return get_squared_range(cell_parser, min_col, min_row, max_col, max_row,
                             string_table, style_table, shared_date)
def fast_parse(ws, xml_source, string_table, style_table):
    """Populate worksheet ``ws`` from the sheet XML in ``xml_source``.

    The XML is read twice: once as a full tree (merged cells, column
    dimensions, print options, page margins, page setup, header/footer)
    and once as a stream for the cells accepted by ``filter_cells``.

    Cell handling:

    * A present ``s`` attribute is resolved through ``style_table`` and
      stored in ``ws._styles``.
    * A cell with a formula child gets "=" plus the formula text as its
      value.  A formula with text and ``t="shared"`` is remembered under
      its ``si`` index; a formula without text is rebuilt from the
      remembered formula with that index, with its cell references
      shifted by the distance between the two cells.  References keep
      their ``$`` markers, and a ``$``-marked column or row is not
      shifted.  An unknown index yields a bare "=".
    * A cell without a formula uses its value text; when the ``t``
      attribute equals ``Cell.TYPE_STRING`` the text is an integer index
      into ``string_table``.
    """
    import re
    from openpyxl.cell import column_index_from_string

    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(xml_source)

    mergeCells = root.find(QName(xmlns, 'mergeCells').text)
    if mergeCells is not None:
        for mergeCell in mergeCells.findall(QName(xmlns, 'mergeCell').text):
            ws.merge_cells(mergeCell.get('ref'))

    source = _get_xml_iter(xml_source)
    it = iterparse(source)
    formula_table = {}

    # Groups: column "$" marker, column letters, row "$" marker, row digits.
    ALL_COORD_RE = re.compile(r'([$]?)([A-Z]+)([$]?)(\d+)')

    def joffset_cell(start_coord, stop_coord, target_coord):
        # Shift one reference by the offset between start and stop cells.
        col_marker, col_letters, row_marker, row_digits = target_coord
        out_col = column_index_from_string(col_letters)
        out_row = int(row_digits)
        if not col_marker:
            out_col += (column_index_from_string(stop_coord[1])
                        - column_index_from_string(start_coord[1]))
        if not row_marker:
            out_row += int(stop_coord[3]) - int(start_coord[3])
        # Re-emit the "$" markers so absolute references stay absolute.
        return col_marker + get_column_letter(out_col) + row_marker + str(out_row)

    def joffset_formula(start, stop, formula):
        # Rewrite every reference in ``formula`` for the cell ``stop``.
        start_coord = ALL_COORD_RE.match(start).groups()
        stop_coord = ALL_COORD_RE.match(stop).groups()
        return ALL_COORD_RE.sub(
            lambda match: joffset_cell(start_coord, stop_coord, match.groups()),
            formula)

    formula_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}f'
    value_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v'

    for event, element in filter(filter_cells, it):
        formula = element.find(formula_tag)
        value = element.findtext(value_tag)
        coordinate = element.get('r')

        if formula is not None:
            formula_text = formula.text or ''
            if formula_text:
                if formula.get('t') == 'shared':
                    # Remember the formula so that later cells sharing
                    # this index can be rebuilt from it.
                    formula_table[formula.get('si')] = {
                        'text': formula_text, 'cell': coordinate}
            else:
                shared_formula = formula_table.get(formula.get('si'))
                if shared_formula is not None:
                    formula_text = joffset_formula(
                        shared_formula['cell'], coordinate,
                        shared_formula['text'])
            value = '=' + formula_text

        style_id = element.get('s')
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))

        if value is not None:
            # Only plain values can be shared-string indices; a formula
            # string must never go through int().
            if formula is None and element.get('t', 'n') == Cell.TYPE_STRING:
                value = string_table.get(int(value))
            ws.cell(coordinate).value = value

        # to avoid memory exhaustion, clear the item after use
        element.clear()

    cols = root.find(QName(xmlns, 'cols').text)
    if cols is not None:
        for col in cols.findall(QName(xmlns, 'col').text):
            first_col = int(col.get('min')) if col.get('min') else 1
            last_col = int(col.get('max')) if col.get('max') else 1
            width = col.get('width')
            outline_level = col.get('outlineLevel')
            style = col.get('style')
            for colId in range(first_col, last_col + 1):
                column = get_column_letter(colId)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                dimension = ws.column_dimensions[column]
                if width is not None:
                    dimension.width = float(width)
                if col.get('bestFit') == '1':
                    dimension.auto_size = True
                if col.get('hidden') == '1':
                    dimension.visible = False
                if outline_level is not None:
                    dimension.outline_level = int(outline_level)
                if col.get('collapsed') == '1':
                    dimension.collapsed = True
                if style is not None:
                    dimension.style_index = style

    printOptions = root.find(QName(xmlns, 'printOptions').text)
    if printOptions is not None:
        for name in ('horizontalCentered', 'verticalCentered'):
            raw = printOptions.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    pageMargins = root.find(QName(xmlns, 'pageMargins').text)
    if pageMargins is not None:
        for name in ('left', 'right', 'top', 'bottom', 'header', 'footer'):
            raw = pageMargins.get(name)
            if raw is not None:
                setattr(ws.page_margins, name, float(raw))

    pageSetup = root.find(QName(xmlns, 'pageSetup').text)
    if pageSetup is not None:
        # Each attribute is stored under its own name; "scale" used to be
        # written to ``page_setup.top`` by mistake.
        for name in ('orientation', 'paperSize', 'scale', 'fitToPage',
                     'fitToHeight', 'fitToWidth', 'firstPageNumber',
                     'useFirstPageNumber'):
            raw = pageSetup.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    headerFooter = root.find(QName(xmlns, 'headerFooter').text)
    if headerFooter is not None:
        # Empty header/footer elements have no text; skip them rather
        # than handing None to the setters.
        oddHeader = headerFooter.find(QName(xmlns, 'oddHeader').text)
        if oddHeader is not None and oddHeader.text is not None:
            ws.header_footer.setHeader(oddHeader.text)
        oddFooter = headerFooter.find(QName(xmlns, 'oddFooter').text)
        if oddFooter is not None and oddFooter.text is not None:
            ws.header_footer.setFooter(oddFooter.text)
def fast_parse(ws, xml_source, string_table, style_table):
    """Populate worksheet ``ws`` from the sheet XML in ``xml_source``.

    The XML is read twice: once as a full tree (merged cells, column
    dimensions, print options, page margins, page setup, header/footer)
    and once as a stream for the cells accepted by ``filter_cells``.

    For each cell, a present ``s`` attribute is resolved through
    ``style_table`` and stored in ``ws._styles``.  Cells with value text
    are written to ``ws``; when the ``t`` attribute equals
    ``Cell.TYPE_STRING`` the text is an integer index into
    ``string_table``.
    """
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(xml_source)

    mergeCells = root.find(QName(xmlns, 'mergeCells').text)
    if mergeCells is not None:
        for mergeCell in mergeCells.findall(QName(xmlns, 'mergeCell').text):
            ws.merge_cells(mergeCell.get('ref'))

    source = _get_xml_iter(xml_source)
    it = iterparse(source)
    value_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v'

    for event, element in filter(filter_cells, it):
        value = element.findtext(value_tag)
        coordinate = element.get('r')
        style_id = element.get('s')
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))

        if value is not None:
            data_type = element.get('t', 'n')
            if data_type == Cell.TYPE_STRING:
                value = string_table.get(int(value))
            ws.cell(coordinate).value = value

        # to avoid memory exhaustion, clear the item after use
        element.clear()

    cols = root.find(QName(xmlns, 'cols').text)
    if cols is not None:
        for col in cols.findall(QName(xmlns, 'col').text):
            first_col = int(col.get('min')) if col.get('min') else 1
            last_col = int(col.get('max')) if col.get('max') else 1
            width = col.get('width')
            outline_level = col.get('outlineLevel')
            style = col.get('style')
            for colId in range(first_col, last_col + 1):
                column = get_column_letter(colId)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                dimension = ws.column_dimensions[column]
                if width is not None:
                    dimension.width = float(width)
                if col.get('bestFit') == '1':
                    dimension.auto_size = True
                if col.get('hidden') == '1':
                    dimension.visible = False
                if outline_level is not None:
                    dimension.outline_level = int(outline_level)
                if col.get('collapsed') == '1':
                    dimension.collapsed = True
                if style is not None:
                    dimension.style_index = style

    printOptions = root.find(QName(xmlns, 'printOptions').text)
    if printOptions is not None:
        for name in ('horizontalCentered', 'verticalCentered'):
            raw = printOptions.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    pageMargins = root.find(QName(xmlns, 'pageMargins').text)
    if pageMargins is not None:
        for name in ('left', 'right', 'top', 'bottom', 'header', 'footer'):
            raw = pageMargins.get(name)
            if raw is not None:
                setattr(ws.page_margins, name, float(raw))

    pageSetup = root.find(QName(xmlns, 'pageSetup').text)
    if pageSetup is not None:
        # Each attribute is stored under its own name; "scale" used to be
        # written to ``page_setup.top`` by mistake.
        for name in ('orientation', 'paperSize', 'scale', 'fitToPage',
                     'fitToHeight', 'fitToWidth', 'firstPageNumber',
                     'useFirstPageNumber'):
            raw = pageSetup.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    headerFooter = root.find(QName(xmlns, 'headerFooter').text)
    if headerFooter is not None:
        # Empty header/footer elements have no text; skip them rather
        # than handing None to the setters.
        oddHeader = headerFooter.find(QName(xmlns, 'oddHeader').text)
        if oddHeader is not None and oddHeader.text is not None:
            ws.header_footer.setHeader(oddHeader.text)
        oddFooter = headerFooter.find(QName(xmlns, 'oddFooter').text)
        if oddFooter is not None and oddFooter.text is not None:
            ws.header_footer.setFooter(oddFooter.text)
def fast_parse(ws, xml_source, string_table, style_table):
    """Populate worksheet ``ws`` from the sheet XML in ``xml_source``.

    The XML is read twice: once as a full tree (merged cells, column and
    row dimensions, print options, page margins, page setup,
    header/footer) and once as a stream for the cells accepted by
    ``filter_cells``.

    For each cell, a present ``s`` attribute is resolved through
    ``style_table`` and stored in ``ws._styles``.  Cells with value text
    are then written:

    * when the ``t`` attribute equals ``Cell.TYPE_STRING`` the text is an
      integer index into ``string_table``;
    * when a formula child exists, the value becomes "=" plus the
      formula text (a bare "=" if the formula element is empty);
    * when ``ws.parent._guess_types`` is false and there is no formula
      child, the value is stored with ``set_explicit_value`` using the
      cell's declared type; otherwise it is assigned to ``.value``.
    """
    root = fromstring(xml_source)
    guess_types = ws.parent._guess_types

    mergeCells = root.find('{%s}mergeCells' % SHEET_MAIN_NS)
    if mergeCells is not None:
        for mergeCell in mergeCells.findall('{%s}mergeCell' % SHEET_MAIN_NS):
            ws.merge_cells(mergeCell.get('ref'))

    source = _get_xml_iter(xml_source)
    it = iterparse(source)
    value_tag = '{%s}v' % SHEET_MAIN_NS
    formula_tag = '{%s}f' % SHEET_MAIN_NS

    for event, element in filter(filter_cells, it):
        value = element.findtext(value_tag)
        formula = element.findtext(formula_tag)
        coordinate = element.get('r')
        style_id = element.get('s')
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))

        if value is not None:
            data_type = element.get('t', 'n')
            if data_type == Cell.TYPE_STRING:
                value = string_table.get(int(value))
            if formula is not None:
                # Plain concatenation: wrapping the text in str() fails
                # for non-ASCII formulas on Python 2.
                value = "=" + formula
            # An empty formula element yields '' rather than None, so
            # test for None to keep such cells on the formula path.
            if not guess_types and formula is None:
                ws.cell(coordinate).set_explicit_value(value=value, data_type=data_type)
            else:
                ws.cell(coordinate).value = value

        # to avoid memory exhaustion, clear the item after use
        element.clear()

    cols = root.find('{%s}cols' % SHEET_MAIN_NS)
    if cols is not None:
        for col in cols.findall('{%s}col' % SHEET_MAIN_NS):
            first_col = int(col.get('min')) if col.get('min') else 1
            last_col = int(col.get('max')) if col.get('max') else 1
            width = col.get('width')
            outline_level = col.get('outlineLevel')
            style = col.get('style')
            for colId in range(first_col, last_col + 1):
                column = get_column_letter(colId)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                dimension = ws.column_dimensions[column]
                if width is not None:
                    dimension.width = float(width)
                if col.get('bestFit') == '1':
                    dimension.auto_size = True
                if col.get('hidden') == '1':
                    dimension.visible = False
                if outline_level is not None:
                    dimension.outline_level = int(outline_level)
                if col.get('collapsed') == '1':
                    dimension.collapsed = True
                if style is not None:
                    dimension.style_index = style

    sheetData = root.find('{%s}sheetData' % SHEET_MAIN_NS)
    if sheetData is not None:
        for row in sheetData.findall('{%s}row' % SHEET_MAIN_NS):
            rowId = int(row.get('r'))
            if rowId not in ws.row_dimensions:
                ws.row_dimensions[rowId] = RowDimension(rowId)
            height = row.get('ht')
            if height is not None:
                ws.row_dimensions[rowId].height = float(height)

    printOptions = root.find('{%s}printOptions' % SHEET_MAIN_NS)
    if printOptions is not None:
        for name in ('horizontalCentered', 'verticalCentered'):
            raw = printOptions.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    pageMargins = root.find('{%s}pageMargins' % SHEET_MAIN_NS)
    if pageMargins is not None:
        for name in ('left', 'right', 'top', 'bottom', 'header', 'footer'):
            raw = pageMargins.get(name)
            if raw is not None:
                setattr(ws.page_margins, name, float(raw))

    pageSetup = root.find('{%s}pageSetup' % SHEET_MAIN_NS)
    if pageSetup is not None:
        # Each attribute is stored under its own name; "scale" used to be
        # written to ``page_setup.top`` by mistake.
        for name in ('orientation', 'paperSize', 'scale', 'fitToPage',
                     'fitToHeight', 'fitToWidth', 'firstPageNumber',
                     'useFirstPageNumber'):
            raw = pageSetup.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    headerFooter = root.find('{%s}headerFooter' % SHEET_MAIN_NS)
    if headerFooter is not None:
        oddHeader = headerFooter.find('{%s}oddHeader' % SHEET_MAIN_NS)
        if oddHeader is not None and oddHeader.text is not None:
            ws.header_footer.setHeader(oddHeader.text)
        oddFooter = headerFooter.find('{%s}oddFooter' % SHEET_MAIN_NS)
        if oddFooter is not None and oddFooter.text is not None:
            ws.header_footer.setFooter(oddFooter.text)
def fast_parse(ws, xml_source, string_table, style_table, color_index=None):
    """Populate worksheet ``ws`` from the sheet XML in ``xml_source``.

    The XML is read twice: once as a full tree (merged cells, column and
    row dimensions, print options, page margins, page setup,
    header/footer, conditional formatting) and once as a stream for the
    cells accepted by ``filter_cells``.

    Cell handling: a present ``s`` attribute is resolved through
    ``style_table`` into ``ws._styles``.  Cells with value text are
    written; ``Cell.TYPE_STRING`` values are integer indices into
    ``string_table``.  A formula child replaces the value with "=" plus
    its text, and its ``t``/``si``/``ref`` attributes are recorded in
    ``ws.formula_attributes``.  When ``ws.parent._guess_types`` is false
    and there is no formula child the value is stored with
    ``set_explicit_value``.

    Conditional formatting: rules are collected per ``sqref`` range and
    passed to ``ws.conditional_formatting.setRules``.  Rules without a
    type and ``dataBar`` rules are skipped.  ``color_index``, when given,
    is indexed by the ``indexed`` attribute of colour-scale colours.
    """
    root = fromstring(xml_source)
    guess_types = ws.parent._guess_types

    mergeCells = root.find('{%s}mergeCells' % SHEET_MAIN_NS)
    if mergeCells is not None:
        for mergeCell in mergeCells.findall('{%s}mergeCell' % SHEET_MAIN_NS):
            ws.merge_cells(mergeCell.get('ref'))

    source = _get_xml_iter(xml_source)
    it = iterparse(source)
    value_tag = '{%s}v' % SHEET_MAIN_NS
    formula_tag = '{%s}f' % SHEET_MAIN_NS

    for event, element in filter(filter_cells, it):
        value = element.findtext(value_tag)
        formula = element.find(formula_tag)
        coordinate = element.get('r')
        style_id = element.get('s')
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))

        if value is not None:
            data_type = element.get('t', 'n')
            if data_type == Cell.TYPE_STRING:
                value = string_table.get(int(value))
            if formula is not None:
                # Plain concatenation: wrapping the text in str() fails
                # for non-ASCII formulas on Python 2.
                value = "=" + (formula.text or "")
                formula_type = formula.get('t')
                if formula_type:
                    attributes = {'t': formula_type}
                    if formula.get('si'):
                        # Shared group index for shared formulas
                        attributes['si'] = formula.get('si')
                    if formula.get('ref'):
                        # Range for shared formulas
                        attributes['ref'] = formula.get('ref')
                    ws.formula_attributes[coordinate] = attributes
            if not guess_types and formula is None:
                ws.cell(coordinate).set_explicit_value(value=value, data_type=data_type)
            else:
                ws.cell(coordinate).value = value

        # to avoid memory exhaustion, clear the item after use
        element.clear()

    cols = root.find('{%s}cols' % SHEET_MAIN_NS)
    if cols is not None:
        for col in cols.findall('{%s}col' % SHEET_MAIN_NS):
            first_col = int(col.get('min')) if col.get('min') else 1
            last_col = int(col.get('max')) if col.get('max') else 1
            # Ignore ranges that go up to the max column 16384.  Columns
            # need to be extended to handle ranges without creating an
            # entry for every single one.
            if last_col == 16384:
                continue
            width = col.get('width')
            outline_level = col.get('outlineLevel')
            style = col.get('style')
            for colId in range(first_col, last_col + 1):
                column = get_column_letter(colId)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                dimension = ws.column_dimensions[column]
                if width is not None:
                    dimension.width = float(width)
                if col.get('bestFit') == '1':
                    dimension.auto_size = True
                if col.get('hidden') == '1':
                    dimension.visible = False
                if outline_level is not None:
                    dimension.outline_level = int(outline_level)
                if col.get('collapsed') == '1':
                    dimension.collapsed = True
                if style is not None:
                    dimension.style_index = style_table.get(int(style))

    sheetData = root.find('{%s}sheetData' % SHEET_MAIN_NS)
    if sheetData is not None:
        for row in sheetData.findall('{%s}row' % SHEET_MAIN_NS):
            rowId = int(row.get('r'))
            if rowId not in ws.row_dimensions:
                ws.row_dimensions[rowId] = RowDimension(rowId)
            height = row.get('ht')
            if height is not None:
                ws.row_dimensions[rowId].height = float(height)

    printOptions = root.find('{%s}printOptions' % SHEET_MAIN_NS)
    if printOptions is not None:
        for name in ('horizontalCentered', 'verticalCentered'):
            raw = printOptions.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    pageMargins = root.find('{%s}pageMargins' % SHEET_MAIN_NS)
    if pageMargins is not None:
        for name in ('left', 'right', 'top', 'bottom', 'header', 'footer'):
            raw = pageMargins.get(name)
            if raw is not None:
                setattr(ws.page_margins, name, float(raw))

    pageSetup = root.find('{%s}pageSetup' % SHEET_MAIN_NS)
    if pageSetup is not None:
        # Each attribute is stored under its own name; "scale" used to be
        # written to ``page_setup.top`` by mistake.
        for name in ('orientation', 'paperSize', 'scale', 'fitToPage',
                     'fitToHeight', 'fitToWidth', 'firstPageNumber',
                     'useFirstPageNumber'):
            raw = pageSetup.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    headerFooter = root.find('{%s}headerFooter' % SHEET_MAIN_NS)
    if headerFooter is not None:
        oddHeader = headerFooter.find('{%s}oddHeader' % SHEET_MAIN_NS)
        if oddHeader is not None and oddHeader.text is not None:
            ws.header_footer.setHeader(oddHeader.text)
        oddFooter = headerFooter.find('{%s}oddFooter' % SHEET_MAIN_NS)
        if oddFooter is not None and oddFooter.text is not None:
            ws.header_footer.setFooter(oddFooter.text)

    cfvo_tag = '{%s}cfvo' % SHEET_MAIN_NS

    def read_cfvo(node):
        # Copy the optional "type" and "val" attributes of a cfvo node.
        cfvo = {}
        for key in ('type', 'val'):
            if node.get(key) is not None:
                cfvo[key] = node.get(key)
        return cfvo

    rules = {}
    for cf in root.findall('{%s}conditionalFormatting' % SHEET_MAIN_NS):
        range_string = cf.get('sqref')
        if not range_string:
            # Potentially flag - this attribute should always be present.
            continue
        # Several blocks may target the same range; extend the existing
        # list instead of replacing rules collected earlier.
        range_rules = rules.setdefault(range_string, [])

        for cfRule in cf.findall('{%s}cfRule' % SHEET_MAIN_NS):
            rule_type = cfRule.get('type')
            if not rule_type or rule_type == 'dataBar':
                # dataBar conditional formatting isn't supported, as it
                # relies on the complex <extLst> tag
                continue
            rule = {'type': rule_type}
            for attr in ConditionalFormatting.rule_attributes:
                if cfRule.get(attr) is not None:
                    rule[attr] = cfRule.get(attr)

            formulae = cfRule.findall('{%s}formula' % SHEET_MAIN_NS)
            if formulae:
                rule['formula'] = [f.text for f in formulae]

            colorScale = cfRule.find('{%s}colorScale' % SHEET_MAIN_NS)
            if colorScale is not None:
                scale = {'cfvo': [], 'color': []}
                rule['colorScale'] = scale
                for node in colorScale.findall(cfvo_tag):
                    scale['cfvo'].append(read_cfvo(node))
                for color in colorScale.findall('{%s}color' % SHEET_MAIN_NS):
                    c = Color(Color.BLACK)
                    indexed = color.get('indexed')
                    if (color_index and indexed is not None
                            and 0 <= int(indexed) < len(color_index)):
                        c.index = color_index[int(indexed)]
                    theme = color.get('theme')
                    if theme is not None:
                        # prefix color with theme
                        if color.get('tint') is not None:
                            c.index = 'theme:%s:%s' % (theme, color.get('tint'))
                        else:
                            c.index = 'theme:%s:' % theme
                    elif color.get('rgb'):
                        c.index = color.get('rgb')
                    scale['color'].append(c)

            iconSet = cfRule.find('{%s}iconSet' % SHEET_MAIN_NS)
            if iconSet is not None:
                icons = {'cfvo': []}
                rule['iconSet'] = icons
                for iconAttr in ConditionalFormatting.icon_attributes:
                    if iconSet.get(iconAttr) is not None:
                        icons[iconAttr] = iconSet.get(iconAttr)
                for node in iconSet.findall(cfvo_tag):
                    icons['cfvo'].append(read_cfvo(node))

            range_rules.append(rule)

    if rules:
        ws.conditional_formatting.setRules(rules)