def iter_rows(workbook_name, sheet_name, xml_source, shared_date, string_table,
              range_string='', row_offset=0, column_offset=0):
    """Return the cells of a worksheet region as produced by ``get_squared_range``.

    :param workbook_name: passed to ``get_archive_file`` to open the archive
        from which the style table (``ARC_STYLE``) is read.
    :param sheet_name: accepted but not used by this function.
    :param xml_source: worksheet XML stream; it is rewound with ``seek(0)``
        before being handed to ``iterparse``, so it must be seekable.
    :param shared_date: forwarded unchanged to ``get_squared_range``.
    :param string_table: forwarded unchanged to ``get_squared_range``.
    :param range_string: when non-empty, the bounds come from
        ``get_range_boundaries``; otherwise they are derived from
        ``read_dimension``.
    :param row_offset: forwarded to ``get_range_boundaries``.
    :param column_offset: forwarded to ``get_range_boundaries``.
    :return: whatever ``get_squared_range`` returns for the computed bounds.
    """
    workbook_archive = get_archive_file(workbook_name)

    if not range_string:
        # No explicit range: use the extents reported by the sheet itself.
        first_col, first_row, last_col, last_row = read_dimension(
            xml_source=xml_source)
        # read_dimension yields column names; downstream expects indexes.
        first_col = column_index_from_string(first_col)
        last_col = column_index_from_string(last_col) + 1
        # The row bound is padded by 6; the reason is not visible in this
        # function -- NOTE(review): confirm before changing.
        last_row += 6
    else:
        first_col, first_row, last_col, last_row = get_range_boundaries(
            range_string, row_offset, column_offset)

    styles = read_style_table(workbook_archive.read(ARC_STYLE))

    # read_dimension may have consumed the stream; restart from the top.
    xml_source.seek(0)
    cell_events = iterparse(xml_source)

    return get_squared_range(cell_events, first_col, first_row, last_col,
                             last_row, string_table, styles, shared_date)
def read_dimension(xml_source):
    """Return the worksheet extents as ``(min_col, min_row, max_col, max_row)``.

    The XML is walked with ``iterparse``.  As soon as a ``dimension`` element
    is seen, its ``ref`` attribute (either ``"first:last"`` or a single
    coordinate) is split with ``coordinate_from_string`` and returned.

    If no ``dimension`` element is encountered, the extents observed across
    all ``c`` (cell) elements are returned instead.  All four values are
    ``None`` when the document holds neither a dimension nor any cell.

    :param xml_source: worksheet XML, passed through ``_get_xml_iter``.
    :return: 4-tuple; columns are returned in the form produced by
        ``coordinate_from_string`` (presumably column letters -- verify
        against that helper).
    """
    main_ns = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}'
    dimension_tag = main_ns + 'dimension'
    cell_tag = main_ns + 'c'

    def column_order(column):
        # Plain string comparison puts 'AA' before 'Z'; on a sheet, a longer
        # column name always lies further right, so compare length first.
        # Assumes column names share one letter case -- TODO confirm.
        return (len(column), column)

    source = _get_xml_iter(xml_source)

    # Extents seen so far; only used when the dimension element is absent.
    smin_col = smin_row = smax_col = smax_row = None

    for _event, element in iterparse(source):
        tag = element.tag

        if tag == dimension_tag:
            ref = element.get('ref')
            if ':' in ref:
                min_range, max_range = ref.split(':')
            else:
                # A single-cell sheet reports just one coordinate.
                min_range = max_range = ref
            min_col, min_row = coordinate_from_string(min_range)
            max_col, max_row = coordinate_from_string(max_range)
            return min_col, min_row, max_col, max_row

        if tag == cell_tag:
            # Supposedly the dimension is mandatory, but in practice it can
            # be left off; track the observed extents as a fallback.
            col, row = coordinate_from_string(element.get('r'))
            if smin_row is None:
                # First cell: initialise the observed max/min values.
                smin_col = smax_col = col
                smin_row = smax_row = row
            else:
                smin_col = min(smin_col, col, key=column_order)
                smax_col = max(smax_col, col, key=column_order)
                smin_row = min(smin_row, row)
                smax_row = max(smax_row, row)
        else:
            # Anything that is not a cell is of no further use; free it.
            element.clear()

    return smin_col, smin_row, smax_col, smax_row
def iter_rows(
    workbook_name, sheet_name, xml_source, shared_date, string_table, range_string="", row_offset=0, column_offset=0
):
    """Build the ``get_squared_range`` result for one worksheet.

    The region is taken from ``range_string`` (through
    ``get_range_boundaries`` with ``row_offset`` / ``column_offset``) when one
    is given.  Otherwise it is taken from ``read_dimension``: the two columns
    go through ``column_index_from_string``, the upper column bound is
    incremented by one and the upper row bound by six.

    ``workbook_name`` is opened with ``get_archive_file`` to read the style
    table stored under ``ARC_STYLE``.  ``xml_source`` is rewound with
    ``seek(0)`` before parsing, so it must be seekable.  ``sheet_name`` is
    accepted but not used here.  ``shared_date`` and ``string_table`` are
    forwarded unchanged.
    """
    archive = get_archive_file(workbook_name)

    if range_string:
        left, top, right, bottom = get_range_boundaries(range_string, row_offset, column_offset)
    else:
        left_name, top, right_name, bottom = read_dimension(xml_source=xml_source)
        left = column_index_from_string(left_name)
        right = column_index_from_string(right_name) + 1
        # NOTE(review): the reason for the 6-row padding is not visible here.
        bottom += 6

    style_table = read_style_table(archive.read(ARC_STYLE))

    # Parsing must start from the beginning of the stream again.
    xml_source.seek(0)
    parser = iterparse(xml_source)

    return get_squared_range(parser, left, top, right, bottom, string_table, style_table, shared_date)
def fast_parse(ws, xml_source, string_table, style_table):
    """Populate worksheet *ws* from the worksheet XML in *xml_source*.

    The document is read twice: once as a tree (``fromstring``) for the
    sheet-level settings, and once as a stream (``iterparse``) for the cells.
    In order, the function applies:

    * merged ranges (``mergeCells``) through ``ws.merge_cells``;
    * cell styles into ``ws._styles`` and cell values through ``ws.cell``;
    * column settings (``cols``) into ``ws.column_dimensions``;
    * ``printOptions`` and ``pageSetup`` attributes onto ``ws.page_setup``;
    * ``pageMargins`` attributes, as floats, onto ``ws.page_margins``;
    * odd header / footer text through ``ws.header_footer``.

    :param ws: worksheet object that is modified in place.
    :param xml_source: worksheet XML accepted by ``fromstring`` and
        ``_get_xml_iter``.
    :param string_table: mapping queried with ``.get(int)`` for cells whose
        type equals ``Cell.TYPE_STRING``.
    :param style_table: mapping queried with ``.get(int)`` for cells that
        carry an ``s`` attribute.
    :return: ``None``.
    """
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(xml_source)

    def qualified(name):
        # Namespace-qualified tag text for find()/findall().
        return QName(xmlns, name).text

    # --- merged cells -----------------------------------------------------
    merge_cells = root.find(qualified('mergeCells'))
    if merge_cells is not None:
        for merge_cell in merge_cells.findall(qualified('mergeCell')):
            ws.merge_cells(merge_cell.get('ref'))

    # --- cell styles and values --------------------------------------------
    source = _get_xml_iter(xml_source)
    value_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v'

    for _event, element in filter(filter_cells, iterparse(source)):
        value = element.findtext(value_tag)
        coordinate = element.get('r')

        style_id = element.get('s')
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))

        # Cells without a <v> child get a style (above) but no value.
        if value is not None:
            if element.get('t', 'n') == Cell.TYPE_STRING:
                # The stored value is an index into the string table.
                value = string_table.get(int(value))
            ws.cell(coordinate).value = value

        # to avoid memory exhaustion, clear the item after use
        element.clear()

    # --- column dimensions --------------------------------------------------
    cols = root.find(qualified('cols'))
    if cols is not None:
        for col in cols.findall(qualified('col')):
            # A <col> element describes the inclusive column span min..max.
            first_col = int(col.get('min')) if col.get('min') else 1
            last_col = int(col.get('max')) if col.get('max') else 1

            width = col.get('width')
            outline_level = col.get('outlineLevel')
            style = col.get('style')
            best_fit = col.get('bestFit') == '1'
            hidden = col.get('hidden') == '1'
            collapsed = col.get('collapsed') == '1'

            for col_id in range(first_col, last_col + 1):
                column = get_column_letter(col_id)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                dimension = ws.column_dimensions[column]

                if width is not None:
                    dimension.width = float(width)
                if best_fit:
                    dimension.auto_size = True
                if hidden:
                    dimension.visible = False
                if outline_level is not None:
                    dimension.outline_level = int(outline_level)
                if collapsed:
                    dimension.collapsed = True
                if style is not None:
                    dimension.style_index = style

    # --- print options ------------------------------------------------------
    print_options = root.find(qualified('printOptions'))
    if print_options is not None:
        for name in ('horizontalCentered', 'verticalCentered'):
            raw = print_options.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    # --- page margins (stored as floats) ------------------------------------
    page_margins = root.find(qualified('pageMargins'))
    if page_margins is not None:
        for name in ('left', 'right', 'top', 'bottom', 'header', 'footer'):
            raw = page_margins.get(name)
            if raw is not None:
                setattr(ws.page_margins, name, float(raw))

    # --- page setup (stored as the raw attribute strings) -------------------
    page_setup = root.find(qualified('pageSetup'))
    if page_setup is not None:
        # Each attribute is copied to the attribute of the same name.
        # 'scale' used to be written to ``page_setup.top`` by mistake, which
        # dropped the scale setting.
        for name in ('orientation', 'paperSize', 'scale', 'fitToPage',
                     'fitToHeight', 'fitToWidth', 'firstPageNumber',
                     'useFirstPageNumber'):
            raw = page_setup.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    # --- header / footer ----------------------------------------------------
    header_footer = root.find(qualified('headerFooter'))
    if header_footer is not None:
        odd_header = header_footer.find(qualified('oddHeader'))
        if odd_header is not None:
            ws.header_footer.setHeader(odd_header.text)
        odd_footer = header_footer.find(qualified('oddFooter'))
        if odd_footer is not None:
            ws.header_footer.setFooter(odd_footer.text)