def read_excel_base_date(xml_source):
    """Return the calendar constant for the workbook's date system.

    Looks up the ``workbookPr`` element in the workbook XML and inspects
    its ``date1904`` attribute.

    :param xml_source: XML text of the workbook part.
    :return: ``CALENDAR_MAC_1904`` when ``date1904`` is ``"1"`` or
        ``"true"``; ``CALENDAR_WINDOWS_1900`` otherwise, including when the
        ``workbookPr`` element or the attribute is missing.
    """
    root = fromstring(text=xml_source)
    wbPr = root.find(QName("http://schemas.openxmlformats.org/spreadsheetml/2006/main", "workbookPr").text)
    # find() returns None when the element is absent; treat that the same
    # as a missing attribute instead of failing on None.
    if wbPr is not None and wbPr.get("date1904") in ("1", "true"):
        return CALENDAR_MAC_1904
    return CALENDAR_WINDOWS_1900
def read_properties_core(xml_source):
    """Build a ``DocumentProperties`` object from core-properties XML.

    Four elements are looked up under the parsed root: ``dc:creator``,
    ``cp:lastModifiedBy``, ``dcterms:created`` and ``dcterms:modified``.
    Missing text elements default to ``''``; a missing ``created`` element
    defaults to the current time and a missing ``modified`` element to the
    value chosen for ``created``.

    :param xml_source: XML text of the core properties part.
    :return: the populated ``DocumentProperties`` instance.
    """
    properties = DocumentProperties()
    root = fromstring(xml_source)

    def lookup(prefix, tag):
        # Resolve a namespace-qualified child element, or None.
        return root.find(QName(NAMESPACES[prefix], tag).text)

    author = lookup('dc', 'creator')
    properties.creator = '' if author is None else author.text

    editor = lookup('cp', 'lastModifiedBy')
    properties.last_modified_by = '' if editor is None else editor.text

    created = lookup('dcterms', 'created')
    if created is None:
        properties.created = datetime.datetime.now()
    else:
        properties.created = W3CDTF_to_datetime(created.text)

    modified = lookup('dcterms', 'modified')
    if modified is None:
        # No modification stamp: reuse the creation time.
        properties.modified = properties.created
    else:
        properties.modified = W3CDTF_to_datetime(modified.text)

    return properties
def read_properties_core(xml_source):
    """Read the core document properties out of *xml_source*.

    The creator and last-modified-by names fall back to an empty string
    when their element is missing.  The creation time falls back to
    ``datetime.datetime.now()`` and the modification time falls back to
    the creation time.

    :param xml_source: XML text of the core properties part.
    :return: a ``DocumentProperties`` instance with ``creator``,
        ``last_modified_by``, ``created`` and ``modified`` set.
    """
    properties = DocumentProperties()
    root = fromstring(xml_source)

    # (attribute on properties, namespace prefix, element name)
    text_fields = (
        ("creator", "dc", "creator"),
        ("last_modified_by", "cp", "lastModifiedBy"),
    )
    for attribute, prefix, tag in text_fields:
        node = root.find(QName(NAMESPACES[prefix], tag).text)
        setattr(properties, attribute, node.text if node is not None else "")

    created_node = root.find(QName(NAMESPACES["dcterms"], "created").text)
    properties.created = (
        W3CDTF_to_datetime(created_node.text)
        if created_node is not None
        else datetime.datetime.now()
    )

    modified_node = root.find(QName(NAMESPACES["dcterms"], "modified").text)
    properties.modified = (
        W3CDTF_to_datetime(modified_node.text)
        if modified_node is not None
        else properties.created
    )
    return properties
def read_sheets_titles(xml_source):
    """Read titles for all sheets.

    :param xml_source: XML text of the workbook part.
    :return: list with the ``name`` attribute of every child of the
        ``sheets`` element, in document order; an empty list when the
        workbook has no ``sheets`` element.
    """
    root = fromstring(xml_source)
    titles_root = root.find(
        QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main',
              'sheets').text)
    if titles_root is None:
        return []
    # Iterate the element itself: Element.getchildren() was removed from
    # xml.etree in Python 3.9.
    return [sheet.get('name') for sheet in titles_root]
def read_excel_base_date(xml_source):
    """Determine which base-date calendar the workbook uses.

    :param xml_source: XML text of the workbook part.
    :return: ``CALENDAR_MAC_1904`` when the ``workbookPr`` element carries
        ``date1904`` set to ``'1'`` or ``'true'``; otherwise
        ``CALENDAR_WINDOWS_1900`` (also when ``workbookPr`` is absent).
    """
    root = fromstring(text=xml_source)
    wbPr = root.find(
        QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main',
              'workbookPr').text)
    if wbPr is None:
        # No workbookPr element at all: nothing can select the 1904 system.
        return CALENDAR_WINDOWS_1900
    if wbPr.get('date1904') in ('1', 'true'):
        return CALENDAR_MAC_1904
    return CALENDAR_WINDOWS_1900
def read_named_ranges(xml_source, workbook):
    """Read named ranges, excluding poorly defined ranges.

    Walks the children of the ``definedNames`` element.  A name is skipped
    when it is hidden, when its name contains an entry of
    ``DISCARDED_RANGES`` or when its definition text contains an entry of
    ``BUGGY_NAMED_RANGES``.

    :param xml_source: XML text of the workbook part.
    :param workbook: object providing ``get_sheet_by_name()`` and a
        ``worksheets`` sequence, used to resolve destinations and scope.
    :return: list of ``NamedRange`` / ``NamedRangeContainingValue``
        objects; empty when there is no ``definedNames`` element.
    """
    named_ranges = []
    root = fromstring(xml_source)
    names_root = root.find(
        QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main',
              'definedNames').text)
    if names_root is None:
        return named_ranges

    # Iterate the element itself: Element.getchildren() was removed from
    # xml.etree in Python 3.9.
    for name_node in names_root:
        range_name = name_node.get('name')
        if name_node.get("hidden", '0') == '1':
            continue

        definition = name_node.text
        if any(discarded in range_name for discarded in DISCARDED_RANGES):
            continue
        if any(bad_range in definition for bad_range in BUGGY_NAMED_RANGES):
            continue

        if refers_to_range(definition):
            destinations = []
            for sheet_title, cells_range in split_named_range(definition):
                # it can happen that a valid named range references
                # a missing worksheet, when Excel didn't properly maintain
                # the named range list
                #
                # we just ignore them here
                worksheet = workbook.get_sheet_by_name(sheet_title)
                if worksheet:
                    destinations.append((worksheet, cells_range))
            named_range = NamedRange(range_name, destinations)
        else:
            named_range = NamedRangeContainingValue(range_name, definition)

        # localSheetId is used as an index into workbook.worksheets.
        location_id = name_node.get("localSheetId")
        if location_id:
            named_range.scope = workbook.worksheets[int(location_id)]

        named_ranges.append(named_range)

    return named_ranges
def read_named_ranges(xml_source, workbook):
    """Read named ranges, excluding poorly defined ranges.

    Hidden names are ignored, as are names containing an entry of
    ``DISCARDED_RANGES`` and definitions containing an entry of
    ``BUGGY_NAMED_RANGES``.

    :param xml_source: XML text of the workbook part.
    :param workbook: object providing ``get_sheet_by_name()`` and a
        ``worksheets`` sequence.
    :return: list of the named-range objects that were built; empty when
        the document has no ``definedNames`` element.
    """
    named_ranges = []
    root = fromstring(xml_source)
    names_root = root.find(QName("http://schemas.openxmlformats.org/spreadsheetml/2006/main", "definedNames").text)
    if names_root is not None:
        # Direct iteration replaces Element.getchildren(), which xml.etree
        # no longer provides since Python 3.9.
        for name_node in names_root:
            range_name = name_node.get("name")
            if name_node.get("hidden", "0") == "1":
                continue

            definition = name_node.text
            discarded = any(marker in range_name for marker in DISCARDED_RANGES)
            if discarded or any(marker in definition for marker in BUGGY_NAMED_RANGES):
                continue

            if refers_to_range(definition):
                # it can happen that a valid named range references
                # a missing worksheet, when Excel didn't properly maintain
                # the named range list
                #
                # we just ignore them here
                resolved = (
                    (workbook.get_sheet_by_name(sheet_title), cells_range)
                    for sheet_title, cells_range in split_named_range(definition)
                )
                new_destinations = [pair for pair in resolved if pair[0]]
                named_range = NamedRange(range_name, new_destinations)
            else:
                named_range = NamedRangeContainingValue(range_name, definition)

            # localSheetId is used as an index into workbook.worksheets.
            location_id = name_node.get("localSheetId")
            if location_id:
                named_range.scope = workbook.worksheets[int(location_id)]

            named_ranges.append(named_range)
    return named_ranges
def read_string_table(xml_source):
    """Read in all shared strings in the table.

    :param xml_source: XML text of the shared-strings part.
    :return: dict mapping the zero-based position of each ``si`` element
        to the string produced for it by ``get_string``, with every
        ``'x005F_'`` sequence removed.
    """
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(text=xml_source)
    si_nodes = root.findall(QName(xmlns, 'si').text)
    # fix XML escaping sequence for '_x'
    return dict(
        (position, get_string(xmlns, si_node).replace('x005F_', ''))
        for position, si_node in enumerate(si_nodes)
    )
def _xml_flag_is_set(node, name):
    """Return True when boolean XML attribute *name* on *node* is '1' or 'true'."""
    return node.get(name) in ('1', 'true')


def _read_alignment(alignment, target):
    """Copy the attributes present on an ``alignment`` element onto *target*."""
    if alignment.get('horizontal') is not None:
        target.horizontal = alignment.get('horizontal')
    if alignment.get('vertical') is not None:
        target.vertical = alignment.get('vertical')
    # A plain truthiness test would treat wrapText="0" as enabled, because
    # any non-empty attribute string is truthy.
    if _xml_flag_is_set(alignment, 'wrapText'):
        target.wrap_text = True
    if _xml_flag_is_set(alignment, 'shrinkToFit'):
        target.shrink_to_fit = True
    if alignment.get('indent') is not None:
        # NOTE(review): this used to assign to ``ident``, which looks like
        # a typo for ``indent`` -- confirm against the alignment class.
        target.indent = int(alignment.get('indent'))
    if alignment.get('textRotation') is not None:
        target.text_rotation = int(alignment.get('textRotation'))
    # ignore justifyLastLine option when horizontal = distributed


def _read_protection(protection, target):
    """Copy ``locked`` / ``hidden`` from a ``protection`` element onto *target*."""
    for flag in ('locked', 'hidden'):
        setting = protection.get(flag)
        if setting is None:
            continue
        if setting == '1':
            setattr(target, flag, Protection.PROTECTION_PROTECTED)
        else:
            setattr(target, flag, Protection.PROTECTION_UNPROTECTED)


def read_style_table(xml_source):
    """Read styles from the shared style table.

    Builds one ``Style`` per ``xf`` child of ``cellXfs``.  Number formats
    with an id below 164 are looked up in ``NumberFormat._BUILTIN_FORMATS``
    (defaulting to ``'General'``); other ids must be present among the
    custom formats parsed from the document.  Alignment, font, fill, border
    and protection are only read when the matching ``apply*`` attribute on
    the ``xf`` element equals ``'1'``.

    :param xml_source: XML text of the styles part.
    :return: dict mapping the position of each ``xf`` element to its
        ``Style``; empty when there is no ``cellXfs`` element.
    :raises MissingNumberFormat: when an id of 164 or above is not among
        the custom number formats.
    """
    table = {}
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(xml_source)
    custom_num_formats = parse_custom_num_formats(root, xmlns)
    color_index = parse_color_index(root, xmlns)
    font_list = parse_fonts(root, xmlns, color_index)
    fill_list = parse_fills(root, xmlns, color_index)
    border_list = parse_borders(root, xmlns, color_index)
    builtin_formats = NumberFormat._BUILTIN_FORMATS

    cell_xfs = root.find(QName(xmlns, 'cellXfs').text)
    if cell_xfs is None:  # can happen on bad OOXML writers (e.g. Gnumeric)
        return table

    for index, xf in enumerate(cell_xfs.findall(QName(xmlns, 'xf').text)):
        new_style = Style()

        number_format_id = int(xf.get('numFmtId'))
        if number_format_id < 164:
            new_style.number_format.format_code = \
                builtin_formats.get(number_format_id, 'General')
        elif number_format_id in custom_num_formats:
            new_style.number_format.format_code = \
                custom_num_formats[number_format_id]
        else:
            raise MissingNumberFormat('%s' % number_format_id)

        if xf.get('applyAlignment') == '1':
            alignment = xf.find(QName(xmlns, 'alignment').text)
            if alignment is not None:
                _read_alignment(alignment, new_style.alignment)

        if xf.get('applyFont') == '1':
            font = font_list[int(xf.get('fontId'))]
            new_style.font = deepcopy(font)
            new_style.font.color = deepcopy(font.color)

        if xf.get('applyFill') == '1':
            fill = fill_list[int(xf.get('fillId'))]
            new_style.fill = deepcopy(fill)
            new_style.fill.start_color = deepcopy(fill.start_color)
            new_style.fill.end_color = deepcopy(fill.end_color)

        if xf.get('applyBorder') == '1':
            borders = border_list[int(xf.get('borderId'))]
            new_style.borders = deepcopy(borders)
            for side in ('left', 'right', 'top', 'bottom', 'diagonal'):
                source_side = getattr(borders, side)
                setattr(new_style.borders, side, deepcopy(source_side))
                getattr(new_style.borders, side).color = \
                    deepcopy(source_side.color)

        if xf.get('applyProtection') == '1':
            protection = xf.find(QName(xmlns, 'protection').text)
            # Ignore if there are no protection sub-nodes
            if protection is not None:
                _read_protection(protection, new_style.protection)

        table[index] = new_style
    return table
def write_theme():
    """Write the theme xml.

    Parses the embedded theme document (named "Office Theme") with
    ``fromstring`` and returns whatever ``get_document_content`` produces
    for the parsed node.  The document is a fixed literal; the function
    takes no input.
    """
    # The adjacent string literals below are concatenated into a single
    # XML document; only the declaration line ends with a newline.
    xml_node = fromstring(
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n'
        '<a:theme xmlns:a="http://schemas.openxmlformats.org/'
        'drawingml/2006/main" name="Office Theme">'
        '<a:themeElements>'
        # colour scheme
        '<a:clrScheme name="Office">'
        '<a:dk1><a:sysClr val="windowText" lastClr="000000"/></a:dk1>'
        '<a:lt1><a:sysClr val="window" lastClr="FFFFFF"/></a:lt1>'
        '<a:dk2><a:srgbClr val="1F497D"/></a:dk2>'
        '<a:lt2><a:srgbClr val="EEECE1"/></a:lt2>'
        '<a:accent1><a:srgbClr val="4F81BD"/></a:accent1>'
        '<a:accent2><a:srgbClr val="C0504D"/></a:accent2>'
        '<a:accent3><a:srgbClr val="9BBB59"/></a:accent3>'
        '<a:accent4><a:srgbClr val="8064A2"/></a:accent4>'
        '<a:accent5><a:srgbClr val="4BACC6"/></a:accent5>'
        '<a:accent6><a:srgbClr val="F79646"/></a:accent6>'
        '<a:hlink><a:srgbClr val="0000FF"/></a:hlink>'
        '<a:folHlink><a:srgbClr val="800080"/></a:folHlink>'
        '</a:clrScheme>'
        # font scheme: major font list, then minor font list
        '<a:fontScheme name="Office">'
        '<a:majorFont>'
        '<a:latin typeface="Cambria"/>'
        '<a:ea typeface=""/>'
        '<a:cs typeface=""/>'
        '<a:font script="Jpan" typeface="MS Pゴシック"/>'
        '<a:font script="Hang" typeface="맑은 고딕"/>'
        '<a:font script="Hans" typeface="宋体"/>'
        '<a:font script="Hant" typeface="新細明體"/>'
        '<a:font script="Arab" typeface="Times New Roman"/>'
        '<a:font script="Hebr" typeface="Times New Roman"/>'
        '<a:font script="Thai" typeface="Tahoma"/>'
        '<a:font script="Ethi" typeface="Nyala"/>'
        '<a:font script="Beng" typeface="Vrinda"/>'
        '<a:font script="Gujr" typeface="Shruti"/>'
        '<a:font script="Khmr" typeface="MoolBoran"/>'
        '<a:font script="Knda" typeface="Tunga"/>'
        '<a:font script="Guru" typeface="Raavi"/>'
        '<a:font script="Cans" typeface="Euphemia"/>'
        '<a:font script="Cher" typeface="Plantagenet Cherokee"/>'
        '<a:font script="Yiii" typeface="Microsoft Yi Baiti"/>'
        '<a:font script="Tibt" typeface="Microsoft Himalaya"/>'
        '<a:font script="Thaa" typeface="MV Boli"/>'
        '<a:font script="Deva" typeface="Mangal"/>'
        '<a:font script="Telu" typeface="Gautami"/>'
        '<a:font script="Taml" typeface="Latha"/>'
        '<a:font script="Syrc" typeface="Estrangelo Edessa"/>'
        '<a:font script="Orya" typeface="Kalinga"/>'
        '<a:font script="Mlym" typeface="Kartika"/>'
        '<a:font script="Laoo" typeface="DokChampa"/>'
        '<a:font script="Sinh" typeface="Iskoola Pota"/>'
        '<a:font script="Mong" typeface="Mongolian Baiti"/>'
        '<a:font script="Viet" typeface="Times New Roman"/>'
        '<a:font script="Uigh" typeface="Microsoft Uighur"/>'
        '</a:majorFont>'
        '<a:minorFont>'
        '<a:latin typeface="Calibri"/>'
        '<a:ea typeface=""/>'
        '<a:cs typeface=""/>'
        '<a:font script="Jpan" typeface="MS Pゴシック"/>'
        '<a:font script="Hang" typeface="맑은 고딕"/>'
        '<a:font script="Hans" typeface="宋体"/>'
        '<a:font script="Hant" typeface="新細明體"/>'
        '<a:font script="Arab" typeface="Arial"/>'
        '<a:font script="Hebr" typeface="Arial"/>'
        '<a:font script="Thai" typeface="Tahoma"/>'
        '<a:font script="Ethi" typeface="Nyala"/>'
        '<a:font script="Beng" typeface="Vrinda"/>'
        '<a:font script="Gujr" typeface="Shruti"/>'
        '<a:font script="Khmr" typeface="DaunPenh"/>'
        '<a:font script="Knda" typeface="Tunga"/>'
        '<a:font script="Guru" typeface="Raavi"/>'
        '<a:font script="Cans" typeface="Euphemia"/>'
        '<a:font script="Cher" typeface="Plantagenet Cherokee"/>'
        '<a:font script="Yiii" typeface="Microsoft Yi Baiti"/>'
        '<a:font script="Tibt" typeface="Microsoft Himalaya"/>'
        '<a:font script="Thaa" typeface="MV Boli"/>'
        '<a:font script="Deva" typeface="Mangal"/>'
        '<a:font script="Telu" typeface="Gautami"/>'
        '<a:font script="Taml" typeface="Latha"/>'
        '<a:font script="Syrc" typeface="Estrangelo Edessa"/>'
        '<a:font script="Orya" typeface="Kalinga"/>'
        '<a:font script="Mlym" typeface="Kartika"/>'
        '<a:font script="Laoo" typeface="DokChampa"/>'
        '<a:font script="Sinh" typeface="Iskoola Pota"/>'
        '<a:font script="Mong" typeface="Mongolian Baiti"/>'
        '<a:font script="Viet" typeface="Arial"/>'
        '<a:font script="Uigh" typeface="Microsoft Uighur"/>'
        '</a:minorFont>'
        '</a:fontScheme>'
        # format scheme: fill, line, effect and background-fill style lists
        '<a:fmtScheme name="Office">'
        '<a:fillStyleLst>'
        '<a:solidFill><a:schemeClr val="phClr"/></a:solidFill>'
        '<a:gradFill rotWithShape="1"><a:gsLst>'
        '<a:gs pos="0"><a:schemeClr val="phClr"><a:tint val="50000"/>'
        '<a:satMod val="300000"/></a:schemeClr></a:gs>'
        '<a:gs pos="35000"><a:schemeClr val="phClr"><a:tint val="37000"/>'
        '<a:satMod val="300000"/></a:schemeClr></a:gs>'
        '<a:gs pos="100000"><a:schemeClr val="phClr"><a:tint val="15000"/>'
        '<a:satMod val="350000"/></a:schemeClr></a:gs></a:gsLst>'
        '<a:lin ang="16200000" scaled="1"/></a:gradFill>'
        '<a:gradFill rotWithShape="1"><a:gsLst>'
        '<a:gs pos="0"><a:schemeClr val="phClr"><a:shade val="51000"/>'
        '<a:satMod val="130000"/></a:schemeClr></a:gs>'
        '<a:gs pos="80000"><a:schemeClr val="phClr"><a:shade val="93000"/>'
        '<a:satMod val="130000"/></a:schemeClr></a:gs>'
        '<a:gs pos="100000"><a:schemeClr val="phClr">'
        '<a:shade val="94000"/>'
        '<a:satMod val="135000"/></a:schemeClr></a:gs></a:gsLst>'
        '<a:lin ang="16200000" scaled="0"/></a:gradFill></a:fillStyleLst>'
        '<a:lnStyleLst>'
        '<a:ln w="9525" cap="flat" cmpd="sng" algn="ctr">'
        '<a:solidFill><a:schemeClr val="phClr"><a:shade val="95000"/>'
        '<a:satMod val="105000"/></a:schemeClr></a:solidFill>'
        '<a:prstDash val="solid"/></a:ln>'
        '<a:ln w="25400" cap="flat" cmpd="sng" algn="ctr"><a:solidFill>'
        '<a:schemeClr val="phClr"/></a:solidFill>'
        '<a:prstDash val="solid"/></a:ln>'
        '<a:ln w="38100" cap="flat" cmpd="sng" algn="ctr"><a:solidFill>'
        '<a:schemeClr val="phClr"/></a:solidFill>'
        '<a:prstDash val="solid"/></a:ln></a:lnStyleLst>'
        '<a:effectStyleLst><a:effectStyle><a:effectLst>'
        '<a:outerShdw blurRad="40000" dist="20000" dir="5400000" '
        'rotWithShape="0"><a:srgbClr val="000000">'
        '<a:alpha val="38000"/></a:srgbClr></a:outerShdw></a:effectLst>'
        '</a:effectStyle><a:effectStyle><a:effectLst>'
        '<a:outerShdw blurRad="40000" dist="23000" dir="5400000" '
        'rotWithShape="0"><a:srgbClr val="000000">'
        '<a:alpha val="35000"/></a:srgbClr></a:outerShdw></a:effectLst>'
        '</a:effectStyle><a:effectStyle><a:effectLst>'
        '<a:outerShdw blurRad="40000" dist="23000" dir="5400000" '
        'rotWithShape="0"><a:srgbClr val="000000">'
        '<a:alpha val="35000"/></a:srgbClr></a:outerShdw></a:effectLst>'
        '<a:scene3d><a:camera prst="orthographicFront">'
        '<a:rot lat="0" lon="0" rev="0"/></a:camera>'
        '<a:lightRig rig="threePt" dir="t">'
        '<a:rot lat="0" lon="0" rev="1200000"/></a:lightRig>'
        '</a:scene3d><a:sp3d><a:bevelT w="63500" h="25400"/>'
        '</a:sp3d></a:effectStyle></a:effectStyleLst>'
        '<a:bgFillStyleLst><a:solidFill><a:schemeClr val="phClr"/>'
        '</a:solidFill><a:gradFill rotWithShape="1"><a:gsLst>'
        '<a:gs pos="0"><a:schemeClr val="phClr"><a:tint val="40000"/>'
        '<a:satMod val="350000"/></a:schemeClr></a:gs>'
        '<a:gs pos="40000"><a:schemeClr val="phClr"><a:tint val="45000"/>'
        '<a:shade val="99000"/><a:satMod val="350000"/>'
        '</a:schemeClr></a:gs>'
        '<a:gs pos="100000"><a:schemeClr val="phClr">'
        '<a:shade val="20000"/><a:satMod val="255000"/>'
        '</a:schemeClr></a:gs></a:gsLst>'
        '<a:path path="circle">'
        '<a:fillToRect l="50000" t="-80000" r="50000" b="180000"/>'
        '</a:path>'
        '</a:gradFill><a:gradFill rotWithShape="1"><a:gsLst>'
        '<a:gs pos="0"><a:schemeClr val="phClr"><a:tint val="80000"/>'
        '<a:satMod val="300000"/></a:schemeClr></a:gs>'
        '<a:gs pos="100000"><a:schemeClr val="phClr">'
        '<a:shade val="30000"/><a:satMod val="200000"/>'
        '</a:schemeClr></a:gs></a:gsLst>'
        '<a:path path="circle">'
        '<a:fillToRect l="50000" t="50000" r="50000" b="50000"/></a:path>'
        '</a:gradFill></a:bgFillStyleLst></a:fmtScheme>'
        '</a:themeElements>'
        '<a:objectDefaults/><a:extraClrSchemeLst/>'
        '</a:theme>')
    return get_document_content(xml_node)
def read_style_table(xml_source):
    """Read styles from the shared style table.

    One ``Style`` is built per ``xf`` child of ``cellXfs``.  Number-format
    ids below 164 are resolved through ``NumberFormat._BUILTIN_FORMATS``
    (falling back to ``'General'``); other ids must exist among the custom
    formats parsed from the document.  Alignment, font, fill, border and
    protection are only applied when the corresponding ``apply*`` attribute
    of the ``xf`` element is ``'1'``.

    :param xml_source: XML text of the styles part.
    :return: dict mapping each ``xf`` position to its ``Style``; empty
        when the document has no ``cellXfs`` element.
    :raises MissingNumberFormat: for an id of 164 or above that is not a
        known custom number format.
    """
    table = {}
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(xml_source)
    custom_num_formats = parse_custom_num_formats(root, xmlns)
    color_index = parse_color_index(root, xmlns)
    font_list = parse_fonts(root, xmlns, color_index)
    fill_list = parse_fills(root, xmlns, color_index)
    border_list = parse_borders(root, xmlns, color_index)
    builtin_formats = NumberFormat._BUILTIN_FORMATS
    # Attribute spellings accepted as "on" for boolean alignment flags.
    xml_true = ('1', 'true')
    cell_xfs = root.find(QName(xmlns, 'cellXfs').text)
    if cell_xfs is not None:  # can happen on bad OOXML writers (e.g. Gnumeric)
        cell_xfs_nodes = cell_xfs.findall(QName(xmlns, 'xf').text)
        for index, cell_xfs_node in enumerate(cell_xfs_nodes):
            new_style = Style()
            number_format_id = int(cell_xfs_node.get('numFmtId'))
            if number_format_id < 164:
                new_style.number_format.format_code = \
                    builtin_formats.get(number_format_id, 'General')
            elif number_format_id in custom_num_formats:
                new_style.number_format.format_code = \
                    custom_num_formats[number_format_id]
            else:
                raise MissingNumberFormat('%s' % number_format_id)

            if cell_xfs_node.get('applyAlignment') == '1':
                alignment = cell_xfs_node.find(QName(xmlns, 'alignment').text)
                if alignment is not None:
                    if alignment.get('horizontal') is not None:
                        new_style.alignment.horizontal = alignment.get(
                            'horizontal')
                    if alignment.get('vertical') is not None:
                        new_style.alignment.vertical = alignment.get(
                            'vertical')
                    # Compare against the true spellings: a bare truthiness
                    # test would also accept wrapText="0", since any
                    # non-empty attribute string is truthy.
                    if alignment.get('wrapText') in xml_true:
                        new_style.alignment.wrap_text = True
                    if alignment.get('shrinkToFit') in xml_true:
                        new_style.alignment.shrink_to_fit = True
                    if alignment.get('indent') is not None:
                        # NOTE(review): previously assigned to ``ident``,
                        # which looks like a typo for ``indent`` -- confirm
                        # against the alignment class.
                        new_style.alignment.indent = int(
                            alignment.get('indent'))
                    if alignment.get('textRotation') is not None:
                        new_style.alignment.text_rotation = int(
                            alignment.get('textRotation'))
                    # ignore justifyLastLine option when horizontal = distributed

            if cell_xfs_node.get('applyFont') == '1':
                source_font = font_list[int(cell_xfs_node.get('fontId'))]
                new_style.font = deepcopy(source_font)
                new_style.font.color = deepcopy(source_font.color)

            if cell_xfs_node.get('applyFill') == '1':
                source_fill = fill_list[int(cell_xfs_node.get('fillId'))]
                new_style.fill = deepcopy(source_fill)
                new_style.fill.start_color = deepcopy(source_fill.start_color)
                new_style.fill.end_color = deepcopy(source_fill.end_color)

            if cell_xfs_node.get('applyBorder') == '1':
                source_borders = border_list[int(
                    cell_xfs_node.get('borderId'))]
                new_style.borders = deepcopy(source_borders)
                new_style.borders.left = deepcopy(source_borders.left)
                new_style.borders.left.color = deepcopy(
                    source_borders.left.color)
                new_style.borders.right = deepcopy(source_borders.right)
                new_style.borders.right.color = deepcopy(
                    source_borders.right.color)
                new_style.borders.top = deepcopy(source_borders.top)
                new_style.borders.top.color = deepcopy(
                    source_borders.top.color)
                new_style.borders.bottom = deepcopy(source_borders.bottom)
                new_style.borders.bottom.color = deepcopy(
                    source_borders.bottom.color)
                new_style.borders.diagonal = deepcopy(source_borders.diagonal)
                new_style.borders.diagonal.color = deepcopy(
                    source_borders.diagonal.color)

            if cell_xfs_node.get('applyProtection') == '1':
                protection = cell_xfs_node.find(
                    QName(xmlns, 'protection').text)
                # Ignore if there are no protection sub-nodes
                if protection is not None:
                    if protection.get('locked') is not None:
                        if protection.get('locked') == '1':
                            new_style.protection.locked = Protection.PROTECTION_PROTECTED
                        else:
                            new_style.protection.locked = Protection.PROTECTION_UNPROTECTED
                    if protection.get('hidden') is not None:
                        if protection.get('hidden') == '1':
                            new_style.protection.hidden = Protection.PROTECTION_PROTECTED
                        else:
                            new_style.protection.hidden = Protection.PROTECTION_UNPROTECTED

            table[index] = new_style
    return table
def fast_parse(ws, xml_source, string_table, style_table):
    """Populate worksheet *ws* from the XML of a worksheet part.

    The document is read twice: once as a whole tree for merged cells,
    column dimensions, print options, page margins, page setup and
    header/footer, and once incrementally (``iterparse`` filtered through
    ``filter_cells``) for the cell values.

    :param ws: worksheet object to fill; its ``merge_cells``, ``_styles``,
        ``cell``, ``column_dimensions``, ``page_setup``, ``page_margins``
        and ``header_footer`` members are used.
    :param xml_source: XML text of the worksheet part.
    :param string_table: mapping from shared-string index to string,
        consulted for cells whose type equals ``Cell.TYPE_STRING``.
    :param style_table: mapping from style id to style, consulted for
        cells carrying an ``s`` attribute.
    :return: None; *ws* is modified in place.
    """
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(xml_source)

    mergeCells = root.find(QName(xmlns, 'mergeCells').text)
    if mergeCells is not None:
        for mergeCell in mergeCells.findall(QName(xmlns, 'mergeCell').text):
            ws.merge_cells(mergeCell.get('ref'))

    value_tag = '{%s}v' % xmlns
    source = _get_xml_iter(xml_source)
    it = iterparse(source)
    for event, element in filter(filter_cells, it):
        value = element.findtext(value_tag)
        coordinate = element.get('r')
        style_id = element.get('s')
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))
        if value is not None:
            data_type = element.get('t', 'n')
            if data_type == Cell.TYPE_STRING:
                # The cell stores an index into the shared string table.
                value = string_table.get(int(value))
            ws.cell(coordinate).value = value
        # to avoid memory exhaustion, clear the item after use
        element.clear()

    cols = root.find(QName(xmlns, 'cols').text)
    if cols is not None:
        for col in cols.findall(QName(xmlns, 'col').text):
            # One <col> element may describe a whole span of columns.
            first = int(col.get('min')) if col.get('min') else 1
            last = int(col.get('max')) if col.get('max') else 1
            for col_id in range(first, last + 1):
                column = get_column_letter(col_id)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                dimension = ws.column_dimensions[column]
                if col.get('width') is not None:
                    dimension.width = float(col.get('width'))
                if col.get('bestFit') == '1':
                    dimension.auto_size = True
                if col.get('hidden') == '1':
                    dimension.visible = False
                if col.get('outlineLevel') is not None:
                    dimension.outline_level = int(col.get('outlineLevel'))
                if col.get('collapsed') == '1':
                    dimension.collapsed = True
                if col.get('style') is not None:
                    dimension.style_index = col.get('style')

    printOptions = root.find(QName(xmlns, 'printOptions').text)
    if printOptions is not None:
        for option in ('horizontalCentered', 'verticalCentered'):
            setting = printOptions.get(option)
            if setting is not None:
                setattr(ws.page_setup, option, setting)

    pageMargins = root.find(QName(xmlns, 'pageMargins').text)
    if pageMargins is not None:
        for edge in ('left', 'right', 'top', 'bottom', 'header', 'footer'):
            margin = pageMargins.get(edge)
            if margin is not None:
                setattr(ws.page_margins, edge, float(margin))

    pageSetup = root.find(QName(xmlns, 'pageSetup').text)
    if pageSetup is not None:
        # Each attribute is stored under its own name.
        # NOTE(review): ``scale`` used to be written to ``page_setup.top``,
        # which looks like a copy/paste slip -- confirm that page_setup
        # exposes ``scale``.
        for option in ('orientation', 'paperSize', 'scale', 'fitToPage',
                       'fitToHeight', 'fitToWidth', 'firstPageNumber',
                       'useFirstPageNumber'):
            setting = pageSetup.get(option)
            if setting is not None:
                setattr(ws.page_setup, option, setting)

    headerFooter = root.find(QName(xmlns, 'headerFooter').text)
    if headerFooter is not None:
        oddHeader = headerFooter.find(QName(xmlns, 'oddHeader').text)
        if oddHeader is not None:
            ws.header_footer.setHeader(oddHeader.text)
        oddFooter = headerFooter.find(QName(xmlns, 'oddFooter').text)
        if oddFooter is not None:
            ws.header_footer.setFooter(oddFooter.text)
def read_sheets_titles(xml_source):
    """Read titles for all sheets.

    :param xml_source: XML text of the workbook part.
    :return: the ``name`` attribute of each child of the ``sheets``
        element, in document order, or an empty list when the document
        has no ``sheets`` element.
    """
    root = fromstring(xml_source)
    titles_root = root.find(QName("http://schemas.openxmlformats.org/spreadsheetml/2006/main", "sheets").text)
    # Element.getchildren() was removed from xml.etree in Python 3.9;
    # iterating the element yields the same children.
    sheets = titles_root if titles_root is not None else ()
    return [sheet.get("name") for sheet in sheets]