def assert_equals_file_content(reference_file, fixture, filetype = 'xml'):
    """Assert that ``fixture`` matches the content of ``reference_file``.

    ``fixture`` is either the path of an existing file (whose content is
    read) or the content itself.  When ``filetype`` is ``'xml'`` both sides
    are parsed, re-indented with ``pretty_indent`` and serialised again
    before the line-by-line comparison.  An ``AssertionError`` carrying the
    unified diff is raised when the two differ.
    """
    def _canonical_xml(raw):
        # Parse, indent in place, then serialise into a string buffer.
        node = fromstring(raw)
        pretty_indent(node)
        buf = StringIO()
        ElementTree(node).write(buf)
        return buf.getvalue()

    if os.path.isfile(fixture):
        with open(fixture) as handle:
            actual = handle.read()
    else:
        actual = fixture

    with open(reference_file) as handle:
        expected = handle.read()

    if filetype == 'xml':
        actual = _canonical_xml(actual)
        expected = _canonical_xml(expected)

    delta = list(difflib.unified_diff(expected.split('\n'),
                                      actual.split('\n')))
    if delta:
        report = StringIO()
        pprint(delta, stream = report)
        assert False, 'Differences found : %s' % report.getvalue()
def assert_equals_file_content(reference_file, fixture, filetype='xml'):
    """Compare ``fixture`` against the content of ``reference_file``.

    ``fixture`` may be a path to an existing file or the literal content.
    For ``filetype == 'xml'`` both contents are parsed, passed through
    ``pretty_indent`` and re-serialised so that the comparison works on the
    re-indented form.  Raises ``AssertionError`` with the pretty-printed
    unified diff when any difference is found.
    """
    def _normalise(xml_text):
        # Round-trip through the parser to get a re-indented serialisation.
        tree_root = fromstring(xml_text)
        pretty_indent(tree_root)
        sink = StringIO()
        ElementTree(tree_root).write(sink)
        return sink.getvalue()

    if not os.path.isfile(fixture):
        produced = fixture
    else:
        with open(fixture) as source:
            produced = source.read()

    with open(reference_file) as source:
        wanted = source.read()

    if filetype == 'xml':
        produced = _normalise(produced)
        wanted = _normalise(wanted)

    produced_lines = produced.split('\n')
    wanted_lines = wanted.split('\n')
    mismatches = list(difflib.unified_diff(wanted_lines, produced_lines))
    if mismatches:
        out = StringIO()
        pprint(mismatches, stream=out)
        assert False, 'Differences found : %s' % out.getvalue()
def read_style_table(xml_source):
    """Read styles from the shared style table.

    Returns a dict mapping the position of each ``xf`` node under
    ``cellXfs`` to a new ``Style`` whose number format code has been set.
    Raises ``MissingNumberFormat`` when a format id of 164 or above is not
    among the custom formats parsed from the document.
    """
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    styles = {}
    root = fromstring(xml_source)
    custom_formats = parse_custom_num_formats(root, xmlns)
    builtin_formats = NumberFormat._BUILTIN_FORMATS
    cell_xfs = root.find(QName(xmlns, 'cellXfs').text)
    if cell_xfs is None:
        # can happen on bad OOXML writers (e.g. Gnumeric)
        return styles

    for position, xf_node in enumerate(cell_xfs.findall(QName(xmlns, 'xf').text)):
        style = Style()
        format_id = int(xf_node.get('numFmtId'))
        if format_id < 164:
            # ids below 164 come from the builtin table, 'General' if unknown
            code = builtin_formats.get(format_id, 'General')
        elif format_id in custom_formats:
            code = custom_formats[format_id]
        else:
            raise MissingNumberFormat('%s' % format_id)
        style.number_format.format_code = code
        styles[position] = style
    return styles
def read_excel_base_date(xml_source):
    """Return the calendar base declared by the workbook XML.

    Returns ``CALENDAR_MAC_1904`` when the ``workbookPr`` element has a
    ``date1904`` attribute equal to ``'1'`` or ``'true'``; otherwise
    (including when ``workbookPr`` is absent) ``CALENDAR_WINDOWS_1900``.
    """
    root = fromstring(text=xml_source)
    wbPr = root.find(QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main', 'workbookPr').text)
    # find() returns None when the element is missing; guard before reading
    # attributes instead of failing with AttributeError.
    if wbPr is not None and wbPr.get('date1904') in ('1', 'true'):
        return CALENDAR_MAC_1904
    return CALENDAR_WINDOWS_1900
def read_excel_base_date(xml_source):
    """Return the calendar base declared by the workbook XML.

    ``CALENDAR_MAC_1904`` is returned when ``workbookPr`` exists and its
    ``date1904`` attribute is ``'1'`` or ``'true'``; in every other case
    ``CALENDAR_WINDOWS_1900`` is returned.
    """
    workbook_pr = fromstring(text=xml_source).find(
        '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}workbookPr')
    if workbook_pr is None:
        return CALENDAR_WINDOWS_1900
    if workbook_pr.get('date1904') in ('1', 'true'):
        return CALENDAR_MAC_1904
    return CALENDAR_WINDOWS_1900
def read_excel_base_date(xml_source):
    """Return the calendar base declared by the workbook XML.

    The result is ``CALENDAR_MAC_1904`` only when a ``workbookPr`` element
    is present with ``date1904`` set to ``'1'`` or ``'true'``; otherwise it
    is ``CALENDAR_WINDOWS_1900``.
    """
    document = fromstring(text = xml_source)
    props = document.find('{%s}workbookPr' % SHEET_MAIN_NS)
    uses_1904 = props is not None and props.get('date1904') in ('1', 'true')
    return CALENDAR_MAC_1904 if uses_1904 else CALENDAR_WINDOWS_1900
def read_sheets_titles(xml_source):
    """Read titles for all sheets.

    Returns the ``name`` attribute of every child of the ``sheets``
    element, in document order.
    """
    document = fromstring(xml_source)
    sheets_tag = QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main', 'sheets').text
    sheets_node = document.find(sheets_tag)
    names = []
    for sheet_node in sheets_node:
        names.append(sheet_node.get('name'))
    return names
def read_sheets_titles(xml_source):
    """Read titles for all sheets.

    Returns the ``name`` attribute of every child of the ``sheets``
    element, in document order.
    """
    root = fromstring(xml_source)
    titles_root = root.find(QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main', 'sheets').text)
    # Iterate the element directly: Element.getchildren() no longer exists
    # in xml.etree.ElementTree on Python 3.9+.
    return [sheet.get('name') for sheet in titles_root]
def read_excel_base_date(xml_source):
    """Return the calendar base declared by the workbook XML.

    Yields ``CALENDAR_MAC_1904`` when ``workbookPr`` is present and its
    ``date1904`` attribute equals ``'1'`` or ``'true'``; otherwise
    ``CALENDAR_WINDOWS_1900``.
    """
    tree_root = fromstring(text=xml_source)
    node = tree_root.find('{%s}workbookPr' % SHEET_MAIN_NS)
    # A missing element is treated like a missing attribute.
    flag = None if node is None else node.get('date1904')
    if flag in ('1', 'true'):
        return CALENDAR_MAC_1904
    return CALENDAR_WINDOWS_1900
def read_properties_core(xml_source):
    """Read assorted file properties.

    Fills a new ``DocumentProperties`` with creator, last modifier, and the
    created/modified timestamps found in ``xml_source``.  Missing creator
    or modifier become ``''``; a missing creation date becomes the current
    time and a missing modification date falls back to the creation date.
    """
    properties = DocumentProperties()
    root = fromstring(xml_source)

    def _lookup(prefix, tag):
        # Locate a direct child by namespace prefix and local tag name.
        return root.find(QName(NAMESPACES[prefix], tag).text)

    creator = _lookup('dc', 'creator')
    properties.creator = '' if creator is None else creator.text

    modifier = _lookup('cp', 'lastModifiedBy')
    properties.last_modified_by = '' if modifier is None else modifier.text

    created = _lookup('dcterms', 'created')
    if created is None:
        properties.created = datetime.datetime.now()
    else:
        properties.created = W3CDTF_to_datetime(created.text)

    modified = _lookup('dcterms', 'modified')
    if modified is None:
        properties.modified = properties.created
    else:
        properties.modified = W3CDTF_to_datetime(modified.text)

    return properties
def read_content_types(xml_source):
    """Read content types.

    Generator yielding a ``(PartName, ContentType)`` pair for every
    ``Override`` child of the document root.
    """
    document = fromstring(xml_source)
    override_tag = QName('http://schemas.openxmlformats.org/package/2006/content-types', 'Override').text
    for override in document.findall(override_tag):
        part_name = override.get('PartName')
        content_type = override.get('ContentType')
        yield part_name, content_type
def test_write_comments_vml():
    """Check the VML written by ``CommentWriter`` against the reference file.

    Shape ids and (row, column) pairs are collected from both documents and
    compared as sets; they are then blanked so the remaining XML can be
    diffed with ``compare_xml``.
    """
    ws = _create_ws()[0]
    cw = CommentWriter(ws)
    reference_file = os.path.join(DATADIR, 'writer', 'expected', 'commentsDrawing1.vml')
    content = cw.write_comments_vml()
    with open(reference_file) as expected:
        correct = fromstring(expected.read())
    check = fromstring(content)

    def _collect_and_blank(document):
        # Gather ids and coordinates of every shape, then overwrite them
        # with "0" because they are checked separately.
        ids = []
        coords = []
        for shape in document.findall("{%s}shape" % vmlns):
            ids.append(shape.attrib["id"])
            client_data = shape.find("{%s}ClientData" % excelns)
            row_node = client_data.find("{%s}Row" % excelns)
            column_node = client_data.find("{%s}Column" % excelns)
            coords.append((row_node.text, column_node.text))
            shape.attrib["id"] = "0"
            row_node.text = "0"
            column_node.text = "0"
        return ids, coords

    correct_ids, correct_coords = _collect_and_blank(correct)
    check_ids, check_coords = _collect_and_blank(check)

    assert set(correct_coords) == set(check_coords)
    assert set(correct_ids) == set(check_ids)
    diff = compare_xml(get_document_content(correct), get_document_content(check))
    assert diff is None, diff
def read_workbook_settings(self, xml_source):
    """Set ``self.active`` from the first ``workbookView`` in ``xml_source``.

    Nothing happens when no ``workbookView`` grandchild of the root exists
    or when it has no ``activeTab`` attribute.
    """
    document = fromstring(xml_source)
    view = document.find('*/{%s}workbookView' % SHEET_MAIN_NS)
    if view is None:
        return
    active_tab = view.attrib.get('activeTab')
    if active_tab is not None:
        self.active = int(active_tab)
def read_workbook_settings(self, xml_source):
    """Read the active tab index from the workbook XML into ``self.active``.

    The first ``workbookView`` found one level below the root is used; if
    it is missing or carries no ``activeTab`` attribute, ``self.active`` is
    left untouched.
    """
    view_path = "*/{%s}workbookView" % SHEET_MAIN_NS
    view = fromstring(xml_source).find(view_path)
    if view is not None and "activeTab" in view.attrib:
        self.active = int(view.attrib["activeTab"])
def test_write_comments():
    """Check the comments XML written by ``CommentWriter`` against the reference.

    Author ids are replaced by author names and both the comment list and
    the author list are sorted, so that the comparison does not depend on
    the order in which comments or authors were written.
    """
    ws = _create_ws()[0]
    reference_file = os.path.join(DATADIR, 'writer', 'expected', 'comments1.xml')
    cw = CommentWriter(ws)
    content = cw.write_comments()
    with open(reference_file) as expected:
        correct = fromstring(expected.read())
    check = fromstring(content)

    comment_list_tag = '{%s}commentList' % SHEET_MAIN_NS
    authors_tag = '{%s}authors' % SHEET_MAIN_NS

    # check top-level elements have the same name
    for i, j in zip(correct, check):
        assert i.tag == j.tag

    # list(element) replaces Element.getchildren(), which was removed from
    # xml.etree.ElementTree in Python 3.9.
    correct_comments = list(correct.find(comment_list_tag))
    check_comments = list(check.find(comment_list_tag))
    correct_authors = list(correct.find(authors_tag))
    check_authors = list(check.find(authors_tag))

    # replace author ids with author names
    for i in correct_comments:
        i.attrib["authorId"] = correct_authors[int(i.attrib["authorId"])].text
    for i in check_comments:
        i.attrib["authorId"] = check_authors[int(i.attrib["authorId"])].text

    # sort the comment list
    correct_comments.sort(key=lambda tag: tag.attrib["ref"])
    check_comments.sort(key=lambda tag: tag.attrib["ref"])
    correct.find(comment_list_tag)[:] = correct_comments
    check.find(comment_list_tag)[:] = check_comments

    # sort the author list
    correct_authors.sort(key=lambda tag: tag.text)
    check_authors.sort(key=lambda tag: tag.text)
    correct.find(authors_tag)[:] = correct_authors
    check.find(authors_tag)[:] = check_authors

    diff = compare_xml(get_document_content(correct), get_document_content(check))
    assert diff is None, diff
def read_string_table(xml_source):
    """Read in all shared strings in the table.

    Returns a dict mapping the position of each ``si`` node under the root
    to the value ``get_string`` produces for it.
    """
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    document = fromstring(text=xml_source)
    si_nodes = document.findall(QName(xmlns, 'si').text)
    return dict(enumerate(get_string(xmlns, si_node) for si_node in si_nodes))
def test_serialised(self, bar_chart_2):
    """Check the serialised file against sample.

    The XML written by ``BarChartWriter`` must validate against
    ``chart_schema`` and show no difference from ``BarChart.xml``.
    """
    writer = BarChartWriter(bar_chart_2)
    xml = writer.write()
    chart_schema.assertValid(fromstring(xml))

    expected_file = os.path.join(DATADIR, "writer", "expected", "BarChart.xml")
    with open(expected_file) as expected:
        reference_xml = expected.read()
    diff = compare_xml(xml, reference_xml)
    assert diff is None, diff
def write_content_types(workbook):
    """Write the content-types xml.

    When the workbook carries a ``vba_archive`` its existing content-types
    document is reused and the part names it already overrides are
    remembered, so they are not emitted a second time.  Otherwise a fresh
    ``Types`` root is built from ``static_content_types_config``.  One
    ``Override`` is then added per worksheet, drawing, chart, chart-shapes
    drawing and comments part.

    Returns whatever ``get_document_content`` produces for the root element.
    """
    override_tag = '{%s}Override' % CONTYPES_NS
    seen = set()
    if workbook.vba_archive:
        root = fromstring(workbook.vba_archive.read(ARC_CONTENT_TYPES))
        for elem in root.findall(override_tag):
            seen.add(elem.attrib['PartName'])
    else:
        root = Element('{%s}Types' % CONTYPES_NS)
        for setting_type, name, content_type in static_content_types_config:
            if setting_type == 'Override':
                tag = override_tag
                attrib = {'PartName': '/' + name}
            else:
                tag = '{%s}Default' % CONTYPES_NS
                attrib = {'Extension': name}
            attrib['ContentType'] = content_type
            SubElement(root, tag, attrib)

    def _add_override(part_name, content_type):
        # Skip parts the reused VBA content-types document already lists.
        if part_name not in seen:
            SubElement(root, override_tag,
                       {'PartName': part_name, 'ContentType': content_type})

    drawing_id = 1
    chart_id = 1
    comments_id = 1

    for sheet_id, sheet in enumerate(workbook.worksheets):
        _add_override(
            '/xl/worksheets/sheet%d.xml' % (sheet_id + 1),
            'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml')

        if sheet._charts or sheet._images:
            _add_override(
                '/xl/drawings/drawing%d.xml' % drawing_id,
                'application/vnd.openxmlformats-officedocument.drawing+xml')
            drawing_id += 1

            for chart in sheet._charts:
                _add_override(
                    '/xl/charts/chart%d.xml' % chart_id,
                    'application/vnd.openxmlformats-officedocument.drawingml.chart+xml')
                chart_id += 1
                if chart._shapes:
                    _add_override(
                        '/xl/drawings/drawing%d.xml' % drawing_id,
                        'application/vnd.openxmlformats-officedocument.drawingml.chartshapes+xml')
                    drawing_id += 1

        if sheet._comment_count > 0:
            # Deduplicated like every other part: previously a comments
            # override already present in the VBA archive was added again.
            _add_override(
                '/xl/comments%d.xml' % comments_id,
                'application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml')
            comments_id += 1

    return get_document_content(root)
def write_content_types(workbook):
    """Write the content-types xml.

    With a ``vba_archive`` the archive's content-types document is parsed
    and the part names of its ``Override`` entries are collected so they are
    not added again.  Without one, a new ``Types`` root is populated with
    the fixed overrides and defaults.  Afterwards one ``Override`` is added
    per worksheet, drawing, chart and chart-shapes drawing.

    Returns whatever ``get_document_content`` produces for the root element.
    """
    content_types_ns = 'http://schemas.openxmlformats.org/package/2006/content-types'
    seen = set()
    if workbook.vba_archive:
        root = fromstring(workbook.vba_archive.read(ARC_CONTENT_TYPES))
        register_namespace('', 'http://schemas.openxmlformats.org/package/2006/content-types')
        for elem in root.findall('{%s}Override' % content_types_ns):
            seen.add(elem.attrib['PartName'])
    else:
        root = Element('Types', {'xmlns': content_types_ns})
        # (tag, identifying attribute, its value, content type), in output order
        fixed_entries = (
            ('Override', 'PartName', '/' + ARC_THEME,
             'application/vnd.openxmlformats-officedocument.theme+xml'),
            ('Override', 'PartName', '/' + ARC_STYLE,
             'application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml'),
            ('Default', 'Extension', 'rels',
             'application/vnd.openxmlformats-package.relationships+xml'),
            ('Default', 'Extension', 'xml', 'application/xml'),
            ('Default', 'Extension', 'png', 'image/png'),
            ('Override', 'PartName', '/' + ARC_WORKBOOK,
             'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml'),
            ('Override', 'PartName', '/' + ARC_APP,
             'application/vnd.openxmlformats-officedocument.extended-properties+xml'),
            ('Override', 'PartName', '/' + ARC_CORE,
             'application/vnd.openxmlformats-package.core-properties+xml'),
            ('Override', 'PartName', '/' + ARC_SHARED_STRINGS,
             'application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml'),
        )
        for tag, key, value, content_type in fixed_entries:
            SubElement(root, tag, {key: value, 'ContentType': content_type})

    def _override(part_name, content_type):
        # Only add parts that the reused document does not list already.
        if part_name not in seen:
            SubElement(root, 'Override',
                       {'PartName': part_name, 'ContentType': content_type})

    drawing_id = 1
    chart_id = 1

    for sheet_id, sheet in enumerate(workbook.worksheets):
        _override('/xl/worksheets/sheet%d.xml' % (sheet_id + 1),
                  'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml')

        if sheet._charts or sheet._images:
            _override('/xl/drawings/drawing%d.xml' % drawing_id,
                      'application/vnd.openxmlformats-officedocument.drawing+xml')
            drawing_id += 1

            for chart in sheet._charts:
                _override('/xl/charts/chart%d.xml' % chart_id,
                          'application/vnd.openxmlformats-officedocument.drawingml.chart+xml')
                chart_id += 1
                if chart._shapes:
                    _override('/xl/drawings/drawing%d.xml' % drawing_id,
                              'application/vnd.openxmlformats-officedocument.drawingml.chartshapes+xml')
                    drawing_id += 1

    return get_document_content(root)
def read_named_ranges(xml_source, workbook):
    """Read named ranges, excluding poorly defined ranges.

    Hidden names and names matching ``DISCARDED_RANGES`` or
    ``BUGGY_NAMED_RANGES`` are skipped.  Names that refer to cell ranges
    become ``NamedRange`` objects (destinations on worksheets the workbook
    does not have are dropped); all others become
    ``NamedRangeContainingValue``.  A ``localSheetId`` attribute sets the
    scope of the resulting object.

    Returns the list of named range objects; empty when the document has no
    ``definedNames`` element.
    """
    named_ranges = []
    root = fromstring(xml_source)
    names_root = root.find(
        QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main',
              'definedNames').text)
    if names_root is None:
        return named_ranges

    # Iterate the element directly: Element.getchildren() was removed from
    # xml.etree.ElementTree in Python 3.9.
    for name_node in names_root:
        range_name = name_node.get('name')
        if name_node.get("hidden", '0') == '1':
            continue

        # Both checks are evaluated, as before, so that an unusable name or
        # definition still surfaces as an error instead of being skipped.
        discarded = any(marker in range_name for marker in DISCARDED_RANGES)
        buggy = any(marker in name_node.text for marker in BUGGY_NAMED_RANGES)
        if discarded or buggy:
            continue

        if refers_to_range(name_node.text):
            destinations = split_named_range(name_node.text)
            new_destinations = []
            for worksheet, cells_range in destinations:
                # it can happen that a valid named range references
                # a missing worksheet, when Excel didn't properly maintain
                # the named range list
                #
                # we just ignore them here
                worksheet = workbook.get_sheet_by_name(worksheet)
                if worksheet:
                    new_destinations.append((worksheet, cells_range))
            named_range = NamedRange(range_name, new_destinations)
        else:
            named_range = NamedRangeContainingValue(range_name, name_node.text)

        location_id = name_node.get("localSheetId")
        if location_id:
            named_range.scope = workbook.worksheets[int(location_id)]
        named_ranges.append(named_range)

    return named_ranges
def read_sheets_titles(xml_source):
    """Read titles for all sheets.

    Takes the texts of the children of the ``vector`` under
    ``TitlesOfParts`` and keeps as many of them as the first part listed by
    ``get_number_of_parts`` declares.
    """
    root = fromstring(xml_source)
    titles_root = root.find(QName('http://schemas.openxmlformats.org/officeDocument/2006/extended-properties', 'TitlesOfParts').text)
    vector = titles_root.find(QName(NAMESPACES['vt'], 'vector').text)
    parts, names = get_number_of_parts(xml_source)
    # we can't assume 'Worksheets' to be written in english,
    # but it's always the first item of the parts list (see bug #22)
    size = parts[names[0]]
    # Iterate the element directly: Element.getchildren() was removed from
    # xml.etree.ElementTree in Python 3.9.
    children = [c.text for c in vector]
    return children[:size]
def test_write_comments():
    """Check the comments XML written by ``CommentWriter`` against the reference.

    Before diffing, author ids are replaced by author names and the comment
    and author lists are sorted, making the comparison independent of the
    order in which they were written.
    """
    ws = _create_ws()[0]
    reference_file = os.path.join(DATADIR, 'writer', 'expected', 'comments1.xml')
    cw = CommentWriter(ws)
    content = cw.write_comments()
    with open(reference_file) as expected:
        correct = fromstring(expected.read())
    check = fromstring(content)

    # check top-level elements have the same name
    for i, j in zip(correct, check):
        assert i.tag == j.tag

    def _children(document, local_name):
        # list(element) replaces Element.getchildren(), removed in Python 3.9
        return list(document.find('{%s}%s' % (SHEET_MAIN_NS, local_name)))

    correct_comments = _children(correct, 'commentList')
    check_comments = _children(check, 'commentList')
    correct_authors = _children(correct, 'authors')
    check_authors = _children(check, 'authors')

    # replace author ids with author names
    for i in correct_comments:
        i.attrib["authorId"] = correct_authors[int(i.attrib["authorId"])].text
    for i in check_comments:
        i.attrib["authorId"] = check_authors[int(i.attrib["authorId"])].text

    # sort the comment list
    correct_comments.sort(key=lambda tag: tag.attrib["ref"])
    check_comments.sort(key=lambda tag: tag.attrib["ref"])
    correct.find('{%s}commentList' % SHEET_MAIN_NS)[:] = correct_comments
    check.find('{%s}commentList' % SHEET_MAIN_NS)[:] = check_comments

    # sort the author list
    correct_authors.sort(key=lambda tag: tag.text)
    check_authors.sort(key=lambda tag: tag.text)
    correct.find('{%s}authors' % SHEET_MAIN_NS)[:] = correct_authors
    check.find('{%s}authors' % SHEET_MAIN_NS)[:] = check_authors

    diff = compare_xml(get_document_content(correct), get_document_content(check))
    assert diff is None, diff
def test_write_comments_vml():
    """Check the VML written by ``CommentWriter`` against the reference file.

    Ids and (row, column) pairs of all shapes are compared as sets, then
    blanked in both documents before the remaining XML is diffed.
    """
    ws = _create_ws()[0]
    cw = CommentWriter(ws)
    reference_file = os.path.join(DATADIR, 'writer', 'expected', 'commentsDrawing1.vml')
    content = cw.write_comments_vml()
    with open(reference_file) as expected:
        correct = fromstring(expected.read())
    check = fromstring(content)

    shape_tag = "{%s}shape" % vmlns
    client_data_tag = "{%s}ClientData" % excelns
    row_tag = "{%s}Row" % excelns
    column_tag = "{%s}Column" % excelns

    collected = []
    for document in (correct, check):
        ids = []
        coords = []
        for shape in document.findall(shape_tag):
            ids.append(shape.attrib["id"])
            anchor = shape.find(client_data_tag)
            row = anchor.find(row_tag)
            col = anchor.find(column_tag)
            coords.append((row.text, col.text))
            # blank the data we are checking separately
            shape.attrib["id"] = "0"
            row.text = "0"
            col.text = "0"
        collected.append((ids, coords))

    (correct_ids, correct_coords), (check_ids, check_coords) = collected

    assert set(correct_coords) == set(check_coords)
    assert set(correct_ids) == set(check_ids)
    diff = compare_xml(get_document_content(correct), get_document_content(check))
    assert diff is None, diff
def read_named_ranges(xml_source, workbook):
    """Read named ranges, excluding poorly defined ranges.

    Hidden names and names matching ``DISCARDED_RANGES`` or
    ``BUGGY_NAMED_RANGES`` are left out.  A name whose definition refers to
    a range yields a ``NamedRange`` holding only the destinations whose
    worksheet exists in ``workbook``; any other name yields a
    ``NamedRangeContainingValue``.  ``localSheetId``, when set, selects the
    worksheet used as scope.
    """
    named_ranges = []
    document = fromstring(xml_source)
    defined_names = document.find(QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main', 'definedNames').text)
    if defined_names is None:
        return named_ranges

    for entry in defined_names:
        range_name = entry.get('name')
        if entry.get("hidden", '0') == '1':
            continue

        # Evaluate both filters (no short-circuit between them).
        is_discarded = any(marker in range_name for marker in DISCARDED_RANGES)
        is_buggy = any(marker in entry.text for marker in BUGGY_NAMED_RANGES)
        if is_discarded or is_buggy:
            continue

        definition = entry.text
        if refers_to_range(definition):
            resolved = []
            for sheet_title, cells_range in split_named_range(definition):
                # it can happen that a valid named range references
                # a missing worksheet, when Excel didn't properly maintain
                # the named range list
                #
                # we just ignore them here
                sheet = workbook.get_sheet_by_name(sheet_title)
                if sheet:
                    resolved.append((sheet, cells_range))
            named_range = NamedRange(range_name, resolved)
        else:
            named_range = NamedRangeContainingValue(range_name, definition)

        location_id = entry.get("localSheetId")
        if location_id:
            named_range.scope = workbook.worksheets[int(location_id)]
        named_ranges.append(named_range)

    return named_ranges
def assert_equals_file_content(reference_file, fixture, filetype="xml"):
    """Assert that ``fixture`` matches the content of ``reference_file``.

    ``fixture`` is a path to an existing file or the content itself.  For
    ``filetype == "xml"`` both sides are parsed, re-indented with
    ``pretty_indent`` and re-serialised before being compared line by line.
    Raises ``AssertionError`` with the unified diff on any difference.
    """
    def _slurp(path):
        # Explicit try/finally keeps the file closed even if read() fails.
        handle = open(path)
        try:
            return handle.read()
        finally:
            handle.close()

    def _reserialise(raw):
        node = fromstring(raw)
        pretty_indent(node)
        buf = BytesIO()
        ElementTree(node).write(buf)
        return buf.getvalue()

    if os.path.isfile(fixture):
        actual = _slurp(fixture)
    else:
        actual = fixture
    wanted = _slurp(reference_file)

    if filetype == "xml":
        actual = _reserialise(actual)
        wanted = _reserialise(wanted)

    actual_lines = unicode(actual).split("\n")
    wanted_lines = unicode(wanted).split("\n")
    delta = list(difflib.unified_diff(wanted_lines, actual_lines))
    if delta:
        report = BytesIO()
        pprint(delta, stream=report)
        assert False, "Differences found : %s" % report.getvalue()
def read_sheets_titles(xml_source):
    """Read titles for all sheets.

    Collects the texts of the children of the ``vector`` under
    ``TitlesOfParts`` and returns the first ``size`` of them, where
    ``size`` is the count ``get_number_of_parts`` reports for the first
    part name.
    """
    root = fromstring(xml_source)
    titles_root = root.find(
        QName(
            'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties',
            'TitlesOfParts').text)
    vector = titles_root.find(QName(NAMESPACES['vt'], 'vector').text)
    parts, names = get_number_of_parts(xml_source)
    # we can't assume 'Worksheets' to be written in english,
    # but it's always the first item of the parts list (see bug #22)
    size = parts[names[0]]
    # Direct iteration replaces Element.getchildren(), which was removed
    # from xml.etree.ElementTree in Python 3.9.
    titles = [child.text for child in vector]
    return titles[:size]
def read_string_table(xml_source):
    """Read in all shared strings in the table.

    Returns a dict mapping the position of each ``si`` node to the string
    obtained from ``get_string``, with every ``'x005F_'`` removed.
    """
    document = fromstring(text=xml_source)
    strings = {}
    for position, si_node in enumerate(document.findall('{%s}si' % SHEET_MAIN_NS)):
        # fix XML escaping sequence for '_x'
        strings[position] = get_string(si_node).replace('x005F_', '')
    return strings
def get_comments_file(sheet_codename, archive, valid_files):
    """Return the archive filename holding the comments of a worksheet.

    The relationships file of the sheet named ``sheet_codename`` is read
    from ``archive``; the first relationship of type ``COMMENTS_NS`` whose
    normalised target is in ``valid_files`` is returned.

    Returns None if the relationships file is not in ``valid_files`` or no
    such relationship is found.
    """
    # Targets are joined with '/' and compared against ``valid_files``, so
    # normalise with POSIX rules; os.path.normpath would produce backslashes
    # on Windows.  NOTE(review): presumably valid_files holds zip member
    # names, which always use '/' - confirm against the caller.
    import posixpath

    rels_file = PACKAGE_WORKSHEET_RELS + '/' + sheet_codename + '.rels'
    if rels_file not in valid_files:
        return None
    rels_source = archive.read(rels_file)
    root = fromstring(rels_source)
    for relationship in root:
        if relationship.attrib['Type'] == COMMENTS_NS:
            comments_file = posixpath.normpath(
                PACKAGE_WORKSHEETS + '/' + relationship.attrib['Target'])
            if comments_file in valid_files:
                return comments_file
    return None
def read_string_table(xml_source):
    """Read in all shared strings in the table.

    Returns a dict keyed by the position of each ``si`` node; the value is
    the result of ``get_string`` with every ``'x005F_'`` removed.
    """
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    document = fromstring(text=xml_source)
    si_nodes = document.findall(QName(xmlns, 'si').text)
    # fix XML escaping sequence for '_x'
    cleaned = (get_string(xmlns, si_node).replace('x005F_', '')
               for si_node in si_nodes)
    return dict(enumerate(cleaned))
def get_comments_file(sheet_codename, archive, valid_files):
    """Return the archive filename holding the comments of a worksheet.

    Reads the relationships file of ``sheet_codename`` from ``archive`` and
    looks for a relationship of type ``COMMENTS_NS``.  The last path
    component of its target is placed under ``PACKAGE_XL``; the first such
    name found in ``valid_files`` is returned.

    Returns None if there is no such file.
    """
    rels_file = PACKAGE_WORKSHEET_RELS + '/' + sheet_codename + '.rels'
    if rels_file not in valid_files:
        return None

    relationships = fromstring(archive.read(rels_file))
    for relationship in relationships:
        if relationship.attrib['Type'] != COMMENTS_NS:
            continue
        base_name = os.path.split(relationship.attrib['Target'])[-1]
        candidate = PACKAGE_XL + '/' + base_name
        if candidate in valid_files:
            return candidate
    return None
def read_comments(ws, xml_source):
    """Assign the comments found in ``xml_source`` to cells of ``ws``.

    For every ``comment`` element the author is looked up by ``authorId``
    in the list returned by ``_get_author_list``, the texts of all ``t``
    elements inside its ``r`` runs are concatenated, and a ``Comment`` is
    attached to the cell named by the ``ref`` attribute.
    """
    root = fromstring(xml_source)
    authors = _get_author_list(root)
    text_tag = '{%s}text' % SHEET_MAIN_NS
    run_tag = '{%s}r' % SHEET_MAIN_NS
    t_tag = '{%s}t' % SHEET_MAIN_NS

    for node in root.iter('{%s}comment' % SHEET_MAIN_NS):
        author = authors[int(node.attrib['authorId'])]
        cell = node.attrib['ref']
        text_node = node.find(text_tag)
        substrs = []
        for run in text_node.findall(run_tag):
            # An empty <t/> has ``text`` None; treat it as '' so that join
            # does not fail with TypeError.
            runtext = ''.join([t.text or '' for t in run.findall(t_tag)])
            substrs.append(runtext)
        comment_text = ''.join(substrs)
        comment = Comment(comment_text, author)
        ws.cell(coordinate=cell).comment = comment
def get_number_of_parts(xml_source):
    """Get a list of contents of the workbook.

    The children of the ``vector`` under ``HeadingPairs`` are read in
    pairs: the first holds a part name (``lpstr``), the second its size
    (``i4``).

    Returns a tuple ``(parts_size, parts_names)``: a dict mapping each part
    name to its size, and the list of part names in order of first
    appearance.
    """
    parts_size = {}
    parts_names = []
    root = fromstring(xml_source)
    heading_pairs = root.find(QName('http://schemas.openxmlformats.org/officeDocument/2006/extended-properties', 'HeadingPairs').text)
    vector = heading_pairs.find(QName(NAMESPACES['vt'], 'vector').text)
    lpstr_tag = QName(NAMESPACES['vt'], 'lpstr').text
    i4_tag = QName(NAMESPACES['vt'], 'i4').text
    # list(element) replaces Element.getchildren(), which was removed from
    # xml.etree.ElementTree in Python 3.9.
    children = list(vector)
    for child_id in range(0, len(children), 2):
        part_name = children[child_id].find(lpstr_tag).text
        if part_name not in parts_names:
            parts_names.append(part_name)
        part_size = int(children[child_id + 1].find(i4_tag).text)
        parts_size[part_name] = part_size
    return parts_size, parts_names
def read_comments(ws, xml_source):
    """Given a worksheet and the XML of its comments file, assign comments to cells.

    Each ``comment`` element provides the target cell (``ref``) and the
    index of its author (``authorId``) in the list returned by
    ``_get_author_list``.  The comment text is the concatenation of the
    ``t`` texts of all ``r`` runs under the comment's ``text`` element.
    """
    root = fromstring(xml_source)
    authors = _get_author_list(root)
    comment_nodes = root.iter('{%s}comment' % SHEET_MAIN_NS)
    for node in comment_nodes:
        author = authors[int(node.attrib['authorId'])]
        cell = node.attrib['ref']
        text_node = node.find('{%s}text' % SHEET_MAIN_NS)
        pieces = []
        for run in text_node.findall('{%s}r' % SHEET_MAIN_NS):
            for t in run.findall('{%s}t' % SHEET_MAIN_NS):
                # ``text`` is None for an empty <t/>; use '' so the final
                # join cannot fail with TypeError.
                pieces.append(t.text or '')
        comment = Comment(''.join(pieces), author)
        ws.cell(coordinate=cell).comment = comment
def read_style_table(xml_source):
    """Read styles from the shared style table.

    Returns a dict mapping the position of each ``xf`` node under
    ``cellXfs`` to a new ``Style`` whose number format code is taken from
    the builtin formats (ids below 164) or from the custom formats parsed
    from the document.  An empty dict is returned when the document has no
    ``cellXfs`` element.
    """
    table = {}
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(xml_source)
    custom_num_formats = parse_custom_num_formats(root, xmlns)
    builtin_formats = NumberFormat._BUILTIN_FORMATS
    cell_xfs = root.find(QName(xmlns, 'cellXfs').text)
    if cell_xfs is None:
        # find() returns None when the element is missing; return the empty
        # table instead of failing with AttributeError.
        return table
    cell_xfs_nodes = cell_xfs.findall(QName(xmlns, 'xf').text)
    for index, cell_xfs_node in enumerate(cell_xfs_nodes):
        new_style = Style()
        number_format_id = int(cell_xfs_node.get('numFmtId'))
        if number_format_id < 164:
            new_style.number_format.format_code = \
                builtin_formats[number_format_id]
        else:
            new_style.number_format.format_code = \
                custom_num_formats[number_format_id]
        table[index] = new_style
    return table
def read_properties_core(xml_source):
    """Read assorted file properties.

    Returns a new ``DocumentProperties`` with creator and last modifier
    (``''`` when absent) and the created/modified timestamps.  A missing
    creation date is replaced by the current time, a missing modification
    date by the creation date.
    """
    properties = DocumentProperties()
    document = fromstring(xml_source)

    properties.creator = document.findtext('{%s}creator' % DCORE_NS, '')
    properties.last_modified_by = document.findtext('{%s}lastModifiedBy' % COREPROPS_NS, '')

    created = document.find('{%s}created' % DCTERMS_NS)
    if created is None:
        properties.created = datetime.datetime.now()
    else:
        properties.created = W3CDTF_to_datetime(created.text)

    modified = document.find('{%s}modified' % DCTERMS_NS)
    if modified is None:
        properties.modified = properties.created
    else:
        properties.modified = W3CDTF_to_datetime(modified.text)

    return properties
def get_number_of_parts(xml_source):
    """Get a list of contents of the workbook.

    Reads the children of the ``vector`` under ``HeadingPairs`` two at a
    time: a part name (``lpstr``) followed by its size (``i4``).

    Returns ``(parts_size, parts_names)`` - a dict from part name to size
    and the list of part names in order of first appearance.
    """
    parts_size = {}
    parts_names = []
    root = fromstring(xml_source)
    heading_pairs = root.find(
        QName(
            'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties',
            'HeadingPairs').text)
    vector = heading_pairs.find(QName(NAMESPACES['vt'], 'vector').text)
    # list(element) replaces Element.getchildren(), which was removed from
    # xml.etree.ElementTree in Python 3.9.
    children = list(vector)
    for child_id in range(0, len(children), 2):
        name_node = children[child_id]
        size_node = children[child_id + 1]
        part_name = name_node.find(
            QName(NAMESPACES['vt'], 'lpstr').text).text
        if part_name not in parts_names:
            parts_names.append(part_name)
        parts_size[part_name] = int(
            size_node.find(QName(NAMESPACES['vt'], 'i4').text).text)
    return parts_size, parts_names
def read_properties_core(xml_source):
    """Read assorted file properties.

    Builds a ``DocumentProperties`` from the core properties XML: creator
    and last modifier default to ``''``, the creation date defaults to the
    current time and the modification date defaults to the creation date.
    """
    properties = DocumentProperties()
    root = fromstring(xml_source)
    properties.creator = root.findtext('{%s}creator' % DCORE_NS, '')
    properties.last_modified_by = root.findtext(
        '{%s}lastModifiedBy' % COREPROPS_NS, '')

    created_node = root.find('{%s}created' % DCTERMS_NS)
    properties.created = (
        datetime.datetime.now() if created_node is None
        else W3CDTF_to_datetime(created_node.text))

    modified_node = root.find('{%s}modified' % DCTERMS_NS)
    properties.modified = (
        properties.created if modified_node is None
        else W3CDTF_to_datetime(modified_node.text))
    return properties
def read_style_table(xml_source):
    """Read styles from the shared style table.

    Returns a dict mapping the position of each ``xf`` node under
    ``cellXfs`` to a new ``Style`` with its number format code set.  Ids
    below 164 are looked up in the builtin formats (``"General"`` when
    unknown); other ids must be among the custom formats parsed from the
    document.  An empty dict is returned when the document has no
    ``cellXfs`` element.

    Raises ``MissingNumberFormat`` for an id of 164 or above that has no
    custom format.
    """
    table = {}
    xmlns = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
    root = fromstring(xml_source)
    custom_num_formats = parse_custom_num_formats(root, xmlns)
    builtin_formats = NumberFormat._BUILTIN_FORMATS
    cell_xfs = root.find(QName(xmlns, "cellXfs").text)
    if cell_xfs is None:
        # find() returns None when the element is missing; return the empty
        # table instead of failing with AttributeError.
        return table
    cell_xfs_nodes = cell_xfs.findall(QName(xmlns, "xf").text)
    for index, cell_xfs_node in enumerate(cell_xfs_nodes):
        new_style = Style()
        number_format_id = int(cell_xfs_node.get("numFmtId"))
        if number_format_id < 164:
            new_style.number_format.format_code = builtin_formats.get(number_format_id, "General")
        elif number_format_id in custom_num_formats:
            new_style.number_format.format_code = custom_num_formats[number_format_id]
        else:
            raise MissingNumberFormat("%s" % number_format_id)
        table[index] = new_style
    return table
def write_root_rels(workbook):
    """Write the relationships xml.

    Three fixed relationships (workbook, core properties, extended
    properties) are always written.  When the workbook has a
    ``vba_archive`` whose root relationships contain a target equal to
    ``ARC_CUSTOM_UI``, that relationship is added too, reusing its id.

    Returns whatever ``get_document_content`` produces for the root element.
    """
    relation_tag = '{%s}Relationship' % PKG_REL_NS
    root = Element('{%s}Relationships' % PKG_REL_NS)

    fixed_relations = (
        ('rId1', ARC_WORKBOOK, '%s/officeDocument' % REL_NS),
        ('rId2', ARC_CORE, '%s/metadata/core-properties' % PKG_REL_NS),
        ('rId3', ARC_APP, '%s/extended-properties' % REL_NS),
    )
    for rel_id, target, rel_type in fixed_relations:
        SubElement(root, relation_tag,
                   {'Id': rel_id, 'Target': target, 'Type': rel_type})

    if workbook.vba_archive is not None:
        # See if there was a customUI relation and reuse its id
        arc = fromstring(workbook.vba_archive.read(ARC_ROOT_RELS))
        custom_ui_id = next(
            (rel.get('Id') for rel in arc.findall(relation_tag)
             if rel.get('Target') == ARC_CUSTOM_UI),
            None)
        if custom_ui_id is not None:
            SubElement(root, relation_tag,
                       {'Id': custom_ui_id, 'Target': ARC_CUSTOM_UI,
                        'Type': '%s' % CUSTOMUI_NS})

    return get_document_content(root)
def write_root_rels(workbook):
    """Write the relationships xml.

    Always emits the workbook, core-properties and extended-properties
    relationships.  If the workbook has a ``vba_archive``, its root
    relationships are searched for a target equal to ``ARC_CUSTOM_UI`` and,
    when found with an id, that relationship is emitted with the same id.

    Returns whatever ``get_document_content`` produces for the root element.
    """
    root = Element('{%s}Relationships' % PKG_REL_NS)
    relation_tag = '{%s}Relationship' % PKG_REL_NS

    def _relate(rel_id, target, rel_type):
        # Append one Relationship child to the root.
        SubElement(root, relation_tag,
                   {'Id': rel_id, 'Target': target, 'Type': rel_type})

    _relate('rId1', ARC_WORKBOOK, '%s/officeDocument' % REL_NS)
    _relate('rId2', ARC_CORE, '%s/metadata/core-properties' % PKG_REL_NS)
    _relate('rId3', ARC_APP, '%s/extended-properties' % REL_NS)

    if workbook.vba_archive is None:
        return get_document_content(root)

    # See if there was a customUI relation and reuse its id
    existing = fromstring(workbook.vba_archive.read(ARC_ROOT_RELS))
    reused_id = None
    for rel in existing.findall(relation_tag):
        if rel.get('Target') == ARC_CUSTOM_UI:
            reused_id = rel.get('Id')
            break
    if reused_id is not None:
        _relate(reused_id, ARC_CUSTOM_UI, '%s' % CUSTOMUI_NS)

    return get_document_content(root)
def read_content_types(xml_source):
    """Yield ``(PartName, ContentType)`` for each ``Override`` element.

    This is a generator: ``xml_source`` is parsed when iteration starts.
    Only ``Override`` elements that are direct children of the document root
    are reported.
    """
    override_tag = '{http://schemas.openxmlformats.org/package/2006/content-types}Override'
    document = fromstring(xml_source)
    for override in document.findall(override_tag):
        part_name = override.get('PartName')
        content_type = override.get('ContentType')
        yield part_name, content_type
def write_content_types(workbook):
    """Build the content-types document for ``workbook`` and serialize it.

    With a VBA archive, the archive's existing content-types document is the
    starting point and its ``Override`` part names are remembered so they are
    not added a second time. Otherwise a fresh ``Types`` root with the fixed
    entries is created. In both cases an ``Override`` is then added per
    worksheet, drawing, chart and chart-shapes drawing.
    """
    types_ns = 'http://schemas.openxmlformats.org/package/2006/content-types'
    seen = set()

    if workbook.vba_archive:
        root = fromstring(workbook.vba_archive.read(ARC_CONTENT_TYPES))
        register_namespace('', types_ns)
        for existing in root.findall('{%s}Override' % types_ns):
            seen.add(existing.attrib['PartName'])
    else:
        root = Element('Types', {'xmlns': types_ns})
        # (element name, identifying attribute, its value, content type),
        # listed in the order the children are appended.
        static_entries = (
            ('Override', 'PartName', '/' + ARC_THEME,
             'application/vnd.openxmlformats-officedocument.theme+xml'),
            ('Override', 'PartName', '/' + ARC_STYLE,
             'application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml'),
            ('Default', 'Extension', 'rels',
             'application/vnd.openxmlformats-package.relationships+xml'),
            ('Default', 'Extension', 'xml', 'application/xml'),
            ('Default', 'Extension', 'png', 'image/png'),
            ('Override', 'PartName', '/' + ARC_WORKBOOK,
             'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml'),
            ('Override', 'PartName', '/' + ARC_APP,
             'application/vnd.openxmlformats-officedocument.extended-properties+xml'),
            ('Override', 'PartName', '/' + ARC_CORE,
             'application/vnd.openxmlformats-package.core-properties+xml'),
            ('Override', 'PartName', '/' + ARC_SHARED_STRINGS,
             'application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml'),
        )
        for element_name, key, value, content_type in static_entries:
            SubElement(root, element_name,
                       {key: value, 'ContentType': content_type})

    def add_override_once(part, content_type):
        # Append an Override unless the archive already declared this part.
        if part in seen:
            return
        SubElement(root, 'Override',
                   {'PartName': part, 'ContentType': content_type})

    next_drawing = 1
    next_chart = 1
    for position, sheet in enumerate(workbook.worksheets):
        add_override_once(
            '/xl/worksheets/sheet%d.xml' % (position + 1),
            'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml')

        if not (sheet._charts or sheet._images):
            continue

        add_override_once(
            '/xl/drawings/drawing%d.xml' % next_drawing,
            'application/vnd.openxmlformats-officedocument.drawing+xml')
        # Ids advance even when the part was already declared.
        next_drawing += 1

        for chart in sheet._charts:
            add_override_once(
                '/xl/charts/chart%d.xml' % next_chart,
                'application/vnd.openxmlformats-officedocument.drawingml.chart+xml')
            next_chart += 1
            if chart._shapes:
                add_override_once(
                    '/xl/drawings/drawing%d.xml' % next_drawing,
                    'application/vnd.openxmlformats-officedocument.drawingml.chartshapes+xml')
                next_drawing += 1

    return get_document_content(root)
def test_write(self):
    """Check the XML produced by ``self.sw.write(0)``.

    The output must validate against ``chart_schema`` and must match the
    expected ``userShapes`` document when compared with ``compare_xml``.
    """
    xml = self.sw.write(0)
    tree = fromstring(xml)
    # Schema validation first, so a malformed document fails before the diff.
    chart_schema.assertValid(tree)
    # Expected document; kept exactly as written because it is runtime data.
    expected = """ <c:userShapes xmlns:c="http://schemas.openxmlformats.org/drawingml/2006/chart"> <cdr:relSizeAnchor xmlns:cdr="http://schemas.openxmlformats.org/drawingml/2006/chartDrawing"> <cdr:from> <cdr:x>1</cdr:x> <cdr:y>1</cdr:y> </cdr:from> <cdr:to> <cdr:x>1</cdr:x> <cdr:y>1</cdr:y> </cdr:to> <cdr:sp macro="" textlink=""> <cdr:nvSpPr> <cdr:cNvPr id="0" name="shape 0" /> <cdr:cNvSpPr /> </cdr:nvSpPr> <cdr:spPr> <a:xfrm xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"> <a:off x="0" y="0" /> <a:ext cx="0" cy="0" /> </a:xfrm> <a:prstGeom prst="rect" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"> <a:avLst /> </a:prstGeom> <a:solidFill xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"> <a:srgbClr val="FFFFFF" /> </a:solidFill> <a:ln w="0" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"> <a:solidFill> <a:srgbClr val="000000" /> </a:solidFill> </a:ln> </cdr:spPr> <cdr:style> <a:lnRef idx="2" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"> <a:schemeClr val="accent1"> <a:shade val="50000" /> </a:schemeClr> </a:lnRef> <a:fillRef idx="1" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"> <a:schemeClr val="accent1" /> </a:fillRef> <a:effectRef idx="0" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"> <a:schemeClr val="accent1" /> </a:effectRef> <a:fontRef idx="minor" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"> <a:schemeClr val="lt1" /> </a:fontRef> </cdr:style> <cdr:txBody> <a:bodyPr vertOverflow="clip" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" /> <a:lstStyle xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" /> <a:p xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"> <a:r> <a:rPr lang="en-US"> <a:solidFill> <a:srgbClr val="000000" /> </a:solidFill> </a:rPr> <a:t>My first chart</a:t> </a:r> </a:p> </cdr:txBody> </cdr:sp> </cdr:relSizeAnchor> </c:userShapes> """
    # compare_xml is expected to return None when the documents match; any
    # other value is used as the assertion message.
    diff = compare_xml(xml, expected)
    assert diff is None, diff
def test_get_author_list():
    """``comments._get_author_list`` returns author names in document order."""
    source = """<?xml version="1.0" standalone="yes"?><comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"><authors> <author>Cuke</author><author>Not Cuke</author></authors><commentList> </commentList></comments>"""
    root = fromstring(source)
    authors = comments._get_author_list(root)
    assert authors == ['Cuke', 'Not Cuke']
def read_sheets_titles(xml_source):
    """Return the ``name`` attribute of every child of the ``sheets`` element.

    The result is a list in document order; a child without a ``name``
    attribute contributes ``None``.
    """
    sheets_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}sheets'
    sheets_node = fromstring(xml_source).find(sheets_tag)
    titles = []
    for sheet_node in sheets_node:
        titles.append(sheet_node.get('name'))
    return titles
def write_worksheet(worksheet, string_table, style_table):
    """Write a worksheet to an xml string.

    Elements are emitted in this order: ``sheetPr``, ``dimension``, sheet
    views, ``sheetFormatPr``, columns, cell data, ``autoFilter``, merged
    cells, data validations, hyperlinks, conditional formatting, print
    options, page margins, page setup, header/footer, ``drawing``, preserved
    ``legacyDrawing``/``controls`` elements and finally ``rowBreaks``.

    :param worksheet: the worksheet to serialize. When its ``xml_source`` is
        set, that XML is parsed to recover the sheet code name and any
        ``legacyDrawing``/``controls`` elements.
    :param string_table: passed through to ``write_worksheet_data``.
    :param style_table: passed through to ``write_worksheet_cols`` and
        ``write_worksheet_data``.
    :return: the serialized worksheet document.
    """
    if worksheet.xml_source:
        vba_root = fromstring(worksheet.xml_source)
    else:
        vba_root = None
    xml_file = StringIO()
    doc = XMLGenerator(out=xml_file, encoding='utf-8')
    start_tag(doc, 'worksheet',
              {'xml:space': 'preserve',
               'xmlns': SHEET_MAIN_NS,
               'xmlns:r': REL_NS})

    # The preserved sheet is not guaranteed to contain a sheetPr element;
    # find() returns None in that case, so fall back to a plain sheetPr.
    sheet_pr = None
    if vba_root is not None:
        sheet_pr = vba_root.find('{%s}sheetPr' % SHEET_MAIN_NS)
    if sheet_pr is not None:
        codename = sheet_pr.get('codeName', worksheet.title)
        start_tag(doc, 'sheetPr', {"codeName": codename})
    else:
        start_tag(doc, 'sheetPr')
    tag(doc, 'outlinePr',
        {'summaryBelow': '%d' % (worksheet.show_summary_below),
         'summaryRight': '%d' % (worksheet.show_summary_right)})
    if worksheet.page_setup.fitToPage:
        tag(doc, 'pageSetUpPr', {'fitToPage': '1'})
    end_tag(doc, 'sheetPr')

    tag(doc, 'dimension', {'ref': '%s' % worksheet.calculate_dimension()})
    write_worksheet_sheetviews(doc, worksheet)
    tag(doc, 'sheetFormatPr', {'defaultRowHeight': '15'})
    write_worksheet_cols(doc, worksheet, style_table)
    write_worksheet_data(doc, worksheet, string_table, style_table)
    if worksheet.auto_filter:
        tag(doc, 'autoFilter', {'ref': worksheet.auto_filter})
    write_worksheet_mergecells(doc, worksheet)
    write_worksheet_datavalidations(doc, worksheet)
    write_worksheet_hyperlinks(doc, worksheet)

    # items() rather than iteritems() so the loop also runs on Python 3.
    for range_string, rules in worksheet.conditional_formatting.cf_rules.items():
        if not rules:
            # Skip if there are no rules. This is possible if a dataBar rule
            # was read in and ignored.
            continue
        start_tag(doc, 'conditionalFormatting', {'sqref': range_string})
        for rule in rules:
            if rule['type'] == 'dataBar':
                # Ignore - uses extLst tag which is currently unsupported.
                continue
            attr = {'type': rule['type']}
            for rule_attr in ConditionalFormatting.rule_attributes:
                if rule_attr in rule:
                    attr[rule_attr] = str(rule[rule_attr])
            start_tag(doc, 'cfRule', attr)
            if 'formula' in rule:
                for f in rule['formula']:
                    tag(doc, 'formula', None, f)
            if 'colorScale' in rule:
                start_tag(doc, 'colorScale')
                for cfvo in rule['colorScale']['cfvo']:
                    tag(doc, 'cfvo', cfvo)
                for color in rule['colorScale']['color']:
                    index_parts = str(color.index).split(':')
                    if index_parts[0] == 'theme':
                        # strip prefix theme if marked as such; the tint part
                        # is optional and may be missing or empty.
                        color_attrs = {'theme': index_parts[1]}
                        if len(index_parts) > 2 and index_parts[2]:
                            color_attrs['tint'] = index_parts[2]
                        tag(doc, 'color', color_attrs)
                    else:
                        tag(doc, 'color', {'rgb': str(color.index)})
                end_tag(doc, 'colorScale')
            if 'iconSet' in rule:
                icon_set = rule['iconSet']
                icon_attrs = {}
                for icon_attr in ConditionalFormatting.icon_attributes:
                    if icon_attr in icon_set:
                        icon_attrs[icon_attr] = icon_set[icon_attr]
                start_tag(doc, 'iconSet', icon_attrs)
                for cfvo in icon_set['cfvo']:
                    tag(doc, 'cfvo', cfvo)
                end_tag(doc, 'iconSet')
            end_tag(doc, 'cfRule')
        end_tag(doc, 'conditionalFormatting')

    options = worksheet.page_setup.options
    if options:
        tag(doc, 'printOptions', options)
    margins = worksheet.page_margins.margins
    if margins:
        tag(doc, 'pageMargins', margins)
    setup = worksheet.page_setup.setup
    if setup:
        tag(doc, 'pageSetup', setup)

    if worksheet.header_footer.hasHeader() or worksheet.header_footer.hasFooter():
        start_tag(doc, 'headerFooter')
        if worksheet.header_footer.hasHeader():
            tag(doc, 'oddHeader', None, worksheet.header_footer.getHeader())
        if worksheet.header_footer.hasFooter():
            tag(doc, 'oddFooter', None, worksheet.header_footer.getFooter())
        end_tag(doc, 'headerFooter')

    if worksheet._charts or worksheet._images:
        tag(doc, 'drawing', {'r:id': 'rId1'})

    # if the sheet has an xml_source field then the workbook must have
    # been loaded with keep-vba true and we need to extract any control
    # elements.
    if vba_root is not None:
        for t in ('{%s}legacyDrawing' % SHEET_MAIN_NS,
                  '{%s}controls' % SHEET_MAIN_NS):
            for elem in vba_root.findall(t):
                # The element is copied verbatim, minus its namespace
                # declarations, straight into the output buffer.
                xml_file.write(re.sub(r' xmlns[^ >]*', '',
                                      tostring(elem).decode("utf-8")))

    breaks = worksheet.page_breaks
    if breaks:
        break_count = str(len(breaks))
        start_tag(doc, 'rowBreaks',
                  {'count': break_count, 'manualBreakCount': break_count})
        for b in breaks:
            tag(doc, 'brk',
                {'id': str(b), 'man': 'true', 'max': '16383', 'min': '0'})
        end_tag(doc, 'rowBreaks')

    end_tag(doc, 'worksheet')
    doc.endDocument()
    xml_string = xml_file.getvalue()
    xml_file.close()
    return xml_string
def read_workbook_settings(self, xml_source):
    """Pick up the active sheet index from the workbook XML.

    Looks for a ``workbookView`` element one level below a child of the
    root. If it has an ``activeTab`` attribute, that value is stored as an
    int in ``self._active_sheet_index``. Nothing is changed when the element
    or the attribute is missing.

    :param xml_source: XML text of the workbook part.
    """
    root = fromstring(xml_source)
    view = root.find('*/{%s}workbookView' % SHEET_MAIN_NS)
    if view is None:
        # find() returns None when there is no workbookView element.
        return
    active_tab = view.get('activeTab')
    if active_tab is not None:
        self._active_sheet_index = int(active_tab)
def write_worksheet(worksheet, string_table, style_table):
    """Serialize ``worksheet`` and return the resulting XML string.

    When the worksheet carries ``xml_source``, that XML is parsed so the
    sheet code name and the relationship id of a ``legacyDrawing`` element
    can be reused. A second ``legacyDrawing`` element is written for sheets
    with comments.
    """
    preserved_root = None
    if worksheet.xml_source:
        preserved_root = fromstring(worksheet.xml_source)

    buffer = StringIO()
    doc = XMLGenerator(out=buffer, encoding='utf-8')
    start_tag(doc, 'worksheet', {'xmlns': SHEET_MAIN_NS, 'xmlns:r': REL_NS})

    # sheetPr: reuse the preserved code name when one can be found.
    preserved_sheet_pr = None
    if preserved_root is not None:
        preserved_sheet_pr = preserved_root.find('{%s}sheetPr' % SHEET_MAIN_NS)
    if preserved_sheet_pr is None:
        start_tag(doc, 'sheetPr')
    else:
        start_tag(
            doc, 'sheetPr',
            {"codeName": preserved_sheet_pr.get('codeName', worksheet.title)})
    outline_attrs = {
        'summaryBelow': '%d' % (worksheet.show_summary_below),
        'summaryRight': '%d' % (worksheet.show_summary_right),
    }
    tag(doc, 'outlinePr', outline_attrs)
    if worksheet.page_setup.fitToPage:
        tag(doc, 'pageSetUpPr', {'fitToPage': '1'})
    end_tag(doc, 'sheetPr')

    tag(doc, 'dimension', {'ref': '%s' % worksheet.calculate_dimension()})
    write_worksheet_sheetviews(doc, worksheet)
    tag(doc, 'sheetFormatPr', {'defaultRowHeight': '15'})
    write_worksheet_cols(doc, worksheet, style_table)
    write_worksheet_data(doc, worksheet, string_table, style_table)
    if worksheet.auto_filter:
        tag(doc, 'autoFilter', {'ref': worksheet.auto_filter})
    write_worksheet_mergecells(doc, worksheet)
    write_worksheet_datavalidations(doc, worksheet)
    write_worksheet_hyperlinks(doc, worksheet)
    write_worksheet_conditional_formatting(doc, worksheet)

    # Page-level settings; each element is written only when it has content.
    print_options = worksheet.page_setup.options
    if print_options:
        tag(doc, 'printOptions', print_options)
    page_margins = worksheet.page_margins.margins
    if page_margins:
        tag(doc, 'pageMargins', page_margins)
    page_setup = worksheet.page_setup.setup
    if page_setup:
        tag(doc, 'pageSetup', page_setup)

    if worksheet.header_footer.hasHeader() or worksheet.header_footer.hasFooter():
        start_tag(doc, 'headerFooter')
        if worksheet.header_footer.hasHeader():
            tag(doc, 'oddHeader', None, worksheet.header_footer.getHeader())
        if worksheet.header_footer.hasFooter():
            tag(doc, 'oddFooter', None, worksheet.header_footer.getFooter())
        end_tag(doc, 'headerFooter')

    if worksheet._charts or worksheet._images:
        tag(doc, 'drawing', {'r:id': 'rId1'})

    # If vba is being preserved then add a legacyDrawing element so
    # that any controls can be drawn.
    if preserved_root is not None:
        legacy_drawing = preserved_root.find(
            '{%s}legacyDrawing' % SHEET_MAIN_NS)
        if legacy_drawing is not None:
            tag(doc, 'legacyDrawing',
                {'r:id': legacy_drawing.get('{%s}id' % REL_NS)})

    page_breaks = worksheet.page_breaks
    if page_breaks:
        start_tag(doc, 'rowBreaks',
                  {'count': str(len(page_breaks)),
                   'manualBreakCount': str(len(page_breaks))})
        for row in page_breaks:
            tag(doc, 'brk',
                {'id': str(row), 'man': 'true', 'max': '16383', 'min': '0'})
        end_tag(doc, 'rowBreaks')

    # add a legacyDrawing so that excel can draw comments
    if worksheet._comment_count > 0:
        tag(doc, 'legacyDrawing', {'r:id': 'commentsvml'})

    end_tag(doc, 'worksheet')
    doc.endDocument()
    serialized = buffer.getvalue()
    buffer.close()
    return serialized
def write_theme():
    """Write the theme xml.

    Parses the fixed "Office Theme" document embedded below (colour scheme,
    major/minor font scheme and format scheme) and returns the result of
    passing the parsed root to ``get_document_content``.
    """
    # The literal is runtime data: adjacent string pieces are concatenated by
    # the parser, so every piece must stay exactly as written.
    xml_node = fromstring(
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n'
        '<a:theme xmlns:a="http://schemas.openxmlformats.org/'
        'drawingml/2006/main" name="Office Theme">'
        '<a:themeElements>'
        # Colour scheme
        '<a:clrScheme name="Office">'
        '<a:dk1><a:sysClr val="windowText" lastClr="000000"/></a:dk1>'
        '<a:lt1><a:sysClr val="window" lastClr="FFFFFF"/></a:lt1>'
        '<a:dk2><a:srgbClr val="1F497D"/></a:dk2>'
        '<a:lt2><a:srgbClr val="EEECE1"/></a:lt2>'
        '<a:accent1><a:srgbClr val="4F81BD"/></a:accent1>'
        '<a:accent2><a:srgbClr val="C0504D"/></a:accent2>'
        '<a:accent3><a:srgbClr val="9BBB59"/></a:accent3>'
        '<a:accent4><a:srgbClr val="8064A2"/></a:accent4>'
        '<a:accent5><a:srgbClr val="4BACC6"/></a:accent5>'
        '<a:accent6><a:srgbClr val="F79646"/></a:accent6>'
        '<a:hlink><a:srgbClr val="0000FF"/></a:hlink>'
        '<a:folHlink><a:srgbClr val="800080"/></a:folHlink>'
        '</a:clrScheme>'
        # Font scheme: major fonts, then minor fonts
        '<a:fontScheme name="Office">'
        '<a:majorFont>'
        '<a:latin typeface="Cambria"/>'
        '<a:ea typeface=""/>'
        '<a:cs typeface=""/>'
        '<a:font script="Jpan" typeface="MS Pゴシック"/>'
        '<a:font script="Hang" typeface="맑은 고딕"/>'
        '<a:font script="Hans" typeface="宋体"/>'
        '<a:font script="Hant" typeface="新細明體"/>'
        '<a:font script="Arab" typeface="Times New Roman"/>'
        '<a:font script="Hebr" typeface="Times New Roman"/>'
        '<a:font script="Thai" typeface="Tahoma"/>'
        '<a:font script="Ethi" typeface="Nyala"/>'
        '<a:font script="Beng" typeface="Vrinda"/>'
        '<a:font script="Gujr" typeface="Shruti"/>'
        '<a:font script="Khmr" typeface="MoolBoran"/>'
        '<a:font script="Knda" typeface="Tunga"/>'
        '<a:font script="Guru" typeface="Raavi"/>'
        '<a:font script="Cans" typeface="Euphemia"/>'
        '<a:font script="Cher" typeface="Plantagenet Cherokee"/>'
        '<a:font script="Yiii" typeface="Microsoft Yi Baiti"/>'
        '<a:font script="Tibt" typeface="Microsoft Himalaya"/>'
        '<a:font script="Thaa" typeface="MV Boli"/>'
        '<a:font script="Deva" typeface="Mangal"/>'
        '<a:font script="Telu" typeface="Gautami"/>'
        '<a:font script="Taml" typeface="Latha"/>'
        '<a:font script="Syrc" typeface="Estrangelo Edessa"/>'
        '<a:font script="Orya" typeface="Kalinga"/>'
        '<a:font script="Mlym" typeface="Kartika"/>'
        '<a:font script="Laoo" typeface="DokChampa"/>'
        '<a:font script="Sinh" typeface="Iskoola Pota"/>'
        '<a:font script="Mong" typeface="Mongolian Baiti"/>'
        '<a:font script="Viet" typeface="Times New Roman"/>'
        '<a:font script="Uigh" typeface="Microsoft Uighur"/>'
        '</a:majorFont>'
        '<a:minorFont>'
        '<a:latin typeface="Calibri"/>'
        '<a:ea typeface=""/>'
        '<a:cs typeface=""/>'
        '<a:font script="Jpan" typeface="MS Pゴシック"/>'
        '<a:font script="Hang" typeface="맑은 고딕"/>'
        '<a:font script="Hans" typeface="宋体"/>'
        '<a:font script="Hant" typeface="新細明體"/>'
        '<a:font script="Arab" typeface="Arial"/>'
        '<a:font script="Hebr" typeface="Arial"/>'
        '<a:font script="Thai" typeface="Tahoma"/>'
        '<a:font script="Ethi" typeface="Nyala"/>'
        '<a:font script="Beng" typeface="Vrinda"/>'
        '<a:font script="Gujr" typeface="Shruti"/>'
        '<a:font script="Khmr" typeface="DaunPenh"/>'
        '<a:font script="Knda" typeface="Tunga"/>'
        '<a:font script="Guru" typeface="Raavi"/>'
        '<a:font script="Cans" typeface="Euphemia"/>'
        '<a:font script="Cher" typeface="Plantagenet Cherokee"/>'
        '<a:font script="Yiii" typeface="Microsoft Yi Baiti"/>'
        '<a:font script="Tibt" typeface="Microsoft Himalaya"/>'
        '<a:font script="Thaa" typeface="MV Boli"/>'
        '<a:font script="Deva" typeface="Mangal"/>'
        '<a:font script="Telu" typeface="Gautami"/>'
        '<a:font script="Taml" typeface="Latha"/>'
        '<a:font script="Syrc" typeface="Estrangelo Edessa"/>'
        '<a:font script="Orya" typeface="Kalinga"/>'
        '<a:font script="Mlym" typeface="Kartika"/>'
        '<a:font script="Laoo" typeface="DokChampa"/>'
        '<a:font script="Sinh" typeface="Iskoola Pota"/>'
        '<a:font script="Mong" typeface="Mongolian Baiti"/>'
        '<a:font script="Viet" typeface="Arial"/>'
        '<a:font script="Uigh" typeface="Microsoft Uighur"/>'
        '</a:minorFont>'
        '</a:fontScheme>'
        # Format scheme: fill, line, effect and background fill style lists
        '<a:fmtScheme name="Office">'
        '<a:fillStyleLst>'
        '<a:solidFill><a:schemeClr val="phClr"/></a:solidFill>'
        '<a:gradFill rotWithShape="1"><a:gsLst>'
        '<a:gs pos="0"><a:schemeClr val="phClr"><a:tint val="50000"/>'
        '<a:satMod val="300000"/></a:schemeClr></a:gs>'
        '<a:gs pos="35000"><a:schemeClr val="phClr"><a:tint val="37000"/>'
        '<a:satMod val="300000"/></a:schemeClr></a:gs>'
        '<a:gs pos="100000"><a:schemeClr val="phClr"><a:tint val="15000"/>'
        '<a:satMod val="350000"/></a:schemeClr></a:gs></a:gsLst>'
        '<a:lin ang="16200000" scaled="1"/></a:gradFill>'
        '<a:gradFill rotWithShape="1"><a:gsLst>'
        '<a:gs pos="0"><a:schemeClr val="phClr"><a:shade val="51000"/>'
        '<a:satMod val="130000"/></a:schemeClr></a:gs>'
        '<a:gs pos="80000"><a:schemeClr val="phClr"><a:shade val="93000"/>'
        '<a:satMod val="130000"/></a:schemeClr></a:gs>'
        '<a:gs pos="100000"><a:schemeClr val="phClr">'
        '<a:shade val="94000"/>'
        '<a:satMod val="135000"/></a:schemeClr></a:gs></a:gsLst>'
        '<a:lin ang="16200000" scaled="0"/></a:gradFill></a:fillStyleLst>'
        '<a:lnStyleLst>'
        '<a:ln w="9525" cap="flat" cmpd="sng" algn="ctr">'
        '<a:solidFill><a:schemeClr val="phClr"><a:shade val="95000"/>'
        '<a:satMod val="105000"/></a:schemeClr></a:solidFill>'
        '<a:prstDash val="solid"/></a:ln>'
        '<a:ln w="25400" cap="flat" cmpd="sng" algn="ctr"><a:solidFill>'
        '<a:schemeClr val="phClr"/></a:solidFill>'
        '<a:prstDash val="solid"/></a:ln>'
        '<a:ln w="38100" cap="flat" cmpd="sng" algn="ctr"><a:solidFill>'
        '<a:schemeClr val="phClr"/></a:solidFill>'
        '<a:prstDash val="solid"/></a:ln></a:lnStyleLst>'
        '<a:effectStyleLst><a:effectStyle><a:effectLst>'
        '<a:outerShdw blurRad="40000" dist="20000" dir="5400000" '
        'rotWithShape="0"><a:srgbClr val="000000">'
        '<a:alpha val="38000"/></a:srgbClr></a:outerShdw></a:effectLst>'
        '</a:effectStyle><a:effectStyle><a:effectLst>'
        '<a:outerShdw blurRad="40000" dist="23000" dir="5400000" '
        'rotWithShape="0"><a:srgbClr val="000000">'
        '<a:alpha val="35000"/></a:srgbClr></a:outerShdw></a:effectLst>'
        '</a:effectStyle><a:effectStyle><a:effectLst>'
        '<a:outerShdw blurRad="40000" dist="23000" dir="5400000" '
        'rotWithShape="0"><a:srgbClr val="000000">'
        '<a:alpha val="35000"/></a:srgbClr></a:outerShdw></a:effectLst>'
        '<a:scene3d><a:camera prst="orthographicFront">'
        '<a:rot lat="0" lon="0" rev="0"/></a:camera>'
        '<a:lightRig rig="threePt" dir="t">'
        '<a:rot lat="0" lon="0" rev="1200000"/></a:lightRig>'
        '</a:scene3d><a:sp3d><a:bevelT w="63500" h="25400"/>'
        '</a:sp3d></a:effectStyle></a:effectStyleLst>'
        '<a:bgFillStyleLst><a:solidFill><a:schemeClr val="phClr"/>'
        '</a:solidFill><a:gradFill rotWithShape="1"><a:gsLst>'
        '<a:gs pos="0"><a:schemeClr val="phClr"><a:tint val="40000"/>'
        '<a:satMod val="350000"/></a:schemeClr></a:gs>'
        '<a:gs pos="40000"><a:schemeClr val="phClr"><a:tint val="45000"/>'
        '<a:shade val="99000"/><a:satMod val="350000"/>'
        '</a:schemeClr></a:gs>'
        '<a:gs pos="100000"><a:schemeClr val="phClr">'
        '<a:shade val="20000"/><a:satMod val="255000"/>'
        '</a:schemeClr></a:gs></a:gsLst>'
        '<a:path path="circle">'
        '<a:fillToRect l="50000" t="-80000" r="50000" b="180000"/>'
        '</a:path>'
        '</a:gradFill><a:gradFill rotWithShape="1"><a:gsLst>'
        '<a:gs pos="0"><a:schemeClr val="phClr"><a:tint val="80000"/>'
        '<a:satMod val="300000"/></a:schemeClr></a:gs>'
        '<a:gs pos="100000"><a:schemeClr val="phClr">'
        '<a:shade val="30000"/><a:satMod val="200000"/>'
        '</a:schemeClr></a:gs></a:gsLst>'
        '<a:path path="circle">'
        '<a:fillToRect l="50000" t="50000" r="50000" b="50000"/></a:path>'
        '</a:gradFill></a:bgFillStyleLst></a:fmtScheme>'
        '</a:themeElements>'
        '<a:objectDefaults/><a:extraClrSchemeLst/>'
        '</a:theme>')
    return get_document_content(xml_node)
def fast_parse(ws, xml_source, string_table, style_table):
    """Populate worksheet ``ws`` from the worksheet XML in ``xml_source``.

    Reads, in this order: merged cell ranges, cell values and style
    references, column dimensions, print options, page margins, page setup
    and the odd header/footer. Sections missing from the XML are skipped.

    :param ws: worksheet object to fill in; it is modified in place.
    :param xml_source: XML text of the worksheet part.
    :param string_table: mapping queried with ``.get(int(value))`` for cells
        whose ``t`` attribute equals ``Cell.TYPE_STRING``.
    :param style_table: mapping queried with ``.get(int(style_id))`` for
        cells that carry an ``s`` attribute.
    """
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(xml_source)

    def qualified(tag_name):
        # Namespace-qualified tag text for the main spreadsheet namespace.
        return QName(xmlns, tag_name).text

    merge_cells = root.find(qualified('mergeCells'))
    if merge_cells is not None:
        for merge_cell in merge_cells.findall(qualified('mergeCell')):
            ws.merge_cells(merge_cell.get('ref'))

    source = _get_xml_iter(xml_source)
    it = iterparse(source)
    for event, element in filter(filter_cells, it):
        value = element.findtext(
            '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v')
        coordinate = element.get('r')
        style_id = element.get('s')
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))
        if value is not None:
            data_type = element.get('t', 'n')
            if data_type == Cell.TYPE_STRING:
                # String cells hold an index into the shared string table.
                value = string_table.get(int(value))
            ws.cell(coordinate).value = value
        # to avoid memory exhaustion, clear the item after use
        element.clear()

    cols = root.find(qualified('cols'))
    if cols is not None:
        for col in cols.findall(qualified('col')):
            # A col element covers the inclusive range min..max; a missing or
            # empty bound falls back to 1.
            first = int(col.get('min')) if col.get('min') else 1
            last = int(col.get('max')) if col.get('max') else 1
            for col_id in range(first, last + 1):
                column = get_column_letter(col_id)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                dimension = ws.column_dimensions[column]
                if col.get('width') is not None:
                    dimension.width = float(col.get('width'))
                if col.get('bestFit') == '1':
                    dimension.auto_size = True
                if col.get('hidden') == '1':
                    dimension.visible = False
                if col.get('outlineLevel') is not None:
                    dimension.outline_level = int(col.get('outlineLevel'))
                if col.get('collapsed') == '1':
                    dimension.collapsed = True
                if col.get('style') is not None:
                    dimension.style_index = col.get('style')

    print_options = root.find(qualified('printOptions'))
    if print_options is not None:
        for name in ('horizontalCentered', 'verticalCentered'):
            raw = print_options.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    page_margins = root.find(qualified('pageMargins'))
    if page_margins is not None:
        for name in ('left', 'right', 'top', 'bottom', 'header', 'footer'):
            raw = page_margins.get(name)
            if raw is not None:
                setattr(ws.page_margins, name, float(raw))

    page_setup = root.find(qualified('pageSetup'))
    if page_setup is not None:
        # Each attribute is copied, as a string, to the page_setup attribute
        # of the same name. 'scale' was previously stored under 'top', which
        # lost the scale setting.
        for name in ('orientation', 'paperSize', 'scale', 'fitToPage',
                     'fitToHeight', 'fitToWidth', 'firstPageNumber',
                     'useFirstPageNumber'):
            raw = page_setup.get(name)
            if raw is not None:
                setattr(ws.page_setup, name, raw)

    header_footer = root.find(qualified('headerFooter'))
    if header_footer is not None:
        odd_header = header_footer.find(qualified('oddHeader'))
        if odd_header is not None:
            ws.header_footer.setHeader(odd_header.text)
        odd_footer = header_footer.find(qualified('oddFooter'))
        if odd_footer is not None:
            ws.header_footer.setFooter(odd_footer.text)
def _is_xml_true(value):
    """Return True when *value* is an XML boolean spelling of true."""
    return value in ('1', 'true')


def read_style_table(xml_source):
    """Read styles from the shared style table.

    Builds one ``Style`` per ``xf`` child of ``cellXfs``. The number format
    is always set. Alignment, font, fill, borders and protection are copied
    only when the matching ``apply*`` attribute of the ``xf`` node is ``'1'``.

    :param xml_source: XML text of the styles part.
    :return: dict mapping the position of each ``xf`` node to its ``Style``.
        The dict is empty when the document has no ``cellXfs`` element.
    :raises MissingNumberFormat: if an ``xf`` node uses a format id of 164 or
        above that is not among the parsed custom number formats.
    """
    table = {}
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(xml_source)
    custom_num_formats = parse_custom_num_formats(root, xmlns)
    color_index = parse_color_index(root, xmlns)
    font_list = parse_fonts(root, xmlns, color_index)
    fill_list = parse_fills(root, xmlns, color_index)
    border_list = parse_borders(root, xmlns, color_index)
    builtin_formats = NumberFormat._BUILTIN_FORMATS
    cell_xfs = root.find(QName(xmlns, 'cellXfs').text)
    if cell_xfs is None:
        # can happen on bad OOXML writers (e.g. Gnumeric)
        return table

    cell_xfs_nodes = cell_xfs.findall(QName(xmlns, 'xf').text)
    for index, cell_xfs_node in enumerate(cell_xfs_nodes):
        new_style = Style()

        number_format_id = int(cell_xfs_node.get('numFmtId'))
        if number_format_id < 164:
            new_style.number_format.format_code = \
                builtin_formats.get(number_format_id, 'General')
        elif number_format_id in custom_num_formats:
            new_style.number_format.format_code = \
                custom_num_formats[number_format_id]
        else:
            raise MissingNumberFormat('%s' % number_format_id)

        if cell_xfs_node.get('applyAlignment') == '1':
            alignment = cell_xfs_node.find(QName(xmlns, 'alignment').text)
            if alignment is not None:
                if alignment.get('horizontal') is not None:
                    new_style.alignment.horizontal = alignment.get(
                        'horizontal')
                if alignment.get('vertical') is not None:
                    new_style.alignment.vertical = alignment.get('vertical')
                # The raw attribute is a non-empty string even for "0", so
                # it has to be compared, not truth-tested.
                if _is_xml_true(alignment.get('wrapText')):
                    new_style.alignment.wrap_text = True
                if _is_xml_true(alignment.get('shrinkToFit')):
                    new_style.alignment.shrink_to_fit = True
                if alignment.get('indent') is not None:
                    # NOTE(review): this used to assign to the misspelled
                    # attribute 'ident'; confirm the alignment object names
                    # this attribute 'indent'.
                    new_style.alignment.indent = int(alignment.get('indent'))
                if alignment.get('textRotation') is not None:
                    new_style.alignment.text_rotation = int(
                        alignment.get('textRotation'))
                # ignore justifyLastLine option when horizontal = distributed

        if cell_xfs_node.get('applyFont') == '1':
            source_font = font_list[int(cell_xfs_node.get('fontId'))]
            new_style.font = deepcopy(source_font)
            new_style.font.color = deepcopy(source_font.color)

        if cell_xfs_node.get('applyFill') == '1':
            source_fill = fill_list[int(cell_xfs_node.get('fillId'))]
            new_style.fill = deepcopy(source_fill)
            new_style.fill.start_color = deepcopy(source_fill.start_color)
            new_style.fill.end_color = deepcopy(source_fill.end_color)

        if cell_xfs_node.get('applyBorder') == '1':
            source_borders = border_list[int(cell_xfs_node.get('borderId'))]
            new_style.borders = deepcopy(source_borders)
            for side in ('left', 'right', 'top', 'bottom', 'diagonal'):
                source_side = getattr(source_borders, side)
                setattr(new_style.borders, side, deepcopy(source_side))
                getattr(new_style.borders, side).color = deepcopy(
                    source_side.color)

        if cell_xfs_node.get('applyProtection') == '1':
            protection = cell_xfs_node.find(QName(xmlns, 'protection').text)
            # Ignore if there are no protection sub-nodes
            if protection is not None:
                for flag in ('locked', 'hidden'):
                    raw = protection.get(flag)
                    if raw is None:
                        continue
                    if raw == '1':
                        state = Protection.PROTECTION_PROTECTED
                    else:
                        state = Protection.PROTECTION_UNPROTECTED
                    setattr(new_style.protection, flag, state)

        table[index] = new_style
    return table
def write_content_types(workbook):
    """Build the content-types document for ``workbook`` and serialize it.

    With a VBA archive, the archived content-types document is reused and
    its ``Override`` part names are remembered so they are not declared
    twice. Otherwise a new ``Types`` root is filled from
    ``static_content_types_config``. Overrides are then added per worksheet,
    drawing, chart, chart-shapes drawing and comments part.
    """
    override_tag = '{%s}Override' % CONTYPES_NS
    seen = set()

    if workbook.vba_archive:
        root = fromstring(workbook.vba_archive.read(ARC_CONTENT_TYPES))
        seen.update(elem.attrib['PartName']
                    for elem in root.findall(override_tag))
    else:
        root = Element('{%s}Types' % CONTYPES_NS)
        for setting_type, name, content_type in static_content_types_config:
            if setting_type == 'Override':
                SubElement(root, '{%s}Override' % CONTYPES_NS,
                           {'PartName': '/' + name,
                            'ContentType': content_type})
            else:
                SubElement(root, '{%s}Default' % CONTYPES_NS,
                           {'Extension': name,
                            'ContentType': content_type})

    def add_override_once(part, content_type):
        # Append an Override unless the archive already declared this part.
        if part in seen:
            return
        SubElement(root, override_tag,
                   {'PartName': part, 'ContentType': content_type})

    next_drawing = 1
    next_chart = 1
    next_comments = 1
    for position, sheet in enumerate(workbook.worksheets):
        add_override_once(
            '/xl/worksheets/sheet%d.xml' % (position + 1),
            'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml')

        if sheet._charts or sheet._images:
            add_override_once(
                '/xl/drawings/drawing%d.xml' % next_drawing,
                'application/vnd.openxmlformats-officedocument.drawing+xml')
            # Ids advance even when the part was already declared.
            next_drawing += 1

            for chart in sheet._charts:
                add_override_once(
                    '/xl/charts/chart%d.xml' % next_chart,
                    'application/vnd.openxmlformats-officedocument.drawingml.chart+xml')
                next_chart += 1
                if chart._shapes:
                    add_override_once(
                        '/xl/drawings/drawing%d.xml' % next_drawing,
                        'application/vnd.openxmlformats-officedocument.drawingml.chartshapes+xml')
                    next_drawing += 1

        if sheet._comment_count > 0:
            # Comments parts are declared without consulting ``seen``.
            SubElement(
                root, override_tag,
                {'PartName': '/xl/comments%d.xml' % next_comments,
                 'ContentType': 'application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml'})
            next_comments += 1

    return get_document_content(root)