def _write_chartsheets(self, archive):
    """
    Write every chartsheet of the workbook into ``archive``.

    Each chartsheet is stored as ``sheet<N>.xml`` below PACKAGE_CHARTSHEETS.
    When the chartsheet holds charts, a drawing part, the drawing's own
    relationships and the chartsheet's relationships part are written too.

    :param archive: object exposing ``writestr(name, data)``
    """
    from openpyxl.packaging.relationship import Relationship, RelationshipList
    # Imported by the original code as well, although not referenced below.
    from openpyxl.worksheet.drawing import Drawing

    for position, chartsheet in enumerate(self.workbook.chartsheets, 1):
        chartsheet._path = "sheet{0}.xml".format(position)
        sheet_arc = "{0}/{1}".format(PACKAGE_CHARTSHEETS, chartsheet._path)
        sheet_rels_arc = get_rels_path(sheet_arc)
        serialised = tostring(chartsheet.to_tree())
        archive.writestr(sheet_arc, serialised)

        # Without charts there is neither a drawing nor a rels part to emit.
        if not chartsheet._charts:
            continue

        drawing = SpreadsheetDrawing()
        drawing.charts = chartsheet._charts
        self.workbook._drawings.append(drawing)
        # Drawings are numbered by their position in the workbook-wide list.
        number = len(self.workbook._drawings)

        drawing_arc = "{0}/drawing{1}.xml".format(PACKAGE_DRAWINGS, number)
        archive.writestr(drawing_arc, tostring(drawing._write()))
        archive.writestr(
            "{0}/_rels/drawing{1}.xml.rels".format(PACKAGE_DRAWINGS, number),
            tostring(drawing._write_rels()),
        )

        sheet_rels = RelationshipList()
        sheet_rels.append(Relationship(type="drawing", Target="/" + drawing_arc))
        archive.writestr(sheet_rels_arc, tostring(sheet_rels.to_tree()))
def read_external_link(archive, book_path):
    """
    Build an ExternalLink from the part stored at ``book_path``.

    The first relationship found in the part's rels file is attached to
    the result as ``file_link``.

    :param archive: object exposing ``read(name)``
    :param book_path: archive name of the external link part
    :return: the parsed ExternalLink
    """
    root = fromstring(archive.read(book_path))
    external_book = ExternalLink.from_tree(root)

    relationships = get_dependents(archive, get_rels_path(book_path))
    external_book.file_link = relationships.Relationship[0]
    return external_book
def _write_external_links(self):
    """Write links to external workbooks"""
    # delegate to object
    for number, external in enumerate(self.workbook._external_links, 1):
        # The id is assigned first; the path is only read afterwards.
        external._id = number
        part_name = external.path[1:]  # drop the leading "/"
        rels_name = get_rels_path(part_name)

        self._archive.writestr(part_name, tostring(external.to_tree()))

        link_rels = RelationshipList()
        link_rels.append(external.file_link)
        self._archive.writestr(rels_name, tostring(link_rels.to_tree()))

        self.manifest.append(external)
def _write_worksheets(self):
    """
    Write every worksheet and its dependent parts into ``self._archive``.

    For each worksheet this writes the sheet XML, registers it in the
    manifest and then handles its drawing, comments, legacy (VML) drawing,
    tables and pivot tables before writing the sheet's relationships part.
    """
    seen_caches = set()

    for number, sheet in enumerate(self.workbook.worksheets, 1):
        sheet._id = number
        payload = sheet._write()
        rels_name = get_rels_path(sheet.path)[1:]

        self._archive.writestr(sheet.path[1:], payload)
        self.manifest.append(sheet)

        if sheet._drawing:
            self._write_drawing(sheet._drawing)
            # Retarget drawing relationships at the part just written.
            for relation in sheet._rels.Relationship:
                if "drawing" in relation.Type:
                    relation.Target = sheet._drawing.path

        if sheet._comments:
            self._write_comment(sheet)

        if sheet.legacy_drawing is not None:
            sheet._rels.append(
                Relationship(
                    type="vmlDrawing",
                    Id="anysvml",
                    Target="/" + sheet.legacy_drawing,
                )
            )

        for table in sheet._tables:
            self._tables.append(table)
            # Table ids follow the position in the writer-wide table list.
            table.id = len(self._tables)
            table._write(self._archive)
            self.manifest.append(table)
            sheet._rels[table._rel_id].Target = table.path

        for pivot in sheet._pivots:
            # A cache gets its id the first time any pivot referencing it
            # is encountered.
            if pivot.cache not in seen_caches:
                seen_caches.add(pivot.cache)
                pivot.cache._id = len(seen_caches)

            self._pivots.append(pivot)
            pivot._id = len(self._pivots)
            pivot._write(self._archive, self.manifest)
            self.workbook._pivots.append(pivot)
            sheet._rels.append(
                Relationship(Type=pivot.rel_type, Target=pivot.path)
            )

        if sheet._rels:
            self._archive.writestr(rels_name, tostring(sheet._rels.to_tree()))
def _write_external_links(self, archive):
    """Write links to external workbooks"""
    for number, external in enumerate(self.workbook._external_links, 1):
        external._path = "{0}{1}.xml".format(external._rel_type, number)
        part_name = "{0}/{1}s/{2}".format(
            PACKAGE_XL, external._rel_type, external._path
        )
        rels_name = get_rels_path(part_name)

        archive.writestr(part_name, tostring(external.to_tree()))

        # The rels part holds just the link to the external file.
        link_rels = RelationshipList()
        link_rels.append(external.file_link)
        archive.writestr(rels_name, tostring(link_rels.to_tree()))
def _write_drawing(self, drawing):
    """
    Write a drawing

    The drawing, its charts and its images are each appended to the
    writer's matching list and numbered by their position in it; the
    drawing part and its relationships are then written to the archive
    and the drawing is added to the manifest.
    """
    def _enrol(pool, member):
        # Ids are 1-based positions within the writer-wide pool.
        pool.append(member)
        member._id = len(pool)

    _enrol(self._drawings, drawing)
    for chart in drawing.charts:
        _enrol(self._charts, chart)
    for image in drawing.images:
        _enrol(self._images, image)

    rels_name = get_rels_path(drawing.path)[1:]
    self._archive.writestr(drawing.path[1:], tostring(drawing._write()))
    self._archive.writestr(rels_name, tostring(drawing._write_rels()))
    self.manifest.append(drawing)
def _write_worksheets(self, archive):
    """
    Write every worksheet of the workbook into ``archive``.

    Besides the sheet XML this emits, where applicable, a drawing part
    (charts and images), a comments part with its VML drawing, and the
    worksheet's relationships part.

    :param archive: object exposing ``writestr(name, data)``
    """
    comment_count = 0

    for position, ws in enumerate(self.workbook.worksheets, 1):
        body = ws._write(self.workbook.shared_strings)
        ws._path = "sheet{0}.xml".format(position)
        sheet_arc = "{0}/{1}".format(PACKAGE_WORKSHEETS, ws._path)
        sheet_rels_arc = get_rels_path(sheet_arc)
        archive.writestr(sheet_arc, body)

        if ws._charts or ws._images:
            drawing = SpreadsheetDrawing()
            drawing.charts = ws._charts
            drawing.images = ws._images
            self.workbook._drawings.append(drawing)
            # Drawings are numbered by position in the workbook-wide list.
            number = len(self.workbook._drawings)

            drawing_arc = "{0}/drawing{1}.xml".format(PACKAGE_DRAWINGS, number)
            archive.writestr(drawing_arc, tostring(drawing._write()))
            archive.writestr(
                "{0}/_rels/drawing{1}.xml.rels".format(PACKAGE_DRAWINGS, number),
                tostring(drawing._write_rels()),
            )

            # Point the sheet's drawing relationships at the new part.
            for relation in ws._rels:
                if "drawing" in relation.Type:
                    relation.Target = "/" + drawing_arc

        if ws._comments:
            comment_count += 1
            writer = self.comment_writer(ws)
            archive.writestr(
                PACKAGE_XL + '/comments%d.xml' % comment_count,
                writer.write_comments(),
            )

            if ws.legacy_drawing is not None:
                # Extend the VML file kept from the VBA archive.
                vml_root = fromstring(
                    self.workbook.vba_archive.read(ws.legacy_drawing)
                )
                archive.writestr(
                    ws.legacy_drawing, writer.write_comments_vml(vml_root)
                )
                # Record this file so we don't write it again when we dump
                # out vba_archive
                self.vba_modified.add(ws.legacy_drawing)
            else:
                vml_root = Element("xml")
                archive.writestr(
                    PACKAGE_XL + '/drawings/commentsDrawing%d.vml' % comment_count,
                    writer.write_comments_vml(vml_root),
                )

        if ws._rels or ws._comments or ws.legacy_drawing is not None:
            sheet_rels = write_rels(ws, comments_id=comment_count)
            archive.writestr(sheet_rels_arc, tostring(sheet_rels))
def _write_rels(self, archive, manifest):
    """
    Write the relevant child objects and add links

    Does nothing when there are no records. Otherwise a relationship to
    the records is created, the records are written through their own
    ``_write`` and this object's relationships part is stored in
    ``archive``.
    """
    records = self.records
    if records is None:
        return

    relationships = RelationshipList()
    # NOTE(review): the target path is read before records._id is assigned
    # below; confirm that ordering is intended.
    link = Relationship(Type=records.rel_type, Target=records.path)
    relationships.append(link)
    self.id = link.id

    records._id = self._id
    records._write(archive, manifest)

    rels_name = get_rels_path(self.path)
    serialised = tostring(relationships.to_tree())
    # Archive names carry no leading "/".
    archive.writestr(rels_name[1:], serialised)
def _write_chartsheets(self):
    """
    Write every chartsheet into ``self._archive`` and the manifest.

    A chartsheet with a drawing also gets that drawing written and a
    relationships part containing a single "drawing" relationship.
    """
    for number, chartsheet in enumerate(self.workbook.chartsheets, 1):
        chartsheet._id = number
        serialised = tostring(chartsheet.to_tree())

        self._archive.writestr(chartsheet.path[1:], serialised)
        self.manifest.append(chartsheet)

        if not chartsheet._drawing:
            continue

        self._write_drawing(chartsheet._drawing)

        sheet_rels = RelationshipList()
        sheet_rels.append(
            Relationship(type="drawing", Target=chartsheet._drawing.path)
        )
        rels_tree = sheet_rels.to_tree()

        rels_name = get_rels_path(chartsheet.path[1:])
        self._archive.writestr(rels_name, tostring(rels_tree))
def read_chartsheet(self, sheet, rel):
    """
    Read one chartsheet from the archive and add it to the workbook.

    The chartsheet part named by ``rel.target`` is parsed, titled with
    ``sheet.name`` and added to ``self.wb``. Charts found in any drawing
    referenced from the chartsheet's relationships are attached to it.

    :param sheet: workbook-level sheet entry; only ``name`` is read here
    :param rel: relationship whose ``target`` is the chartsheet part
    """
    sheet_path = rel.target
    rels_path = get_rels_path(sheet_path)
    # A chartsheet without a relationships part has nothing to look up.
    rels = None
    if rels_path in self.valid_files:
        rels = get_dependents(self.archive, rels_path)

    with self.archive.open(sheet_path, "r") as src:
        xml = src.read()
    node = fromstring(xml)
    cs = Chartsheet.from_tree(node)
    cs._parent = self.wb
    cs.title = sheet.name
    self.wb._add_sheet(cs)

    # The previous placeholder was a plain list, so calling ``.find`` on it
    # raised AttributeError whenever the rels part was missing.
    if not rels:
        return

    # A distinct loop name keeps the ``rel`` parameter intact.
    for drawing_rel in rels.find(SpreadsheetDrawing._rel_type):
        charts, _images = find_images(self.archive, drawing_rel.target)
        for chart in charts:
            cs.add_chart(chart)
def find_charts(archive, path):
    """
    Given the path to a drawing file extract anchors with charts

    :param archive: object exposing ``read(name)`` and ``namelist()``
    :param path: archive name of the drawing part
    :return: list of charts, each carrying the anchor of its relationship
    """
    drawing = SpreadsheetDrawing.from_tree(fromstring(archive.read(path)))

    rels_path = get_rels_path(path)
    deps = []
    if rels_path in archive.namelist():
        deps = get_dependents(archive, rels_path)

    def _load(chart_rel):
        # Resolve the relationship to a ChartSpace and turn it into a chart.
        chart_space = get_rel(archive, deps, chart_rel.id, ChartSpace)
        chart = read_chart(chart_space)
        chart.anchor = chart_rel.anchor
        return chart

    return [_load(chart_rel) for chart_rel in drawing._chart_rels]
def find_images(archive, path):
    """
    Given the path to a drawing file extract anchors with images

    :param archive: object exposing ``read(name)`` and ``namelist()``
    :param path: archive name of the drawing part
    :return: list of images, each carrying the anchor of its relationship
    """
    drawing = SpreadsheetDrawing.from_tree(fromstring(archive.read(path)))

    rels_path = get_rels_path(path)
    deps = []
    if rels_path in archive.namelist():
        deps = get_dependents(archive, rels_path)

    found = []
    for blip in drawing._blip_rels:
        dep = deps[blip.embed]
        # Only relationships of the image type are loaded.
        if dep.Type != IMAGE_NS:
            continue
        picture = Image(BytesIO(archive.read(dep.target)))
        picture.anchor = blip.anchor
        found.append(picture)
    return found
def find_images(archive, path):
    """
    Given the path to a drawing file extract charts and images

    Ignore errors due to unsupported parts of DrawingML: when the drawing
    cannot be parsed (TypeError) a warning is issued and two empty lists
    are returned.

    :param archive: object exposing ``read(name)`` and ``namelist()``
    :param path: archive name of the drawing part
    :return: tuple ``(charts, images)``
    """
    tree = fromstring(archive.read(path))
    try:
        drawing = SpreadsheetDrawing.from_tree(tree)
    except TypeError:
        warn(
            "DrawingML support is incomplete and limited to charts and "
            "images only. Shapes and drawings will be lost."
        )
        return [], []

    rels_path = get_rels_path(path)
    deps = []
    if rels_path in archive.namelist():
        deps = get_dependents(archive, rels_path)

    charts = []
    for chart_rel in drawing._chart_rels:
        chart = read_chart(get_rel(archive, deps, chart_rel.id, ChartSpace))
        chart.anchor = chart_rel.anchor
        charts.append(chart)

    images = []
    for blip in drawing._blip_rels:
        dep = deps[blip.embed]
        # Only relationships of the image type are loaded.
        if dep.Type != IMAGE_NS:
            continue
        picture = Image(BytesIO(archive.read(dep.target)))
        picture.anchor = blip.anchor
        images.append(picture)

    return charts, images
def _write_worksheets(self):
    """
    Write every worksheet and its dependent parts into ``self._archive``.

    For each worksheet this writes the sheet XML, registers it in the
    manifest and then handles its drawing, comments, legacy (VML) drawing
    and tables before writing the sheet's relationships part.
    """
    for number, sheet in enumerate(self.workbook.worksheets, 1):
        sheet._id = number
        payload = sheet._write()
        rels_name = get_rels_path(sheet.path)[1:]

        self._archive.writestr(sheet.path[1:], payload)
        self.manifest.append(sheet)

        if sheet._drawing:
            self._write_drawing(sheet._drawing)
            # Retarget drawing relationships at the part just written.
            for relation in sheet._rels.Relationship:
                if "drawing" in relation.Type:
                    relation.Target = sheet._drawing.path

        if sheet._comments:
            self._write_comment(sheet)

        if sheet.legacy_drawing is not None:
            sheet._rels.append(
                Relationship(
                    type="vmlDrawing",
                    Id="anysvml",
                    Target="/" + sheet.legacy_drawing,
                )
            )

        for table in sheet._tables:
            self._tables.append(table)
            # Table ids follow the position in the writer-wide table list.
            table.id = len(self._tables)
            table._write(self._archive)
            self.manifest.append(table)
            sheet._rels[table._rel_id].Target = table.path

        if sheet._rels:
            self._archive.writestr(rels_name, tostring(sheet._rels.to_tree()))
def rels(self):
    """
    Return the relationships of the workbook part.

    They are read from the archive on first access and kept in
    ``self._rels`` for later calls.
    """
    if self._rels is not None:
        return self._rels

    rels_path = get_rels_path(self.workbook_part_name)
    self._rels = get_dependents(self.archive, rels_path)
    return self._rels
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, guess_types=False, keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)

    src = archive.read(ARC_CONTENT_TYPES)
    root = fromstring(src)
    package = Manifest.from_tree(root)

    wb_part = _find_workbook_part(package)
    parser = WorkbookParser(archive, wb_part.PartName[1:])
    wb = parser.wb
    wb._data_only = data_only
    wb._read_only = read_only
    wb._keep_links = keep_links
    wb.guess_types = guess_types
    wb.template = wb_part.ContentType in (XLTX, XLTM)
    parser.parse()
    wb._sheets = []

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    archive_names = archive.namelist()
    # Only membership is tested against valid_files, so a set is sufficient.
    valid_files = set(archive_names)

    # If we are going to preserve the vba then attach a copy of the archive
    # to the workbook so that it is available for the save.
    if keep_vba:
        wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
        for name in archive_names:
            wb.vba_archive.writestr(name, archive.read(name))

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    if ARC_CORE in valid_files:
        src = fromstring(archive.read(ARC_CORE))
        wb.properties = DocumentProperties.from_tree(src)

    shared_strings = []
    ct = package.find(SHARED_STRINGS)
    if ct is not None:
        strings_path = ct.PartName[1:]
        shared_strings = read_string_table(archive.read(strings_path))

    if ARC_THEME in valid_files:
        wb.loaded_theme = archive.read(ARC_THEME)

    apply_stylesheet(archive, wb)  # bind styles to workbook
    pivot_caches = parser.pivot_caches

    # get worksheets
    for sheet, rel in parser.find_sheets():
        sheet_name = sheet.name
        worksheet_path = rel.target
        rels_path = get_rels_path(worksheet_path)
        rels = []
        if rels_path in valid_files:
            rels = get_dependents(archive, rels_path)

        if worksheet_path not in valid_files:
            continue

        if read_only:
            ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                   shared_strings)
            wb._sheets.append(ws)
        else:
            # The stream is only needed while parsing; close it afterwards
            # instead of leaving the handle open.
            with archive.open(worksheet_path) as fh:
                ws = wb.create_sheet(sheet_name)
                ws._rels = rels
                ws_parser = WorkSheetParser(ws, fh, shared_strings)
                ws_parser.parse()

            # ``rels`` is a plain list when the sheet has no relationships
            # part, so every relationship lookup stays under this guard.
            if rels:
                # assign any comments to cells
                for r in rels.find(COMMENTS_NS):
                    src = archive.read(r.target)
                    comment_sheet = CommentSheet.from_tree(fromstring(src))
                    for ref, comment in comment_sheet.comments:
                        ws[ref].comment = comment

                # preserve link to VML file if VBA
                if (wb.vba_archive is not None
                        and ws.legacy_drawing is not None):
                    ws.legacy_drawing = rels[ws.legacy_drawing].target

                for t in ws_parser.tables:
                    src = archive.read(t)
                    xml = fromstring(src)
                    table = Table.from_tree(xml)
                    ws.add_table(table)

                for r in rels.find(TableDefinition.rel_type):
                    pivot_path = r.Target
                    src = archive.read(pivot_path)
                    tree = fromstring(src)
                    pivot = TableDefinition.from_tree(tree)
                    pivot.cache = pivot_caches[pivot.cacheId]
                    ws.add_pivot(pivot)

        ws.sheet_state = sheet.state
        ws._rels = []  # reset

    parser.assign_names()

    # Differential styles are deliberately left in place here; the original
    # note says tables may depend upon dxf:
    # wb._differential_styles.styles = []
    archive.close()
    return wb
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, guess_types=False, keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)

    parser = WorkbookParser(archive)
    wb = parser.wb
    wb._data_only = data_only
    wb._read_only = read_only
    wb._keep_links = keep_links
    wb.guess_types = guess_types
    parser.parse()
    wb._sheets = []

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    archive_names = archive.namelist()
    # Only membership is tested against valid_files, so a set is sufficient.
    valid_files = set(archive_names)

    # If we are going to preserve the vba then attach a copy of the archive
    # to the workbook so that it is available for the save.
    if keep_vba:
        wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
        for name in archive_names:
            wb.vba_archive.writestr(name, archive.read(name))

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    if ARC_CORE in valid_files:
        src = fromstring(archive.read(ARC_CORE))
        wb.properties = DocumentProperties.from_tree(src)

    # is workbook a template or not
    src = archive.read(ARC_CONTENT_TYPES)
    root = fromstring(src)
    package = Manifest.from_tree(root)
    wb.template = XLTX in package or XLTM in package

    shared_strings = []
    ct = package.find(SHARED_STRINGS)
    if ct is not None:
        strings_path = ct.PartName[1:]
        shared_strings = read_string_table(archive.read(strings_path))

    if ARC_THEME in valid_files:
        wb.loaded_theme = archive.read(ARC_THEME)

    apply_stylesheet(archive, wb)  # bind styles to workbook

    # get worksheets
    for sheet, rel in parser.find_sheets():
        sheet_name = sheet.name
        worksheet_path = rel.target
        rels_path = get_rels_path(worksheet_path)
        rels = []
        if rels_path in valid_files:
            rels = get_dependents(archive, rels_path)

        if worksheet_path not in valid_files:
            continue

        if read_only:
            ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                   shared_strings)
            wb._sheets.append(ws)
        else:
            # The stream is only needed while parsing; close it afterwards
            # instead of leaving the handle open.
            with archive.open(worksheet_path) as fh:
                ws = wb.create_sheet(sheet_name)
                ws._rels = rels
                ws_parser = WorkSheetParser(ws, fh, shared_strings)
                ws_parser.parse()

            # ``rels`` is a plain list when the sheet has no relationships
            # part, so every relationship lookup stays under this guard.
            if rels:
                # assign any comments to cells
                for r in rels.find(COMMENTS_NS):
                    src = archive.read(r.target)
                    comment_sheet = CommentSheet.from_tree(fromstring(src))
                    for ref, comment in comment_sheet.comments:
                        ws[ref].comment = comment

                # preserve link to VML file if VBA
                if (wb.vba_archive is not None
                        and ws.legacy_drawing is not None):
                    ws.legacy_drawing = rels[ws.legacy_drawing].target

        ws.sheet_state = sheet.state
        ws._rels = []  # reset

    parser.assign_names()

    wb._differential_styles.styles = []
    archive.close()
    return wb
def read_worksheets(self):
    """
    Read every sheet announced by the workbook parser into ``self.wb``.

    Sheets whose part is missing from the archive are skipped and
    chartsheets are handed to ``read_chartsheet``. In read-only mode a
    ReadOnlyWorksheet is appended and nothing else is loaded for the
    sheet. Otherwise the worksheet is parsed and its comments, legacy
    drawing link, tables, charts, images and pivot tables are attached.
    """
    for sheet, rel in self.parser.find_sheets():
        if rel.target not in self.valid_files:
            continue

        if "chartsheet" in rel.Type:
            self.read_chartsheet(sheet, rel)
            continue

        rels_path = get_rels_path(rel.target)
        rels = RelationshipList()
        if rels_path in self.valid_files:
            rels = get_dependents(self.archive, rels_path)

        if self.read_only:
            lazy_sheet = ReadOnlyWorksheet(
                self.wb, sheet.name, rel.target, self.shared_strings
            )
            self.wb._sheets.append(lazy_sheet)
            continue

        fh = self.archive.open(rel.target)
        ws = self.wb.create_sheet(sheet.name)
        ws._rels = rels
        reader = WorksheetReader(ws, fh, self.shared_strings, self.data_only)
        reader.bind_all()

        # assign any comments to cells
        for comment_rel in rels.find(COMMENTS_NS):
            comment_xml = self.archive.read(comment_rel.target)
            comment_sheet = CommentSheet.from_tree(fromstring(comment_xml))
            for ref, comment in comment_sheet.comments:
                ws[ref].comment = comment

        # preserve link to VML file if VBA
        if self.wb.vba_archive and ws.legacy_drawing:
            ws.legacy_drawing = rels[ws.legacy_drawing].target

        for table_path in reader.tables:
            table_xml = fromstring(self.archive.read(table_path))
            ws.add_table(Table.from_tree(table_xml))

        # A separate loop name keeps the outer ``rel`` untouched.
        for drawing_rel in rels.find(SpreadsheetDrawing._rel_type):
            charts, images = find_images(self.archive, drawing_rel.target)
            for chart in charts:
                ws.add_chart(chart, chart.anchor)
            for image in images:
                ws.add_image(image, image.anchor)

        for pivot_rel in rels.find(TableDefinition.rel_type):
            pivot_xml = fromstring(self.archive.read(pivot_rel.Target))
            pivot = TableDefinition.from_tree(pivot_xml)
            pivot.cache = self.parser.pivot_caches[pivot.cacheId]
            ws.add_pivot(pivot)

        ws.sheet_state = sheet.state
def read_worksheets(self):
    """
    Read every sheet announced by the workbook parser into ``self.wb``.

    Sheets whose part is missing from the archive are skipped and
    chartsheets are handed to ``read_chartsheet``. Every other sheet is
    created with ``create_sheet`` and parsed by WorksheetReader, which is
    passed ``self.read_only``; comments, the legacy drawing link, tables,
    charts, images and pivot tables are then attached.

    A comment that cannot be assigned because its cell is a MergedCell is
    dropped with a warning.
    """
    comment_warning = """Cell '{0}':{1} is part of a merged range but has a comment which will be removed because merged cells cannot contain any data."""

    for sheet, rel in self.parser.find_sheets():
        if rel.target not in self.valid_files:
            continue

        if "chartsheet" in rel.Type:
            self.read_chartsheet(sheet, rel)
            continue

        rels_path = get_rels_path(rel.target)
        rels = RelationshipList()
        if rels_path in self.valid_files:
            rels = get_dependents(self.archive, rels_path)

        # Worksheet and WorksheetReader are used for all scenarios,
        # without distinguishing between read_only or not
        fh = self.archive.open(rel.target)
        ws = self.wb.create_sheet(sheet.name)
        ws._rels = rels
        reader = WorksheetReader(
            ws, fh, self.shared_strings, self.data_only, self.read_only
        )
        reader.bind_all()

        # assign any comments to cells
        for comment_rel in rels.find(COMMENTS_NS):
            comment_xml = self.archive.read(comment_rel.target)
            comment_sheet = CommentSheet.from_tree(fromstring(comment_xml))
            for ref, comment in comment_sheet.comments:
                try:
                    ws[ref].comment = comment
                except AttributeError:
                    # Only the merged-cell case is reported; any other
                    # AttributeError is ignored, as before.
                    cell = ws[ref]
                    if isinstance(cell, MergedCell):
                        warnings.warn(
                            comment_warning.format(ws.title, cell.coordinate)
                        )

        # preserve link to VML file if VBA
        if self.wb.vba_archive and ws.legacy_drawing:
            ws.legacy_drawing = rels[ws.legacy_drawing].target

        for table_path in reader.tables:
            table_xml = fromstring(self.archive.read(table_path))
            ws.add_table(Table.from_tree(table_xml))

        # A separate loop name keeps the outer ``rel`` untouched.
        for drawing_rel in rels.find(SpreadsheetDrawing._rel_type):
            charts, images = find_images(self.archive, drawing_rel.target)
            for chart in charts:
                ws.add_chart(chart, chart.anchor)
            for image in images:
                ws.add_image(image, image.anchor)

        for pivot_rel in rels.find(TableDefinition.rel_type):
            pivot_xml = fromstring(self.archive.read(pivot_rel.Target))
            pivot = TableDefinition.from_tree(pivot_xml)
            pivot.cache = self.parser.pivot_caches[pivot.cacheId]
            ws.add_pivot(pivot)

        ws.sheet_state = sheet.state