def test_append(self, Manifest):
    """Appending a workbook's active sheet leaves six overrides registered."""
    from openpyxl25 import Workbook

    book = Workbook()
    sheet = book.active
    package = Manifest()
    package.append(sheet)

    override_count = len(package.Override)
    assert override_count == 6
def test_write(self, Manifest):
    """Writing the manifest for a fresh workbook lists the workbook part."""
    package = Manifest()

    from openpyxl25 import Workbook

    book = Workbook()
    # In-memory zip: nothing is written to disk by this test.
    target = ZipFile(BytesIO(), "w")
    package._write(target, book)

    assert "/xl/workbook.xml" in package.filenames
def test_media(self, Manifest, file, registration):
    """The last Default entry after registering *file* matches *registration*."""
    from openpyxl25 import Workbook

    # The workbook is instantiated but not referenced again in this test.
    book = Workbook()

    package = Manifest()
    package._register_mimetypes([file])

    newest_default = package.Default[-1]
    serialised = tostring(newest_default.to_tree())
    mismatch = compare_xml(serialised, registration)
    assert mismatch is None, mismatch
def test_write(self, DummyCache):
    """``_write`` stores the cache under its path and registers its mime type."""
    buffer = BytesIO()
    target = ZipFile(buffer, mode="w")
    package = Manifest()

    # Serialised once up front; the result itself is not inspected.
    serialised = tostring(DummyCache.to_tree())

    DummyCache._write(target, package)

    # Archive member names are the part path without its first character.
    expected_members = [DummyCache.path[1:]]
    assert target.namelist() == expected_members
    assert package.find(DummyCache.mime_type)
def __init__(self, workbook, archive):
    """Remember the workbook/archive pair and start with empty bookkeeping.

    :param workbook: the workbook that will be written
    :param archive: the archive object the parts are written into
    """
    self._archive, self.workbook = archive, workbook
    self.manifest = Manifest()
    self.vba_modified = set()
    # One independent, initially empty list per kind of collected part.
    for attribute in ("_tables", "_charts", "_images",
                      "_drawings", "_comments", "_pivots"):
        setattr(self, attribute, [])
def test_write(self, RecordList):
    """A written record list appears in the archive and in the manifest."""
    buffer = BytesIO()
    target = ZipFile(buffer, mode="w")
    package = Manifest()
    record_list = RecordList()

    # Serialised once up front; the result itself is not inspected.
    serialised = tostring(record_list.to_tree())

    record_list._write(target, package)
    package.append(record_list)

    # Archive member names are the part path without its first character.
    expected_members = [record_list.path[1:]]
    assert target.namelist() == expected_members
    assert package.find(record_list.mime_type)
def test_from_xml(self, datadir, Manifest):
    """Parsing ``manifest.xml`` yields the expected defaults and overrides."""
    datadir.chdir()
    with open("manifest.xml") as handle:
        root = fromstring(handle.read())
    package = Manifest.from_tree(root)

    assert len(package.Default) == 2

    expected_defaults = [
        ("application/xml", "xml"),
        ("application/vnd.openxmlformats-package.relationships+xml", "rels"),
    ]
    found_defaults = [
        (entry.ContentType, entry.Extension) for entry in package.Default
    ]
    assert found_defaults == expected_defaults

    # (content type, part name) pairs, in document order.
    expected_overrides = [
        ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml",
         "/xl/workbook.xml"),
        ("application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
         "/xl/worksheets/sheet1.xml"),
        ("application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml",
         "/xl/chartsheets/sheet1.xml"),
        ("application/vnd.openxmlformats-officedocument.theme+xml",
         "/xl/theme/theme1.xml"),
        ("application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml",
         "/xl/styles.xml"),
        ("application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml",
         "/xl/sharedStrings.xml"),
        ("application/vnd.openxmlformats-officedocument.drawing+xml",
         "/xl/drawings/drawing1.xml"),
        ("application/vnd.openxmlformats-officedocument.drawingml.chart+xml",
         "/xl/charts/chart1.xml"),
        ("application/vnd.openxmlformats-package.core-properties+xml",
         "/docProps/core.xml"),
        ("application/vnd.openxmlformats-officedocument.extended-properties+xml",
         "/docProps/app.xml"),
    ]
    found_overrides = [
        (entry.ContentType, entry.PartName) for entry in package.Override
    ]
    assert found_overrides == expected_overrides
def test_findall(self, datadir, Manifest):
    """``findall`` yields exactly one worksheet entry for ``manifest.xml``."""
    datadir.chdir()
    with open("manifest.xml", "rb") as handle:
        payload = handle.read()

    package = Manifest.from_tree(fromstring(payload))

    # Materialise the result so it can be counted.
    matches = list(package.findall(WORKSHEET_TYPE))
    assert len(matches) == 1
def test_exts(self, datadir, Manifest):
    """``extensions`` for ``manifest.xml`` holds only the xml mapping."""
    datadir.chdir()
    with open("manifest.xml") as handle:
        root = fromstring(handle.read())
    package = Manifest.from_tree(root)

    expected = [("xml", "application/xml")]
    assert package.extensions == expected
def test_find(self, datadir, Manifest):
    """``find`` returns the worksheet entry pointing at ``sheet1.xml``."""
    datadir.chdir()
    with open("manifest.xml", "rb") as handle:
        payload = handle.read()

    package = Manifest.from_tree(fromstring(payload))
    worksheet_entry = package.find(WORKSHEET_TYPE)

    assert worksheet_entry.PartName == "/xl/worksheets/sheet1.xml"
def test_vba(self, datadir, Manifest):
    """``_write_vba`` on a VBA-preserving workbook yields the expected parts."""
    datadir.chdir()
    from openpyxl25 import load_workbook

    book = load_workbook('sample.xlsm', keep_vba=True)
    package = Manifest()
    package._write_vba(book)

    found = {entry.PartName for entry in package.Override}
    wanted = {
        '/xl/workbook.xml',
        '/xl/worksheets/sheet1.xml',
        '/xl/worksheets/sheet2.xml',
        '/xl/worksheets/sheet3.xml',
        '/xl/theme/theme1.xml',
        '/xl/styles.xml',
        '/docProps/core.xml',
        '/docProps/app.xml',
        '/xl/sharedStrings.xml',
    }
    assert found == wanted
def test_overwritten_default():
    """A workbook content type given only as a Default still resolves.

    The manifest has no Override at all; the lookup is expected to return
    the ``/xl/workbook.xml`` override with the XLSX content type.
    """
    from openpyxl25.reader.excel import _find_workbook_part

    # Adjacent literals keep the text identical while staying readable.
    markup = (
        ' <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
        ' <Default Extension="xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>'
        ' </Types> '
    )
    manifest = Manifest.from_tree(fromstring(markup))

    expected = Override("/xl/workbook.xml", XLSX)
    assert _find_workbook_part(manifest) == expected
def test_ctor(self, Manifest):
    """A freshly built manifest serialises to two defaults and five overrides."""
    serialised = tostring(Manifest().to_tree())

    # Adjacent literals keep the text identical while staying readable.
    wanted = (
        ' <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
        ' <Default ContentType="application/vnd.openxmlformats-package.relationships+xml" Extension="rels" />'
        ' <Default ContentType="application/xml" Extension="xml" />'
        ' <Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml"'
        ' PartName="/xl/sharedStrings.xml"/>'
        ' <Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"'
        ' PartName="/xl/styles.xml"/>'
        ' <Override ContentType="application/vnd.openxmlformats-officedocument.theme+xml"'
        ' PartName="/xl/theme/theme1.xml"/>'
        ' <Override ContentType="application/vnd.openxmlformats-package.core-properties+xml"'
        ' PartName="/docProps/core.xml"/>'
        ' <Override ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"'
        ' PartName="/docProps/app.xml"/>'
        ' </Types> '
    )

    mismatch = compare_xml(serialised, wanted)
    assert mismatch is None, mismatch
def test_find_standard_workbook_part(datadir, wb_type, wb_name):
    """An Override with a workbook content type is found as the workbook part."""
    from openpyxl25.reader.excel import _find_workbook_part

    # Template for a manifest holding a single Override entry.
    template = (
        ' <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
        ' <Override ContentType="{0}" PartName="{1}"/>'
        ' </Types> '
    )
    markup = template.format(wb_type, wb_name)
    manifest = Manifest.from_tree(fromstring(markup))

    expected = Override(wb_name, wb_type)
    assert _find_workbook_part(manifest) == expected
def test_no_defaults(self, Manifest):
    """
    A manifest without any Default element (as LibreOffice writes it)
    reports no extensions.
    """
    markup = (
        ' <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
        ' <Override PartName="/_rels/.rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
        ' </Types> '
    )
    package = Manifest.from_tree(fromstring(markup))

    assert package.extensions == []
def test_filenames(self, datadir, Manifest):
    """``filenames`` lists every override part name in document order."""
    datadir.chdir()
    with open("manifest.xml") as handle:
        root = fromstring(handle.read())
    package = Manifest.from_tree(root)

    wanted = [
        '/xl/workbook.xml',
        '/xl/worksheets/sheet1.xml',
        '/xl/chartsheets/sheet1.xml',
        '/xl/theme/theme1.xml',
        '/xl/styles.xml',
        '/xl/sharedStrings.xml',
        '/xl/drawings/drawing1.xml',
        '/xl/charts/chart1.xml',
        '/docProps/core.xml',
        '/docProps/app.xml',
    ]
    assert package.filenames == wanted
def test_find_none(self, Manifest):
    """Looking up the worksheet type in a fresh manifest gives ``None``."""
    lookup = Manifest().find(WORKSHEET_TYPE)
    assert lookup is None
class ExcelWriter(object):
    """Write a workbook object to an Excel file."""

    def __init__(self, workbook, archive):
        """Bind the writer to a workbook and the archive it writes into.

        :param workbook: the workbook to serialise
        :param archive: archive object; ``writestr`` and ``close`` are
            called on it by this class
        """
        self._archive = archive
        self.workbook = workbook
        self.manifest = Manifest()
        # Archive member names rewritten by this writer; these are skipped
        # when the preserved VBA archive is merged back in.
        self.vba_modified = set()
        # Parts collected while the sheets are written; the length of each
        # list is used to hand out the next id.
        self._tables = []
        self._charts = []
        self._images = []
        self._drawings = []
        self._comments = []
        self._pivots = []

    def write_data(self):
        """Write the various xml files into the zip archive."""
        archive = self._archive

        archive.writestr(ARC_ROOT_RELS, write_root_rels(self.workbook))
        props = ExtendedProperties()
        archive.writestr(ARC_APP, tostring(props.to_tree()))

        archive.writestr(ARC_CORE,
                         tostring(self.workbook.properties.to_tree()))

        # Reuse the theme stored on the workbook when there is one.
        if self.workbook.loaded_theme:
            archive.writestr(ARC_THEME, self.workbook.loaded_theme)
        else:
            archive.writestr(ARC_THEME, write_theme())

        # Sheets go first: writing them fills self._images / self._charts,
        # which the next two calls consume.
        self._write_worksheets()
        self._write_chartsheets()
        self._write_images()
        self._write_charts()

        self._archive.writestr(
            ARC_SHARED_STRINGS,
            write_string_table(self.workbook.shared_strings))
        self._write_external_links()

        stylesheet = write_stylesheet(self.workbook)
        archive.writestr(ARC_STYLE, tostring(stylesheet))

        archive.writestr(ARC_WORKBOOK, write_workbook(self.workbook))
        archive.writestr(ARC_WORKBOOK_RELS,
                         write_workbook_rels(self.workbook))

        self._merge_vba()

        # The manifest is written last, after every part has been appended.
        self.manifest._write(archive, self.workbook)

    def _merge_vba(self):
        """
        If workbook contains macros then extract associated files from cache
        of old file and add to archive
        """
        ARC_VBA = re.compile("|".join(
            ('xl/vba', r'xl/drawings/.*vmlDrawing\d\.vml',
             'xl/ctrlProps', 'customUI', 'xl/activeX', r'xl/media/.*\.emf')))

        if self.workbook.vba_archive:
            # Members already rewritten by this writer are not copied again.
            for name in set(
                    self.workbook.vba_archive.namelist()) - self.vba_modified:
                if ARC_VBA.match(name):
                    self._archive.writestr(
                        name, self.workbook.vba_archive.read(name))

    def _write_images(self):
        """Write the data of every collected image into the archive."""
        # delegate to object
        for img in self._images:
            self._archive.writestr(img.path[1:], img._data())

    def _write_charts(self):
        """Write every collected chart and register it in the manifest.

        :raises InvalidFileException: if the same chart object was
            collected more than once
        """
        # delegate to object
        if len(self._charts) != len(set(self._charts)):
            raise InvalidFileException(
                "The same chart cannot be used in more than one worksheet")
        for chart in self._charts:
            self._archive.writestr(chart.path[1:], tostring(chart._write()))
            self.manifest.append(chart)

    def _write_drawing(self, drawing):
        """
        Write a drawing
        """
        self._drawings.append(drawing)
        drawing._id = len(self._drawings)
        # Charts and images are only collected and numbered here; they are
        # written later by _write_charts / _write_images.
        for chart in drawing.charts:
            self._charts.append(chart)
            chart._id = len(self._charts)
        for img in drawing.images:
            self._images.append(img)
            img._id = len(self._images)
        rels_path = get_rels_path(drawing.path)[1:]
        self._archive.writestr(drawing.path[1:], tostring(drawing._write()))
        self._archive.writestr(rels_path, tostring(drawing._write_rels()))
        self.manifest.append(drawing)

    def _write_chartsheets(self):
        """Write each chartsheet, plus its drawing and rels when present."""
        for idx, sheet in enumerate(self.workbook.chartsheets, 1):

            sheet._id = idx
            xml = tostring(sheet.to_tree())

            self._archive.writestr(sheet.path[1:], xml)
            self.manifest.append(sheet)

            if sheet._drawing:
                self._write_drawing(sheet._drawing)

                rel = Relationship(type="drawing",
                                   Target=sheet._drawing.path)
                rels = RelationshipList()
                rels.append(rel)
                tree = rels.to_tree()

                rels_path = get_rels_path(sheet.path[1:])
                self._archive.writestr(rels_path, tostring(tree))

    def _write_comment(self, ws):
        """Write the comment sheet and its VML shapes for worksheet *ws*."""
        cs = CommentSheet.from_comments(ws._comments)
        self._comments.append(cs)
        cs._id = len(self._comments)
        self._archive.writestr(cs.path[1:], tostring(cs.to_tree()))
        self.manifest.append(cs)

        if ws.legacy_drawing is None:
            # No existing VML part: pick a new name and start from nothing.
            ws.legacy_drawing = 'xl/drawings/commentsDrawing{0}.vml'.format(
                cs._id)
            vml = None
        else:
            # Extend the VML part kept in the preserved archive.
            vml = fromstring(self.workbook.vba_archive.read(ws.legacy_drawing))

        vml = cs.write_shapes(vml)

        self._archive.writestr(ws.legacy_drawing, vml)
        # Remember the member so _merge_vba does not copy the old version.
        self.vba_modified.add(ws.legacy_drawing)

        comment_rel = Relationship(Id="comments", type=cs._rel_type,
                                   Target=cs.path)
        ws._rels.append(comment_rel)

    def _write_worksheets(self):
        """Write every worksheet together with its dependent parts."""
        # Caches seen so far, so each distinct cache gets its id only once.
        pivot_caches = set()

        for idx, ws in enumerate(self.workbook.worksheets, 1):

            ws._id = idx
            xml = ws._write()
            rels_path = get_rels_path(ws.path)[1:]

            self._archive.writestr(ws.path[1:], xml)
            self.manifest.append(ws)

            if ws._drawing:
                self._write_drawing(ws._drawing)

                # Point existing drawing relationships at the written part.
                for r in ws._rels.Relationship:
                    if "drawing" in r.Type:
                        r.Target = ws._drawing.path

            if ws._comments:
                self._write_comment(ws)

            if ws.legacy_drawing is not None:
                shape_rel = Relationship(type="vmlDrawing", Id="anysvml",
                                         Target="/" + ws.legacy_drawing)
                ws._rels.append(shape_rel)

            for t in ws._tables:
                self._tables.append(t)
                t.id = len(self._tables)
                t._write(self._archive)
                self.manifest.append(t)
                ws._rels[t._rel_id].Target = t.path

            for p in ws._pivots:
                if p.cache not in pivot_caches:
                    pivot_caches.add(p.cache)
                    p.cache._id = len(pivot_caches)

                self._pivots.append(p)
                p._id = len(self._pivots)
                p._write(self._archive, self.manifest)
                self.workbook._pivots.append(p)
                r = Relationship(Type=p.rel_type, Target=p.path)
                ws._rels.append(r)

            # The rels part is written only when there is something in it.
            if ws._rels:
                tree = ws._rels.to_tree()
                self._archive.writestr(rels_path, tostring(tree))

    def _write_external_links(self):
        """Write links to external workbooks"""
        # delegate to object
        wb = self.workbook
        for idx, link in enumerate(wb._external_links, 1):
            link._id = idx
            rels_path = get_rels_path(link.path[1:])

            xml = link.to_tree()
            self._archive.writestr(link.path[1:], tostring(xml))
            rels = RelationshipList()
            rels.append(link.file_link)
            self._archive.writestr(rels_path, tostring(rels.to_tree()))
            self.manifest.append(link)

    def save(self, filename):
        """Write data into the archive and close it.

        The archive is closed even when writing fails, so its handle is
        not left open; the original exception still propagates.

        :param filename: not read by this method; the destination is the
            archive given to the constructor
        """
        try:
            self.write_data()
        finally:
            self._archive.close()
def check_content_type(workbook_type, archive):
    """Assert that *workbook_type* is contained in the archive's manifest.

    :param workbook_type: value tested with ``in`` against the parsed manifest
    :param archive: object whose ``read`` returns the content-types part
    """
    root = fromstring(archive.read(ARC_CONTENT_TYPES))
    manifest = Manifest.from_tree(root)
    assert workbook_type in manifest
def test_no_dupe_overrides(self, Manifest):
    """Appending the same override twice grows the list by only one."""
    package = Manifest()
    assert len(package.Override) == 5

    # Same value twice: only the first append is expected to count.
    for _ in range(2):
        package.Override.append("a")

    assert len(package.Override) == 6
def test_no_workbook():
    """Looking for the workbook part in a fresh manifest raises ``IOError``."""
    from openpyxl25.reader.excel import _find_workbook_part

    empty_manifest = Manifest()
    with pytest.raises(IOError):
        _find_workbook_part(empty_manifest)
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, guess_types=False, keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl25.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl25.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)
    read_only = read_only

    # Locate the workbook part through the package manifest.
    src = archive.read(ARC_CONTENT_TYPES)
    root = fromstring(src)
    package = Manifest.from_tree(root)

    wb_part = _find_workbook_part(package)
    # [1:] drops the first character of the part name before it is used as
    # an archive member name (presumably the leading "/" -- confirm).
    parser = WorkbookParser(archive, wb_part.PartName[1:])
    wb = parser.wb
    wb._data_only = data_only
    wb._read_only = read_only
    wb._keep_links = keep_links
    wb.guess_types = guess_types
    # Template flag is derived from the workbook part's content type.
    wb.template = wb_part.ContentType in (XLTX, XLTM)
    parser.parse()
    # Start from an empty sheet list; sheets are added in the loop below.
    wb._sheets = []

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach a copy of the archive
    # to the workbook so that it is available for the save.
    if keep_vba:
        wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
        for name in archive.namelist():
            wb.vba_archive.writestr(name, archive.read(name))

    # Read-only workbooks get their own ZipFile, separate from `archive`,
    # which is closed at the end of this function.
    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    if ARC_CORE in valid_files:
        src = fromstring(archive.read(ARC_CORE))
        wb.properties = DocumentProperties.from_tree(src)

    # Shared strings stay empty when the manifest has no entry for them.
    shared_strings = []
    ct = package.find(SHARED_STRINGS)
    if ct is not None:
        strings_path = ct.PartName[1:]
        shared_strings = read_string_table(archive.read(strings_path))

    if ARC_THEME in valid_files:
        wb.loaded_theme = archive.read(ARC_THEME)

    apply_stylesheet(archive, wb) # bind styles to workbook
    pivot_caches = parser.pivot_caches

    # get worksheets
    for sheet, rel in parser.find_sheets():
        sheet_name = sheet.name
        worksheet_path = rel.target
        rels_path = get_rels_path(worksheet_path)
        # An empty list stands in when the sheet has no rels part.
        rels = []
        if rels_path in valid_files:
            rels = get_dependents(archive, rels_path)

        # Sheets whose part is not in the archive are skipped.
        if not worksheet_path in valid_files:
            continue

        if read_only:
            ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                   shared_strings)

            wb._sheets.append(ws)
        else:
            fh = archive.open(worksheet_path)
            ws = wb.create_sheet(sheet_name)
            ws._rels = rels
            ws_parser = WorkSheetParser(ws, fh, shared_strings)
            ws_parser.parse()

            # Everything below needs the sheet's relationships.
            if rels:
                # assign any comments to cells
                for r in rels.find(COMMENTS_NS):
                    src = archive.read(r.target)
                    comment_sheet = CommentSheet.from_tree(fromstring(src))
                    for ref, comment in comment_sheet.comments:
                        ws[ref].comment = comment

                # preserve link to VML file if VBA
                if (wb.vba_archive is not None
                        and ws.legacy_drawing is not None):
                    ws.legacy_drawing = rels[ws.legacy_drawing].target

                for t in ws_parser.tables:
                    src = archive.read(t)
                    xml = fromstring(src)
                    table = Table.from_tree(xml)
                    ws.add_table(table)

                drawings = rels.find(SpreadsheetDrawing._rel_type)
                for rel in drawings:
                    for c in find_charts(archive, rel.target):
                        ws.add_chart(c, c.anchor)

                pivot_rel = rels.find(TableDefinition.rel_type)
                for r in pivot_rel:
                    pivot_path = r.Target
                    src = archive.read(pivot_path)
                    tree = fromstring(src)
                    pivot = TableDefinition.from_tree(tree)
                    # Attach the cache parsed at workbook level.
                    pivot.cache = pivot_caches[pivot.cacheId]
                    ws.add_pivot(pivot)

        ws.sheet_state = sheet.state
        ws._rels = [] # reset

    parser.assign_names()

    #wb._differential_styles.styles = [] # tables may depend upon dxf

    archive.close()
    return wb
def test_no_dupe_types(self, Manifest):
    """Appending the same default twice grows the list by only one."""
    package = Manifest()
    assert len(package.Default) == 2

    # Same value twice: only the first append is expected to count.
    for _ in range(2):
        package.Default.append("a")

    assert len(package.Default) == 3