def test_book_views(self, datadir, WorkbookParser):
    """Parsing ``bug137.xlsx`` yields a first book view whose active tab is 1."""
    datadir.chdir()
    # The context manager releases the file handle even if an assertion fails.
    with ZipFile("bug137.xlsx") as archive:
        parser = WorkbookParser(archive, ARC_WORKBOOK)
        parser.parse()
        assert parser.wb.views[0].activeTab == 1
def test_pivot_caches(self, datadir, WorkbookParser):
    """Parsing ``pivot.xlsx`` exposes exactly one pivot cache, keyed by 68."""
    datadir.chdir()
    # The context manager releases the file handle even if an assertion fails.
    with ZipFile("pivot.xlsx") as archive:
        parser = WorkbookParser(archive, ARC_WORKBOOK)
        parser.parse()
        assert list(parser.pivot_caches.keys()) == [68]
def test_ctor(self, datadir, WorkbookParser):
    """A freshly constructed parser keeps the archive and has no sheets yet."""
    datadir.chdir()
    # The context manager releases the file handle even if an assertion fails.
    with ZipFile("bug137.xlsx") as archive:
        parser = WorkbookParser(archive, ARC_WORKBOOK)
        # No parse() call here: only the constructor's state is checked.
        assert parser.archive is archive
        assert parser.sheets == []
def test_workbook_security(self, datadir, WorkbookParser):
    """Parsing ``workbook_security.xlsx`` restores the workbook protection."""
    expected_protection = WorkbookProtection()
    expected_protection.workbookPassword = '******'
    expected_protection.lockStructure = True

    datadir.chdir()
    # The context manager releases the file handle even if an assertion fails.
    with ZipFile("workbook_security.xlsx") as archive:
        parser = WorkbookParser(archive, ARC_WORKBOOK)
        parser.parse()
        assert parser.wb.security == expected_protection
def test_parse_calendar(self, datadir, WorkbookParser):
    """The base date switches from the 1900 to the 1904 calendar on parse."""
    datadir.chdir()
    with ZipFile(BytesIO(), "a") as archive:
        # Read the fixture as bytes so the archived XML is identical to the
        # file on disk, independent of the platform's default text encoding
        # and newline translation.
        with open("workbook_1904.xml", "rb") as src:
            archive.writestr(ARC_WORKBOOK, src.read())
        archive.writestr(ARC_WORKBOOK_RELS, b"<root />")

        parser = WorkbookParser(archive, ARC_WORKBOOK)
        # Before parsing the workbook still reports the 1900 calendar.
        assert parser.wb.excel_base_date == CALENDAR_WINDOWS_1900

        parser.parse()
        assert parser.wb.code_name is None
        assert parser.wb.excel_base_date == CALENDAR_MAC_1904
def test_assign_names(self, datadir, WorkbookParser):
    """``assign_names`` moves print settings from defined names to the sheet."""
    datadir.chdir()
    # The context manager releases the file handle even if an assertion fails.
    with ZipFile("print_settings.xlsx") as archive:
        parser = WorkbookParser(archive, ARC_WORKBOOK)
        parser.parse()

        wb = parser.wb
        assert len(wb.defined_names.definedName) == 4

        # Two of the four defined names are consumed by assign_names().
        parser.assign_names()
        assert len(wb.defined_names.definedName) == 2

        ws = wb['Sheet']
        assert ws.print_title_rows == "$1:$1"
        assert ws.print_titles == "$1:$1"
        assert ws.print_area == ['$A$1:$D$5', '$B$9:$F$14']
def test_find_sheets(self, datadir, WorkbookParser):
    """``find_sheets`` pairs each sheet with its relationship, in order."""
    datadir.chdir()
    # The context manager releases the file handle even if an assertion fails.
    with ZipFile("bug137.xlsx") as archive:
        parser = WorkbookParser(archive, ARC_WORKBOOK)
        parser.parse()

        output = [
            [sheet.name, sheet.state, rel.Target, rel.Type]
            for sheet, rel in parser.find_sheets()
        ]

        assert output == [
            ['Chart1', 'visible', 'xl/chartsheets/sheet1.xml', CHARTSHEET_REL],
            ['Sheet1', 'visible', 'xl/worksheets/sheet1.xml', WORKSHEET_REL],
        ]
def test_no_links(self, datadir, WorkbookParser):
    """Parsing raises ``KeyError`` while links are kept, and succeeds otherwise."""
    datadir.chdir()
    with ZipFile(BytesIO(), "a") as archive:
        # Read the fixture as bytes so the archived XML is identical to the
        # file on disk, independent of the platform's default text encoding
        # and newline translation.
        with open("workbook_links.xml", "rb") as src:
            archive.writestr(ARC_WORKBOOK, src.read())
        # The relationships part is deliberately empty.
        archive.writestr(ARC_WORKBOOK_RELS, b"<root />")

        parser = WorkbookParser(archive, ARC_WORKBOOK)
        assert parser.wb.keep_links is True

        with pytest.raises(KeyError):
            parser.parse()

        # With link preservation switched off the same archive parses cleanly.
        parser.wb._keep_links = False
        parser.parse()
        assert parser.wb._external_links == []
def test_broken_sheet_ref(self, datadir, recwarn, WorkbookParser):
    """Resolving sheets from ``workbook_missing_id.xml`` records a UserWarning."""
    from openpyxl25.workbook.parser import WorkbookPackage

    datadir.chdir()
    with open("workbook_missing_id.xml", "rb") as src:
        xml = src.read()
    node = fromstring(xml)
    wb = WorkbookPackage.from_tree(node)

    # The context manager closes the in-memory archive on every exit path.
    with ZipFile(BytesIO(), "a") as archive:
        archive.write("workbook_links.xml", ARC_WORKBOOK)
        archive.writestr(ARC_WORKBOOK_RELS, b"<root />")

        parser = WorkbookParser(archive, ARC_WORKBOOK)
        # Inject the sheets taken from the fixture instead of calling parse().
        parser.sheets = wb.sheets

        # Consume the result fully; the warning is expected to be issued
        # while the sheets are being resolved.
        list(parser.find_sheets())

        w = recwarn.pop()
        assert issubclass(w.category, UserWarning)
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, guess_types=False, keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl25.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl25.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    .. note::

        A warning is issued when both ``read_only`` and ``guess_types`` are
        set, because data types are not guessed by the iterator reader.

    """
    archive = _validate_archive(filename)

    # The source archive is closed on every exit path, so a failure while
    # parsing does not leak the underlying file handle.
    try:
        src = archive.read(ARC_CONTENT_TYPES)
        root = fromstring(src)
        package = Manifest.from_tree(root)

        wb_part = _find_workbook_part(package)
        # Part names start with a slash; archive members do not.
        parser = WorkbookParser(archive, wb_part.PartName[1:])
        wb = parser.wb
        wb._data_only = data_only
        wb._read_only = read_only
        wb._keep_links = keep_links
        wb.guess_types = guess_types
        wb.template = wb_part.ContentType in (XLTX, XLTM)
        parser.parse()
        wb._sheets = []

        if read_only and guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

        # Only used for membership tests, so a set gives constant-time lookups.
        valid_files = set(archive.namelist())

        # If we are going to preserve the vba then attach a copy of the archive
        # to the workbook so that it is available for the save.
        if keep_vba:
            wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
            for name in archive.namelist():
                wb.vba_archive.writestr(name, archive.read(name))

        if read_only:
            # Separate handle kept on the workbook; ``archive`` itself is
            # closed before returning. Presumably used by the read-only
            # worksheets for deferred access -- confirm against
            # ReadOnlyWorksheet.
            wb._archive = ZipFile(filename)

        # get workbook-level information
        if ARC_CORE in valid_files:
            src = fromstring(archive.read(ARC_CORE))
            wb.properties = DocumentProperties.from_tree(src)

        shared_strings = []
        ct = package.find(SHARED_STRINGS)
        if ct is not None:
            strings_path = ct.PartName[1:]
            shared_strings = read_string_table(archive.read(strings_path))

        if ARC_THEME in valid_files:
            wb.loaded_theme = archive.read(ARC_THEME)

        apply_stylesheet(archive, wb)  # bind styles to workbook
        pivot_caches = parser.pivot_caches

        # get worksheets
        for sheet, rel in parser.find_sheets():
            sheet_name = sheet.name
            worksheet_path = rel.target
            rels_path = get_rels_path(worksheet_path)
            rels = []
            if rels_path in valid_files:
                rels = get_dependents(archive, rels_path)

            # Skip sheets whose part is missing from the archive.
            if worksheet_path not in valid_files:
                continue

            if read_only:
                ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                       shared_strings)
                wb._sheets.append(ws)
            else:
                # The worksheet stream is only needed while parsing; close it
                # as soon as the parser has finished.
                with archive.open(worksheet_path) as fh:
                    ws = wb.create_sheet(sheet_name)
                    ws._rels = rels
                    ws_parser = WorkSheetParser(ws, fh, shared_strings)
                    ws_parser.parse()

                if rels:
                    # assign any comments to cells
                    for r in rels.find(COMMENTS_NS):
                        src = archive.read(r.target)
                        comment_sheet = CommentSheet.from_tree(fromstring(src))
                        for ref, comment in comment_sheet.comments:
                            ws[ref].comment = comment

                    # preserve link to VML file if VBA
                    if (wb.vba_archive is not None
                            and ws.legacy_drawing is not None):
                        ws.legacy_drawing = rels[ws.legacy_drawing].target

                    for t in ws_parser.tables:
                        src = archive.read(t)
                        xml = fromstring(src)
                        table = Table.from_tree(xml)
                        ws.add_table(table)

                    drawings = rels.find(SpreadsheetDrawing._rel_type)
                    # Distinct name so the outer loop's ``rel`` is not shadowed.
                    for drawing_rel in drawings:
                        for c in find_charts(archive, drawing_rel.target):
                            ws.add_chart(c, c.anchor)

                    pivot_rel = rels.find(TableDefinition.rel_type)
                    for r in pivot_rel:
                        pivot_path = r.Target
                        src = archive.read(pivot_path)
                        tree = fromstring(src)
                        pivot = TableDefinition.from_tree(tree)
                        pivot.cache = pivot_caches[pivot.cacheId]
                        ws.add_pivot(pivot)

            ws.sheet_state = sheet.state
            ws._rels = []  # reset

        parser.assign_names()

        # NOTE: wb._differential_styles.styles is deliberately not cleared
        # here; tables may depend upon dxf.
    finally:
        archive.close()

    return wb