def iter_rows(workbook_name, sheet_name, xml_source, range_string='', row_offset=0, column_offset=0):
    """Return the cell rows of a worksheet XML stream via ``get_squared_range``.

    The bounds come from ``range_string`` (shifted by ``row_offset`` and
    ``column_offset``) when it is given; otherwise they are taken from the
    sheet's own dimension, with the column letters converted to indexes.
    The shared strings and the style table are read from the workbook
    archive named by ``workbook_name``.
    """
    workbook_archive = get_archive_file(workbook_name)

    if not range_string:
        min_col, min_row, max_col, max_row = read_dimension(xml_source=xml_source)
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col) + 1
        # NOTE(review): the 6-row padding is unexplained in this file.
        max_row += 6
    else:
        min_col, min_row, max_col, max_row = get_range_boundaries(
            range_string, row_offset, column_offset)

    # A workbook without a shared-strings part gets an empty table.
    try:
        shared_strings = read_string_table(workbook_archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        shared_strings = {}

    styles = read_style_table(workbook_archive.read(ARC_STYLE))

    # Rewind: reading the dimension may already have consumed the stream.
    xml_source.seek(0)
    parser = iterparse(xml_source)

    return get_squared_range(parser, min_col, min_row, max_col, max_row,
                             shared_strings, styles)
def test_parse_dxfs(datadir):
    """Differential (dxf) styles are parsed and survive a write/read round trip."""
    datadir.chdir()
    reference_file = 'conditional-formatting.xlsx'
    wb = load_workbook(reference_file)
    assert isinstance(wb, Workbook)

    archive = ZipFile(reference_file, 'r', ZIP_DEFLATED)
    try:
        read_xml = archive.read(ARC_STYLE)
    finally:
        # Release the file handle even when the style part cannot be read.
        archive.close()

    # Verify length
    assert '<dxfs count="164">' in str(read_xml)
    assert len(wb.style_properties['dxf_list']) == 164

    # Verify first dxf style
    reference_file = 'dxf_style.xml'
    with open(reference_file) as expected:
        diff = compare_xml(read_xml, expected.read())
        assert diff is None, diff

    cond_styles = wb.style_properties['dxf_list'][0]
    assert cond_styles['font'].color == Color('FF9C0006')
    assert not cond_styles['font'].bold
    assert not cond_styles['font'].italic
    f = PatternFill(end_color=Color('FFFFC7CE'))
    assert cond_styles['fill'] == f

    # Verify that the dxf styles stay the same when they're written and read back in.
    w = StyleWriter(wb)
    w._write_dxfs()
    write_xml = get_xml(w._root)
    read_style_prop = read_style_table(write_xml)
    assert len(read_style_prop['dxf_list']) == len(wb.style_properties['dxf_list'])
    for i, dxf in enumerate(read_style_prop['dxf_list']):
        # Compare the two representations. The previous form,
        # ``assert repr(a == b)``, asserted a non-empty string and could
        # never fail.
        assert repr(wb.style_properties['dxf_list'][i]) == repr(dxf)
def _load_workbook(wb, archive, filename, use_iterators):
    """Fill ``wb`` with the properties, worksheets and named ranges in ``archive``.

    Worksheets are looked up as ``sheet<N>.xml`` in title order; titles whose
    part is missing from the archive are skipped. With ``use_iterators`` the
    worksheet XML is unpacked first and the reader also receives ``filename``
    and the sheet code name.
    """
    members = archive.namelist()

    # Workbook-level information.
    wb.properties = read_properties_core(archive.read(ARC_CORE))

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        # No shared-strings part in this archive.
        string_table = {}
    style_table = read_style_table(archive.read(ARC_STYLE))

    # Worksheets: drop the preset sheet before loading the real ones.
    wb.worksheets = []
    titles = read_sheets_titles(archive.read(ARC_WORKBOOK))

    for position, title in enumerate(titles):
        codename = 'sheet%d.xml' % (position + 1)
        part_path = '%s/%s' % (PACKAGE_WORKSHEETS, codename)
        if part_path not in members:
            continue

        if use_iterators:
            xml_source = unpack_worksheet(archive, part_path)
            sheet = read_worksheet(xml_source, wb, title, string_table,
                                   style_table, filename, codename)
        else:
            sheet = read_worksheet(archive.read(part_path), wb, title,
                                   string_table, style_table)
        wb.add_sheet(sheet, index=position)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
def test_read_cell_style():
    """The 'empty-workbook-styles.xml' fixture parses to a table of length 2."""
    fixture = os.path.join(DATADIR, 'reader', 'empty-workbook-styles.xml')
    with open(fixture, 'r') as stream:
        xml_text = stream.read()
    parsed = read_style_table(xml_text)
    eq_(2, len(parsed))
def test_parse_dxfs(datadir):
    """Differential (dxf) styles are parsed and survive a write/read round trip."""
    datadir.chdir()
    reference_file = 'conditional-formatting.xlsx'
    wb = load_workbook(reference_file)
    assert isinstance(wb, Workbook)

    archive = ZipFile(reference_file, 'r', ZIP_DEFLATED)
    try:
        read_xml = archive.read(ARC_STYLE)
    finally:
        # Release the file handle even when the style part cannot be read.
        archive.close()

    # Verify length
    assert '<dxfs count="164">' in str(read_xml)
    assert len(wb.style_properties['dxf_list']) == 164

    # Verify first dxf style
    reference_file = 'dxf_style.xml'
    with open(reference_file) as expected:
        diff = compare_xml(read_xml, expected.read())
        assert diff is None, diff

    cond_styles = wb.style_properties['dxf_list'][0]
    assert cond_styles['font'].color == Color('FF9C0006')
    assert not cond_styles['font'].bold
    assert not cond_styles['font'].italic
    f = PatternFill(end_color=Color('FFFFC7CE'))
    assert cond_styles['fill'] == f

    # Verify that the dxf styles stay the same when they're written and read back in.
    w = StyleWriter(wb)
    w._write_dxfs()
    write_xml = tostring(w._root)
    read_style_prop = read_style_table(write_xml)
    # Index 2 of the parsed result is what this test treats as the dxf list.
    assert len(read_style_prop[2]) == len(wb.style_properties['dxf_list'])
    for i, dxf in enumerate(read_style_prop[2]):
        # Compare the two representations. The previous form,
        # ``assert repr(a == b)``, asserted a non-empty string and could
        # never fail.
        assert repr(wb.style_properties['dxf_list'][i]) == repr(dxf)
def test_parse_dxfs():
    """Differential (dxf) styles are parsed and survive a write/read round trip."""
    reference_file = os.path.join(DATADIR, 'reader', 'conditional-formatting.xlsx')
    wb = load_workbook(reference_file)

    archive = ZipFile(reference_file, 'r', ZIP_DEFLATED)
    try:
        read_xml = archive.read(ARC_STYLE)
    finally:
        # Release the file handle even when the style part cannot be read.
        archive.close()

    # Verify length
    assert '<dxfs count="164">' in str(read_xml)
    assert len(wb.style_properties['dxf_list']) == 164

    # Verify first dxf style
    reference_file = os.path.join(DATADIR, 'writer', 'expected', 'dxf_style.xml')
    with open(reference_file) as expected:
        diff = compare_xml(read_xml, expected.read())
        assert diff is None, diff

    cond_styles = wb.style_properties['dxf_list'][0]
    assert cond_styles['font']['color'] == Color('FF9C0006')
    assert cond_styles['font']['bold'] == False
    assert cond_styles['font']['italic'] == False
    f = Fill()
    f.end_color = Color('FFFFC7CE')
    assert cond_styles['fill'][0] == f

    # Verify that the dxf styles stay the same when they're written and read back in.
    w = StyleWriter(wb)
    w._write_dxfs()
    write_xml = get_xml(w._root)
    read_style_prop = read_style_table(write_xml)
    assert len(read_style_prop['dxf_list']) == len(wb.style_properties['dxf_list'])
    for i, dxf in enumerate(read_style_prop['dxf_list']):
        # Compare the two representations. The previous form,
        # ``assert repr(a == b)``, asserted a non-empty string and could
        # never fail.
        assert repr(wb.style_properties['dxf_list'][i]) == repr(dxf)
def test_read_cell_style():
    """Parsing the 'empty-workbook-styles.xml' fixture gives a table of length 2."""
    styles_path = os.path.join(DATADIR, 'reader', 'empty-workbook-styles.xml')
    with open(styles_path, 'r') as source:
        raw_xml = source.read()
    table = read_style_table(raw_xml)
    eq_(2, len(table))
def test_read_simple_style_mappings(datadir):
    """The first element parsed from 'simple-styles.xml' holds four styles with the expected formats."""
    datadir.chdir()
    with open("simple-styles.xml") as stream:
        xml_text = stream.read()
    styles = read_style_table(xml_text)[0]

    assert len(styles) == 4
    # Style 1 uses builtin format 9; style 2 uses a custom date format.
    assert numbers.BUILTIN_FORMATS[9] == styles[1].number_format
    assert 'yyyy-mm-dd' == styles[2].number_format
def iter_rows(workbook_name, sheet_name, xml_source, shared_date, string_table, range_string='', row_offset=0, column_offset=0):
    """Return the cell rows of a worksheet XML stream via ``get_squared_range``.

    The bounds come from ``range_string`` (shifted by ``row_offset`` and
    ``column_offset``) when it is given; otherwise they are taken from the
    sheet's own dimension, with the column letters converted to indexes.
    The style table is read from the workbook archive named by
    ``workbook_name``; ``string_table`` and ``shared_date`` are passed through.
    """
    workbook_archive = get_archive_file(workbook_name)

    if not range_string:
        min_col, min_row, max_col, max_row = read_dimension(xml_source=xml_source)
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col) + 1
        # NOTE(review): the 6-row padding is unexplained in this file.
        max_row += 6
    else:
        min_col, min_row, max_col, max_row = get_range_boundaries(
            range_string, row_offset, column_offset)

    styles = read_style_table(workbook_archive.read(ARC_STYLE))

    # Rewind: reading the dimension may already have consumed the stream.
    xml_source.seek(0)
    parser = iterparse(xml_source)

    return get_squared_range(parser, min_col, min_row, max_col, max_row,
                             string_table, styles, shared_date)
def test_parse_dxfs():
    """Differential (dxf) styles are parsed and survive a write/read round trip."""
    reference_file = os.path.join(DATADIR, 'reader', 'conditional-formatting.xlsx')
    wb = load_workbook(reference_file)

    archive = ZipFile(reference_file, 'r', ZIP_DEFLATED)
    try:
        read_xml = archive.read(ARC_STYLE)
    finally:
        # Release the file handle even when the style part cannot be read.
        archive.close()

    # Verify length
    assert '<dxfs count="164">' in str(read_xml)
    assert len(wb.style_properties['dxf_list']) == 164

    # Verify first dxf style
    reference_file = os.path.join(DATADIR, 'writer', 'expected', 'dxf_style.xml')
    with open(reference_file) as expected:
        diff = compare_xml(read_xml, expected.read())
        assert diff is None, diff

    cond_styles = wb.style_properties['dxf_list'][0]
    assert cond_styles['font']['color'] == Color('FF9C0006')
    assert cond_styles['font']['bold'] == False
    assert cond_styles['font']['italic'] == False
    f = Fill()
    f.end_color = Color('FFFFC7CE')
    assert cond_styles['fill'][0] == f

    # Verify that the dxf styles stay the same when they're written and read back in.
    w = StyleWriter(wb)
    w._write_dxfs()
    write_xml = get_xml(w._root)
    read_style_prop = read_style_table(write_xml)
    assert len(read_style_prop['dxf_list']) == len(wb.style_properties['dxf_list'])
    for i, dxf in enumerate(read_style_prop['dxf_list']):
        # Compare the two representations. The previous form,
        # ``assert repr(a == b)``, asserted a non-empty string and could
        # never fail.
        assert repr(wb.style_properties['dxf_list'][i]) == repr(dxf)
def _load_workbook(wb, archive, filename, use_iterators):
    """Load properties, worksheets and named ranges from ``archive`` into ``wb``.

    Each sheet title is matched to a ``sheet<N>.xml`` part by its position;
    titles without a matching part in the archive are skipped. When
    ``use_iterators`` is set the worksheet is unpacked and the reader is also
    given ``filename`` and the sheet code name.
    """
    archive_members = archive.namelist()

    # Workbook-level information.
    wb.properties = read_properties_core(archive.read(ARC_CORE))

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        # No shared-strings part in this archive.
        string_table = {}
    style_table = read_style_table(archive.read(ARC_STYLE))

    # Worksheets: drop the preset sheet before loading the real ones.
    wb.worksheets = []
    sheet_titles = read_sheets_titles(archive.read(ARC_WORKBOOK))

    for idx, sheet_title in enumerate(sheet_titles):
        sheet_file = 'sheet%d.xml' % (idx + 1)
        sheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_file)
        if sheet_path not in archive_members:
            continue

        if use_iterators:
            unpacked = unpack_worksheet(archive, sheet_path)
            loaded = read_worksheet(unpacked, wb, sheet_title, string_table,
                                    style_table, filename, sheet_file)
        else:
            loaded = read_worksheet(archive.read(sheet_path), wb, sheet_title,
                                    string_table, style_table)
        wb.add_sheet(loaded, index=idx)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
def test_read_complex_style_mappings(datadir):
    """The 'table' mapping parsed from 'complex-styles.xml' matches the expected index map."""
    datadir.chdir()
    expected_table = {
        0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9,
        10: 10, 11: 11, 12: 12, 13: 0, 14: 13, 15: 14, 16: 15, 17: 10,
        18: 16, 19: 17, 20: 18, 21: 19, 22: 0, 23: 10, 24: 20, 25: 21,
        26: 22, 27: 23, 28: 24,
    }
    with open("complex-styles.xml") as stream:
        xml_text = stream.read()
    parsed = read_style_table(xml_text)
    assert parsed['table'] == expected_table
def test_read_style():
    """The 'simple-styles.xml' fixture yields four styles with the expected format codes."""
    fixture = os.path.join(DATADIR, 'reader', 'simple-styles.xml')
    with open(fixture, 'r') as stream:
        xml_text = stream.read()
    styles = read_style_table(xml_text)

    eq_(4, len(styles))
    # Style 1 uses builtin format 9; style 2 uses a custom date format.
    eq_(NumberFormat._BUILTIN_FORMATS[9], styles[1].number_format.format_code)
    eq_('yyyy-mm-dd', styles[2].number_format.format_code)
def test_read_simple_style_mappings(datadir):
    """'simple-styles.xml' maps four style ids; ids 1 and 2 resolve to the expected formats."""
    datadir.chdir()
    with open("simple-styles.xml") as stream:
        xml_text = stream.read()
    parsed = read_style_table(xml_text)
    id_to_index = parsed['table']
    styles = parsed['list']

    assert len(id_to_index) == 4
    # The table maps a style id to a position in the style list.
    first = styles[id_to_index[1]]
    assert NumberFormat._BUILTIN_FORMATS[9] == first.number_format
    second = styles[id_to_index[2]]
    assert 'yyyy-mm-dd' == second.number_format
def test_read_complex_style_mappings(datadir):
    """The 'table' mapping parsed from 'complex-styles.xml' matches the expected index map."""
    datadir.chdir()
    with open("complex-styles.xml") as stream:
        parsed = read_style_table(stream.read())

    expected_table = {
        0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9,
        10: 10, 11: 11, 12: 12, 13: 0, 14: 13, 15: 14, 16: 15, 17: 10,
        18: 16, 19: 17, 20: 18, 21: 19, 22: 0, 23: 10, 24: 20, 25: 21,
        26: 22, 27: 23, 28: 24,
    }
    assert parsed['table'] == expected_table
def test_read_cell_style():
    """The 'table' parsed from 'empty-workbook-styles.xml' has two entries."""
    fixture = os.path.join(DATADIR, 'reader', 'empty-workbook-styles.xml')
    # The context manager closes the file whether or not the read succeeds.
    with open(fixture, 'r') as stream:
        xml_text = stream.read()
    parsed = read_style_table(xml_text)
    id_to_index = parsed['table']
    assert len(id_to_index) == 2
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):
    """Load properties, theme, styles, worksheets and named ranges from ``archive`` into ``wb``.

    With ``keep_vba`` the archive itself is attached to the workbook as
    ``wb.vba_archive``. Only titles whose content type equals
    ``VALID_WORKSHEET`` are loaded, each from a ``sheet<N>.xml`` part
    numbered by its position among those titles; parts missing from the
    archive are skipped.
    """
    members = archive.namelist()

    # If are going to preserve the vba then attach the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        wb.vba_archive = archive

    # Workbook-level information. A KeyError from either read falls back to
    # default document properties.
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        wb.read_workbook_settings(archive.read(ARC_WORKBOOK))
    except KeyError:
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}

    try:
        # some writers don't output a theme, live with it (fixes #160)
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    # The 'table' entry is passed to the worksheet reader; the remaining
    # entries are kept on the workbook.
    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))

    # Worksheets: drop the preset sheet before loading the real ones.
    wb.worksheets = []

    content_types = read_content_types(archive.read(ARC_CONTENT_TYPES))
    sheet_types = [(part, ctype) for part, ctype in content_types
                   if ctype in WORK_OR_CHART_TYPE]
    titles = read_sheets_titles(archive.read(ARC_WORKBOOK))
    # Titles and sheet content types are paired positionally.
    worksheet_titles = [title for title, typed in zip(titles, sheet_types)
                        if typed[1] == VALID_WORKSHEET]

    for position, title in enumerate(worksheet_titles):
        codename = 'sheet%d.xml' % (position + 1)
        part_path = '%s/%s' % (PACKAGE_WORKSHEETS, codename)
        if part_path not in members:
            continue

        if use_iterators:
            xml_source = unpack_worksheet(archive, part_path)
            sheet = read_worksheet(xml_source, wb, title, string_table,
                                   style_table,
                                   style_properties['color_index'],
                                   filename, codename)
        else:
            sheet = read_worksheet(archive.read(part_path), wb, title,
                                   string_table, style_table,
                                   style_properties['color_index'],
                                   keep_vba=keep_vba)
        wb.add_sheet(sheet, index=position)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
def test_read_style():
    """The 'table' parsed from 'simple-styles.xml' has four styles with the expected formats."""
    fixture = os.path.join(DATADIR, 'reader', 'simple-styles.xml')
    # The context manager closes the file whether or not the read succeeds.
    with open(fixture, 'r') as stream:
        xml_text = stream.read()
    parsed = read_style_table(xml_text)
    styles = parsed['table']

    assert len(styles) == 4
    # Style 1 uses builtin format 9; style 2 uses a custom date format.
    assert NumberFormat._BUILTIN_FORMATS[9] == styles[1].number_format
    assert 'yyyy-mm-dd' == styles[2].number_format
def get_squared_range(self, min_col, min_row, max_col, max_row):
    """Yield one tuple of cells per row for the requested rectangle.

    Columns run from ``min_col`` up to but not including ``max_col``. Rows
    with no stored cells, and columns missing from a stored row, are filled
    with the placeholders returned by ``get_missing_cells``. Stored cells
    have their number format resolved from the style table and their raw
    value converted according to the cell's data type.
    """
    # Letters of every column each yielded row must contain, in order.
    expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)]
    current_row = min_row

    # The style sheet is re-read from the archive on every call.
    style_properties = read_style_table(self.archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    # groupby only merges adjacent items, so this relies on get_cells
    # producing cells row by row -- TODO confirm against get_cells.
    for row, cells in groupby(self.get_cells(min_row, min_col, max_row, max_col), operator.attrgetter('row')):
        full_row = []
        if current_row < row:
            # Emit placeholder rows for the gap before this stored row.
            for gap_row in xrange(current_row, row):
                dummy_cells = get_missing_cells(gap_row, expected_columns)
                yield tuple([dummy_cells[column] for column in expected_columns])
            current_row = row

        temp_cells = list(cells)
        retrieved_columns = dict([(c.column, c) for c in temp_cells])
        # Columns expected in the output but absent from this stored row.
        missing_columns = list(set(expected_columns) - set(retrieved_columns.keys()))
        replacement_columns = get_missing_cells(row, missing_columns)

        for column in expected_columns:
            if column in retrieved_columns:
                cell = retrieved_columns[column]
                if cell.style_id is not None:
                    # Resolve the style id to its number format code.
                    style = style_table[int(cell.style_id)]
                    cell = cell._replace(number_format=style.number_format.format_code) #pylint: disable-msg=W0212
                if cell.internal_value is not None:
                    # Branch order matters: the date check runs before the
                    # plain numeric conversion.
                    # NOTE(review): `in` rather than `==` below -- if
                    # TYPE_STRING is a plain string this is a substring
                    # test; confirm that is intended.
                    if cell.data_type in Cell.TYPE_STRING:
                        # The raw value is an index into the shared string table.
                        cell = cell._replace(internal_value=unicode(self._string_table[int(cell.internal_value)])) #pylint: disable-msg=W0212
                    elif cell.data_type == Cell.TYPE_BOOL:
                        # Only the raw value '1' is treated as True.
                        cell = cell._replace(internal_value=cell.internal_value == '1')
                    elif cell.is_date:
                        cell = cell._replace(internal_value=self._shared_date.from_julian(float(cell.internal_value)))
                    elif cell.data_type == Cell.TYPE_NUMERIC:
                        cell = cell._replace(internal_value=float(cell.internal_value))
                    elif cell.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
                        cell = cell._replace(internal_value=unicode(cell.internal_value))
                full_row.append(cell)
            else:
                full_row.append(replacement_columns[column])
        # The next stored row is expected immediately after this one.
        current_row = row + 1
        yield tuple(full_row)
def _load_workbook(wb, archive, filename, use_iterators):
    """Load properties, theme, styles, worksheets and named ranges from ``archive`` into ``wb``.

    Each sheet title is matched to a ``sheet<N>.xml`` part by its position;
    titles without a matching part in the archive are skipped. A missing
    core-properties, shared-strings or theme part is tolerated.
    """
    members = archive.namelist()

    # Workbook-level information.
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}

    try:
        # some writers don't output a theme, live with it (fixes #160)
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    style_table = read_style_table(archive.read(ARC_STYLE))

    wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))

    # Worksheets: drop the preset sheet before loading the real ones.
    wb.worksheets = []
    titles = read_sheets_titles(archive.read(ARC_WORKBOOK))

    for position, title in enumerate(titles):
        codename = 'sheet%d.xml' % (position + 1)
        part_path = '%s/%s' % (PACKAGE_WORKSHEETS, codename)
        if part_path not in members:
            continue

        if use_iterators:
            xml_source = unpack_worksheet(archive, part_path)
            sheet = read_worksheet(xml_source, wb, title, string_table,
                                   style_table, filename, codename)
        else:
            sheet = read_worksheet(archive.read(part_path), wb, title,
                                   string_table, style_table)
        wb.add_sheet(sheet, index=position)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
def iter_rows(workbook_name, sheet_name, xml_source, shared_date, string_table, range_string='', row_offset=0, column_offset=0):
    """Return the cell rows of a worksheet XML stream via ``get_squared_range``.

    The bounds come from ``range_string`` (shifted by ``row_offset`` and
    ``column_offset``) when it is given; otherwise they are taken from the
    sheet's own dimension, with the column letters converted to indexes.
    Only the 'table' entry of the parsed style properties is used.
    """
    workbook_archive = get_archive_file(workbook_name)

    if not range_string:
        min_col, min_row, max_col, max_row = read_dimension(xml_source=xml_source)
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col) + 1
        # NOTE(review): the 6-row padding is unexplained in this file.
        max_row += 6
    else:
        min_col, min_row, max_col, max_row = get_range_boundaries(
            range_string, row_offset, column_offset)

    parsed_styles = read_style_table(workbook_archive.read(ARC_STYLE))
    styles = parsed_styles.pop('table')

    # Rewind: reading the dimension may already have consumed the stream.
    xml_source.seek(0)
    parser = iterparse(xml_source)

    return get_squared_range(parser, min_col, min_row, max_col, max_row,
                             string_table, styles, shared_date)
raise OpenModeError("File-object must be opened in binary mode") try: archive = ZipFile(filename, 'r', ZIP_DEFLATED) except (BadZipfile, RuntimeError, IOError, ValueError), e: raise InvalidFileException(unicode(e)) wb = Workbook() wb._set_optimized_read() try: # get workbook-level information wb.properties = read_properties_core(archive.read(ARC_CORE)) try: string_table = read_string_table(archive.read(ARC_SHARED_STRINGS)) except KeyError: string_table = {} style_table = read_style_table(archive.read(ARC_STYLE)) # get worksheets wb.worksheets = [] # remove preset worksheet sheet_names = read_sheets_titles(archive.read(ARC_APP)) for i, sheet_name in enumerate(sheet_names): sheet_codename = 'sheet%d.xml' % (i + 1) worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename) if not use_iterators: new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table) else: xml_source = unpack_worksheet(archive, worksheet_path) new_ws = read_worksheet(xml_source, wb, sheet_name, string_table, style_table, filename, sheet_codename) #new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table, filename, sheet_codename) wb.add_sheet(new_ws, index = i)
def load_workbook(filename, read_only=False, use_iterators=False, keep_vba=False, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)
    # The archive is closed on every exit path, including errors while parsing.
    try:
        read_only = read_only or use_iterators

        wb = Workbook(guess_types=guess_types, data_only=data_only, read_only=read_only)

        if read_only and guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

        valid_files = archive.namelist()

        # If are going to preserve the vba then attach a copy of the archive to the
        # workbook so that is available for the save.
        if keep_vba:
            try:
                # ``with`` closes the handle even when the read fails.
                with open(filename, 'rb') as f:
                    s = f.read()
            except Exception:
                # open() rejects file-like objects, so read those directly and
                # restore their position. Catching Exception instead of using a
                # bare ``except`` lets KeyboardInterrupt/SystemExit propagate.
                pos = filename.tell()
                filename.seek(0)
                s = filename.read()
                filename.seek(pos)
            wb.vba_archive = ZipFile(BytesIO(s), 'r')

        if read_only:
            # A separate handle is kept on the workbook; ``archive`` is closed
            # before this function returns.
            wb._archive = ZipFile(filename)

        # get workbook-level information
        try:
            wb.properties = read_properties(archive.read(ARC_CORE))
        except KeyError:
            wb.properties = DocumentProperties()
        wb.active = read_workbook_settings(archive.read(ARC_WORKBOOK)) or 0

        # what content types do we have?
        cts = dict(read_content_types(archive))
        strings_path = cts.get(SHARED_STRINGS)
        if strings_path is not None:
            # Archive member names carry no leading slash.
            if strings_path.startswith("/"):
                strings_path = strings_path[1:]
            shared_strings = read_string_table(archive.read(strings_path))
        else:
            shared_strings = []

        wb.is_template = XLTX in cts or XLTM in cts

        try:
            # some writers don't output a theme, live with it (fixes #160)
            wb.loaded_theme = archive.read(ARC_THEME)
        except KeyError:
            assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

        parsed_styles = read_style_table(archive)
        if parsed_styles is not None:
            wb._differential_styles = parsed_styles.differential_styles
            wb._cell_styles = parsed_styles.cell_styles
            wb._named_styles = parsed_styles.named_styles
            wb._colors = parsed_styles.color_index
            wb._borders = parsed_styles.border_list
            wb._fonts = parsed_styles.font_list
            wb._fills = parsed_styles.fill_list
            wb._number_formats = parsed_styles.number_formats
            wb._protections = parsed_styles.protections
            wb._alignments = parsed_styles.alignments

        wb.excel_base_date = read_excel_base_date(archive)

        # get worksheets
        wb._sheets = []  # remove preset worksheet
        for sheet in detect_worksheets(archive):
            sheet_name = sheet['title']
            worksheet_path = sheet['path']
            if worksheet_path not in valid_files:
                continue

            if read_only:
                new_ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                           shared_strings)
                wb._add_sheet(new_ws)
            else:
                fh = archive.open(worksheet_path)
                parser = WorkSheetParser(wb, sheet_name, fh, shared_strings)
                parser.parse()
                # The sheet is looked up by title after parsing.
                new_ws = wb[sheet_name]
            new_ws.sheet_state = sheet['state']

            if wb.vba_archive is not None and new_ws.legacy_drawing is not None:
                # We need to get the file name of the legacy drawing
                dirname, basename = worksheet_path.rsplit('/', 1)
                rels_path = '/'.join((dirname, '_rels', basename + '.rels'))
                rels = get_dependents(archive, rels_path)
                new_ws.legacy_drawing = rels[new_ws.legacy_drawing].target

            if not read_only:
                # load comments into the worksheet cells
                comments_file = get_comments_file(worksheet_path, archive, valid_files)
                if comments_file is not None:
                    read_comments(new_ws, archive.read(comments_file))

                drawings_file = get_drawings_file(worksheet_path, archive, valid_files)
                if drawings_file is not None:
                    read_drawings(new_ws, drawings_file, archive, valid_files)

        wb._differential_styles = []  # reset
        wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

        wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))

        if EXTERNAL_LINK in cts:
            rels = read_rels(archive)
            wb._external_links = list(detect_external_links(rels, archive))
    finally:
        archive.close()

    return wb
def _load_workbook(wb, archive, filename, read_only, keep_vba):
    """Load properties, theme, styles, worksheets and links from ``archive`` into ``wb``.

    :param wb: workbook object to populate
    :param archive: opened zip archive of the workbook
    :param filename: path or binary file-like object the archive was opened from
    :param read_only: when true, worksheets are created from their path only
        and comments are not loaded
    :param keep_vba: when true, an in-memory copy of the whole file is
        attached to the workbook as ``wb.vba_archive``
    """
    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        try:
            # ``with`` closes the handle even when the read fails.
            with open(filename, 'rb') as f:
                s = f.read()
        except Exception:
            # open() rejects file-like objects, so read those directly and
            # restore their position. Catching Exception instead of using a
            # bare ``except`` lets KeyboardInterrupt/SystemExit propagate.
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb._read_workbook_settings(archive.read(ARC_WORKBOOK))

    # what content types do we have?
    cts = dict(read_content_types(archive))
    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        # Archive member names carry no leading slash.
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    try:
        # some writers don't output a theme, live with it (fixes #160)
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_table, color_index, cond_styles = read_style_table(archive.read(ARC_STYLE))
    wb.shared_styles = style_table
    wb.style_properties = {'dxf_list': cond_styles}
    wb.cond_styles = cond_styles

    wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if worksheet_path not in valid_files:
            continue

        if not read_only:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, shared_strings, style_table,
                                    color_index=color_index,
                                    keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(None, wb, sheet_name, shared_strings,
                                    style_table,
                                    color_index=color_index,
                                    worksheet_path=worksheet_path)

        # A missing or empty state is treated as 'visible'.
        new_ws.sheet_state = sheet.get('state') or 'visible'
        wb._add_sheet(new_ws)

        if not read_only:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive, valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))
def _load_workbook(wb, archive, filename, read_only, keep_vba):
    """Load properties, theme, styles, worksheets and links from ``archive`` into ``wb``.

    :param wb: workbook object to populate
    :param archive: opened zip archive of the workbook
    :param filename: path or binary file-like object the archive was opened from
    :param read_only: when true, worksheets are created from their path only
        and comments and drawings are not loaded
    :param keep_vba: when true, an in-memory copy of the whole file is
        attached to the workbook as ``wb.vba_archive``
    """
    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        try:
            # ``with`` closes the handle even when the read fails.
            with open(filename, 'rb') as f:
                s = f.read()
        except Exception:
            # open() rejects file-like objects, so read those directly and
            # restore their position. Catching Exception instead of using a
            # bare ``except`` lets KeyboardInterrupt/SystemExit propagate.
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb._read_workbook_settings(archive.read(ARC_WORKBOOK))

    # what content types do we have?
    cts = dict(read_content_types(archive))
    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        # Archive member names carry no leading slash.
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    try:
        # some writers don't output a theme, live with it (fixes #160)
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_table, color_index, cond_styles = read_style_table(
        archive.read(ARC_STYLE))
    wb.shared_styles = style_table
    wb.style_properties = {'dxf_list': cond_styles}
    wb.cond_styles = cond_styles

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if worksheet_path not in valid_files:
            continue

        if not read_only:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, shared_strings, style_table,
                                    color_index=color_index,
                                    keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(None, wb, sheet_name, shared_strings,
                                    style_table,
                                    color_index=color_index,
                                    worksheet_path=worksheet_path)

        # A missing or empty state is treated as 'visible'.
        new_ws.sheet_state = sheet.get('state') or 'visible'
        wb._add_sheet(new_ws)

        if not read_only:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive, valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

            drawings_file = get_drawings_file(worksheet_path, archive, valid_files)
            if drawings_file is not None:
                read_drawings(new_ws, drawings_file, archive, valid_files)

    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):
    """Load properties, theme, styles, worksheets and named ranges from ``archive`` into ``wb``.

    :param wb: workbook object to populate
    :param archive: opened zip archive of the workbook
    :param filename: path or binary file-like object the archive was opened from
    :param use_iterators: when true, worksheets are created from their path
        only and comments are not loaded
    :param keep_vba: when true, an in-memory copy of the whole file is
        attached to the workbook as ``wb.vba_archive``
    """
    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        try:
            # ``with`` closes the handle even when the read fails.
            with open(filename, 'rb') as f:
                s = f.read()
        except Exception:
            # open() rejects file-like objects, so read those directly and
            # restore their position. Catching Exception instead of using a
            # bare ``except`` lets KeyboardInterrupt/SystemExit propagate.
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if use_iterators:
        wb._archive = ZipFile(filename)

    # get workbook-level information; a KeyError from either read falls back
    # to default document properties.
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        wb.read_workbook_settings(archive.read(ARC_WORKBOOK))
    except KeyError:
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}

    try:
        # some writers don't output a theme, live with it (fixes #160)
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    # The 'table' entry is passed to the worksheet reader; the remaining
    # entries are kept on the workbook.
    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        # Detected paths are prefixed with PACKAGE_XL to form member names.
        worksheet_path = '%s/%s' % (PACKAGE_XL, sheet['path'])
        if worksheet_path not in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(
                archive.read(worksheet_path), wb, sheet_name, string_table,
                style_table,
                color_index=style_properties['color_index'],
                keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(
                None, wb, sheet_name, string_table, style_table,
                color_index=style_properties['color_index'],
                worksheet_path=worksheet_path)

        wb.add_sheet(new_ws)

        if not use_iterators:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive, valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
def test_read_complex_style_mappings(datadir):
    """The first element parsed from 'complex-styles.xml' holds 29 styles; the last one is not bold."""
    datadir.chdir()
    with open("complex-styles.xml") as stream:
        xml_text = stream.read()
    styles = read_style_table(xml_text)[0]

    assert len(styles) == 29
    last_style = styles[-1]
    assert last_style.font.bold is False
def test_read_cell_style(datadir):
    """Parsing 'empty-workbook-styles.xml' gives a result of length 3."""
    datadir.chdir()
    with open("empty-workbook-styles.xml") as stream:
        xml_text = stream.read()
    parsed = read_style_table(xml_text)
    assert len(parsed) == 3
def _load_workbook(wb, archive, filename, use_iterators, keep_vba): valid_files = archive.namelist() # If are going to preserve the vba then attach a copy of the archive to the # workbook so that is available for the save. if keep_vba: try: f = open(filename, 'rb') s = f.read() f.close() except: pos = filename.tell() filename.seek(0) s = filename.read() filename.seek(pos) wb.vba_archive = ZipFile(BytesIO(s), 'r') if use_iterators: wb._archive = ZipFile(filename) # get workbook-level information try: wb.properties = read_properties_core(archive.read(ARC_CORE)) wb.read_workbook_settings(archive.read(ARC_WORKBOOK)) except KeyError: wb.properties = DocumentProperties() try: shared_strings = read_string_table(archive.read(ARC_SHARED_STRINGS)) except KeyError: shared_strings = [] try: wb.loaded_theme = archive.read(ARC_THEME) # some writers don't output a theme, live with it (fixes #160) except KeyError: assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?" 
style_properties = read_style_table(archive.read(ARC_STYLE)) style_table = style_properties.pop('table') wb.shared_styles = style_properties.pop('list') wb.style_properties = style_properties wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK)) # get worksheets wb.worksheets = [] # remove preset worksheet for sheet in detect_worksheets(archive): sheet_name = sheet['title'] worksheet_path = sheet['path'] if not worksheet_path in valid_files: continue if not use_iterators: new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, shared_strings, style_table, color_index=style_properties['color_index'], keep_vba=keep_vba) else: new_ws = read_worksheet(None, wb, sheet_name, shared_strings, style_table, color_index=style_properties['color_index'], worksheet_path=worksheet_path) wb.add_sheet(new_ws) if not use_iterators: # load comments into the worksheet cells comments_file = get_comments_file(worksheet_path, archive, valid_files) if comments_file is not None: read_comments(new_ws, archive.read(comments_file)) wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))
raise OpenModeError("File-object must be opened in binary mode") try: archive = ZipFile(filename, 'r', ZIP_DEFLATED) except (BadZipfile, RuntimeError, IOError, ValueError), e: raise InvalidFileException(unicode(e)) wb = Workbook() wb._set_optimized_read() try: # get workbook-level information wb.properties = read_properties_core(archive.read(ARC_CORE)) try: string_table = read_string_table(archive.read(ARC_SHARED_STRINGS)) except KeyError: string_table = {} style_table = read_style_table(archive.read(ARC_STYLE)) # get worksheets wb.worksheets = [] # remove preset worksheet sheet_names = read_sheets_titles(archive.read(ARC_APP)) for i, sheet_name in enumerate(sheet_names): sheet_codename = 'sheet%d.xml' % (i + 1) worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename) if not use_iterators: new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table) else: xml_source = unpack_worksheet(archive, worksheet_path) new_ws = read_worksheet(xml_source, wb, sheet_name, string_table, style_table, filename,
def _load_workbook(wb, archive, filename, use_iterators, keep_vba): valid_files = archive.namelist() # If are going to preserve the vba then attach the archive to the # workbook so that is available for the save. if keep_vba: wb.vba_archive = archive if use_iterators: wb._archive = ZipFile(filename) # get workbook-level information try: wb.properties = read_properties_core(archive.read(ARC_CORE)) wb.read_workbook_settings(archive.read(ARC_WORKBOOK)) except KeyError: wb.properties = DocumentProperties() try: string_table = read_string_table(archive.read(ARC_SHARED_STRINGS)) except KeyError: string_table = {} try: wb.loaded_theme = archive.read( ARC_THEME ) # some writers don't output a theme, live with it (fixes #160) except KeyError: assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?" style_properties = read_style_table(archive.read(ARC_STYLE)) style_table = style_properties.pop('table') wb.style_properties = style_properties wb.properties.excel_base_date = read_excel_base_date( xml_source=archive.read(ARC_WORKBOOK)) # get worksheets wb.worksheets = [] # remove preset worksheet content_types = read_content_types(archive.read(ARC_CONTENT_TYPES)) sheet_types = [(sheet, contyp) for sheet, contyp in content_types if contyp in WORK_OR_CHART_TYPE] sheet_names = read_sheets_titles(archive.read(ARC_WORKBOOK)) worksheet_names = [ worksheet for worksheet, sheet_type in zip(sheet_names, sheet_types) if sheet_type[1] == VALID_WORKSHEET ] for i, sheet_name in enumerate(worksheet_names): sheet_codename = 'sheet%d.xml' % (i + 1) worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename) if not worksheet_path in valid_files: continue if not use_iterators: new_ws = read_worksheet( archive.read(worksheet_path), wb, sheet_name, string_table, style_table, color_index=style_properties['color_index'], keep_vba=keep_vba) else: new_ws = read_worksheet( None, wb, sheet_name, string_table, style_table, 
color_index=style_properties['color_index'], sheet_codename=sheet_codename) wb.add_sheet(new_ws, index=i) if not use_iterators: # load comments into the worksheet cells comments_file = get_comments_file(sheet_codename, archive, valid_files) if comments_file is not None: read_comments(new_ws, archive.read(comments_file)) wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)