def parse_auto_filter(self, element):
    """Copy the settings of an auto-filter element onto the worksheet.

    The filter reference is taken from the element's ``ref`` attribute.
    Each ``filterColumn`` that has a ``filters`` child is registered with
    its values and ``blank`` setting, and each ``sortCondition`` is added
    as a sort condition.
    """
    column_tag = '{%s}filterColumn' % SHEET_MAIN_NS
    filters_tag = '{%s}filters' % SHEET_MAIN_NS
    filter_tag = '{%s}filter' % SHEET_MAIN_NS
    sort_tag = '{%s}sortCondition' % SHEET_MAIN_NS

    self.ws.auto_filter.ref = element.get("ref")

    for column in safe_iterator(element, column_tag):
        criteria = column.find(filters_tag)
        if criteria is not None:
            values = []
            for entry in safe_iterator(criteria, filter_tag):
                values.append(entry.get("val"))
            self.ws.auto_filter.add_filter_column(
                column.get("colId"), values, blank=criteria.get("blank"))

    for condition in safe_iterator(element, sort_tag):
        self.ws.auto_filter.add_sort_condition(
            condition.get("ref"), condition.get("descending"))
def _parse_xfs(self, node):
    """Build the indexed list of style records from ``xf`` elements.

    Every ``xf`` below *node* becomes a ``StyleArray``.  Alignment and
    protection children are registered with ``self.alignments`` and
    ``self.protections``; number formats found in
    ``self.custom_number_formats`` are registered with
    ``self.number_formats`` and their id is rewritten accordingly.
    """
    xf_tag = '{%s}xf' % SHEET_MAIN_NS
    alignment_tag = '{%s}alignment' % SHEET_MAIN_NS
    protection_tag = '{%s}protection' % SHEET_MAIN_NS

    records = []
    for xf in safe_iterator(node, xf_tag):
        record = StyleArray.from_tree(xf)

        alignment_node = xf.find(alignment_tag)
        if alignment_node is not None:
            record.alignmentId = self.alignments.add(
                Alignment(**alignment_node.attrib))

        protection_node = xf.find(protection_tag)
        if protection_node is not None:
            record.protectionId = self.protections.add(
                Protection(**protection_node.attrib))

        # custom formats are normalised: position in the shared list + 164
        fmt_id = int(xf.get("numFmtId", 0))
        if fmt_id in self.custom_number_formats:
            code = self.custom_number_formats[fmt_id]
            record.numFmtId = self.number_formats.add(code) + 164

        records.append(record)
    return IndexedList(records)
def _get_row(self, element, min_col=1, max_col=None):
    """Return cells from a particular row, padded with empty cells.

    :param element: element whose ``CELL_TAG`` children are read
    :param min_col: first column index to return
    :param max_col: last column index to return; ``None`` means no upper
        bound and no trailing padding
    :return: generator of ``ReadOnlyCell`` objects

    Cells left of ``min_col`` are skipped and reading stops at the first
    cell right of ``max_col``.  Columns inside the window that have no
    cell are yielded as value-less cells.
    """
    col_counter = min_col

    # Row number used for padding when the row has no cells at all.
    # Previously ``row`` was unbound in that case and the trailing padding
    # raised.  NOTE(review): assumes *element* is the row element and that
    # its ``r`` attribute, when present, is the row number - confirm.
    row_attr = element.get('r')
    row = int(row_attr) if row_attr and row_attr.isdigit() else None

    for cell in safe_iterator(element, CELL_TAG):
        coord = cell.get('r')
        column_str, row = coordinate_from_string(coord)
        column = column_index_from_string(column_str)

        if max_col is not None and column > max_col:
            break

        if min_col <= column:
            # Cells skipped left of the window still advance the counter,
            # so clamp the start: padding must never begin before min_col.
            for _ in range(max(col_counter, min_col), column):
                yield ReadOnlyCell(self, row, None, None)

            data_type = cell.get('t', 'n')
            style_id = int(cell.get('s', 0))
            formula = cell.findtext(FORMULA_TAG)
            value = cell.find(VALUE_TAG)
            if value is not None:
                value = value.text
            if formula is not None and not self.parent.data_only:
                data_type = Cell.TYPE_FORMULA
                value = "=%s" % formula
            yield ReadOnlyCell(self, row, column_str, value, data_type,
                               style_id)
        col_counter = column + 1

    if max_col is not None:
        # fill the rest of the window, again without going left of min_col
        for _ in range(max(col_counter, min_col), max_col + 1):
            yield ReadOnlyCell(self, row, None, None)
def parse_pattern_fill(self, node):
    """Create a ``PatternFill`` from a pattern fill element.

    The element's own attributes become keyword arguments.  Every other
    element yielded by ``safe_iterator(node)`` adds a ``Color`` built from
    its attributes, keyed by its local tag name.
    """
    options = dict(node.attrib)
    for descendant in safe_iterator(node):
        if descendant is node:
            continue
        key = localname(descendant)
        options[key] = Color(**dict(descendant.attrib))
    return PatternFill(**options)
def read_named_ranges(xml_source, workbook):
    """Yield the defined names found in the workbook XML.

    Names matching ``DISCARDED_RANGES`` are skipped with a warning.
    External references and anything that is not a cell range are yielded
    as ``NamedValue``.  Cell ranges become ``NamedRange`` objects limited
    to worksheets present in *workbook*; a range with no remaining
    destination is dropped.  ``scope`` is set from ``localSheetId``.
    """
    known_titles = set(sheet.title for sheet in workbook.worksheets)
    tree = fromstring(xml_source)
    name_tag = '{%s}definedName' % SHEET_MAIN_NS

    for defined in safe_iterator(tree, name_tag):
        name = defined.get('name')
        if DISCARDED_RANGES.match(name):
            warnings.warn("Discarded range with reserved name")
            continue

        text = defined.text
        if not external_range(text) and refers_to_range(text):
            # Excel does not always maintain the list of names, so a valid
            # range may still point at a worksheet that no longer exists
            targets = []
            for title, cells in split_named_range(text):
                if title in known_titles:
                    targets.append((workbook[title], cells))
            if not targets:
                continue
            result = NamedRange(name, targets)
        else:
            # references to external workbooks and plain values
            result = NamedValue(name, text)

        result.scope = defined.get("localSheetId")
        yield result
def parse_borders(self):
    """Yield a parsed border for each ``border`` element.

    Nothing is yielded when the root has no ``borders`` child.
    """
    container = self.root.find('{%s}borders' % SHEET_MAIN_NS)
    if container is None:
        return
    border_tag = '{%s}border' % SHEET_MAIN_NS
    for node in safe_iterator(container, border_tag):
        yield self.parse_border(node)
def read_named_ranges(xml_source, workbook):
    """Generate named ranges and named values from the workbook XML.

    Reserved names (``DISCARDED_RANGES``) trigger a warning and are
    skipped.  A definition that is not external and refers to a range is
    returned as ``NamedRange`` with the destinations whose worksheet
    exists in *workbook*, or skipped if none is left.  Everything else is
    returned as ``NamedValue``.
    """
    titles = set(sheet.title for sheet in workbook.worksheets)
    document = fromstring(xml_source)

    for node in safe_iterator(document, '{%s}definedName' % SHEET_MAIN_NS):
        label = node.get('name')
        if DISCARDED_RANGES.match(label):
            warnings.warn("Discarded range with reserved name")
            continue

        definition = node.text
        is_local_range = (not external_range(definition)
                          and refers_to_range(definition))
        if not is_local_range:
            # names pointing at external workbooks are treated as values,
            # as is anything that is not a cell range
            entry = NamedValue(label, definition)
        else:
            # the list of names may mention worksheets that were removed
            existing = [(workbook[title], cells)
                        for title, cells in split_named_range(definition)
                        if title in titles]
            if not existing:
                continue
            entry = NamedRange(label, existing)

        entry.scope = node.get("localSheetId")
        yield entry
def get_cells(self, min_row, min_col, max_row, max_col):
    """Stream the cells inside a window of the worksheet XML.

    The source is read with ``iterparse``.  Reading stops at the first row
    beyond ``max_row`` and, within a row, at the first cell beyond
    ``max_col``; ``None`` disables either limit.  Cells with a formula
    yield the formula prefixed with ``=`` unless the parent is in
    ``data_only`` mode.  Elements are cleared once processed.
    """
    child_tags = (CELL_TAG, VALUE_TAG, FORMULA_TAG)
    events = iterparse(self.xml_source, tag=[ROW_TAG], remove_blank_text=True)
    for _event, element in events:
        if element.tag == ROW_TAG:
            row = int(element.get("r"))
            if max_row is not None and row > max_row:
                break
            if min_row <= row:
                for cell in safe_iterator(element, CELL_TAG):
                    column_str, row = coordinate_from_string(cell.get('r'))
                    column = column_index_from_string(column_str)
                    if max_col is not None and column > max_col:
                        break
                    if column < min_col:
                        continue
                    data_type = cell.get('t', 'n')
                    style_id = cell.get('s')
                    formula = cell.findtext(FORMULA_TAG)
                    value = cell.findtext(VALUE_TAG)
                    if not (formula is None or self.parent.data_only):
                        data_type = Cell.TYPE_FORMULA
                        value = "=%s" % formula
                    yield ReadOnlyCell(row, column_str, value, data_type,
                                       style_id)
        # sub-elements of rows must be left intact
        if element.tag not in child_tags:
            element.clear()
def parser_conditional_formatting(self, element):
    """Collect conditional formatting rules into the worksheet.

    For every ``conditionalFormatting`` element with a ``sqref``, each
    ``cfRule`` that has a type other than ``dataBar`` is converted into a
    dictionary and appended to
    ``self.ws.conditional_formatting.parse_rules[sqref]``.  The dictionary
    holds the rule attributes, any formulae, and the ``colorScale`` and
    ``iconSet`` settings when present.
    """
    rule_tag = '{%s}cfRule' % SHEET_MAIN_NS
    formula_tag = '{%s}formula' % SHEET_MAIN_NS
    color_scale_tag = '{%s}colorScale' % SHEET_MAIN_NS
    cfvo_tag = '{%s}cfvo' % SHEET_MAIN_NS
    color_tag = '{%s}color' % SHEET_MAIN_NS
    icon_set_tag = '{%s}iconSet' % SHEET_MAIN_NS

    def read_cfvo(node):
        """Return the ``type`` and ``val`` attributes that are present."""
        entry = {}
        for key in ('type', 'val'):
            found = node.get(key)
            if found is not None:
                entry[key] = found
        return entry

    for block in safe_iterator(element, '{%s}conditionalFormatting' % SHEET_MAIN_NS):
        range_string = block.get('sqref')
        if not range_string:
            # Potentially flag - this attribute should always be present.
            continue
        rule_nodes = block.findall(rule_tag)
        if range_string not in self.ws.conditional_formatting.parse_rules:
            self.ws.conditional_formatting.parse_rules[range_string] = []

        for rule_node in rule_nodes:
            rule_type = rule_node.get('type')
            if not rule_type or rule_type == 'dataBar':
                # dataBar conditional formatting isn't supported, as it
                # relies on the complex <extLst> tag
                continue

            rule = {'type': rule_type}
            for attr in ConditionalFormatting.rule_attributes:
                raw = rule_node.get(attr)
                if raw is None:
                    continue
                rule[attr] = int(raw) if attr == 'priority' else raw

            for formula_node in rule_node.findall(formula_tag):
                rule.setdefault('formula', []).append(formula_node.text)

            scale = rule_node.find(color_scale_tag)
            if scale is not None:
                rule['colorScale'] = {
                    'cfvo': [read_cfvo(n) for n in scale.findall(cfvo_tag)],
                    'color': [Color(**dict(c.items()))
                              for c in scale.findall(color_tag)],
                }

            icons = rule_node.find(icon_set_tag)
            if icons is not None:
                rule['iconSet'] = {'cfvo': []}
                for icon_attr in ConditionalFormatting.icon_attributes:
                    setting = icons.get(icon_attr)
                    if setting is not None:
                        rule['iconSet'][icon_attr] = setting
                for cfvo_node in icons.findall(cfvo_tag):
                    rule['iconSet']['cfvo'].append(read_cfvo(cfvo_node))

            self.ws.conditional_formatting.parse_rules[range_string].append(rule)
def read_sheets(archive):
    """Yield ``(title, relationship number)`` for each sheet element.

    The relationship number is the sheet's relationship id with the text
    ``rId`` removed, converted to ``int``.
    """
    root = fromstring(archive.read(ARC_WORKBOOK))
    sheet_tag = '{%s}sheet' % SHEET_MAIN_NS
    rel_attr = "{%s}id" % REL_NS
    for sheet in safe_iterator(root, sheet_tag):
        rel = sheet.get(rel_attr)
        number = int(rel.replace("rId", ""))
        yield sheet.get('name'), number
def get_cells(self, min_row, min_col, max_row, max_col):
    """Yield read-only cells from the requested window of the sheet.

    Rows are taken from ``iterparse`` over ``self.xml_source``.  Parsing
    ends at the first row past ``max_row``; inside a row it ends at the
    first cell past ``max_col``.  A limit of ``None`` is ignored.  When a
    cell has a formula and the parent is not ``data_only`` the cell value
    is the formula prefixed with ``=``.
    """
    keep_intact = (CELL_TAG, VALUE_TAG, FORMULA_TAG)
    parser = iterparse(self.xml_source, tag=[ROW_TAG], remove_blank_text=True)
    for _event, element in parser:
        if element.tag == ROW_TAG:
            row = int(element.get("r"))
            if max_row is not None and row > max_row:
                break
            if min_row <= row:
                for cell in safe_iterator(element, CELL_TAG):
                    reference = cell.get('r')
                    column_str, row = coordinate_from_string(reference)
                    column = column_index_from_string(column_str)
                    if max_col is not None and column > max_col:
                        break
                    if column < min_col:
                        continue
                    data_type = cell.get('t', 'n')
                    style_id = cell.get('s')
                    formula = cell.findtext(FORMULA_TAG)
                    value = cell.findtext(VALUE_TAG)
                    use_formula = (formula is not None
                                   and not self.parent.data_only)
                    if use_formula:
                        data_type = Cell.TYPE_FORMULA
                        value = "=%s" % formula
                    yield ReadOnlyCell(self, row, column_str, value,
                                       data_type, style_id)
        # only elements that are not row sub-elements are cleared
        if element.tag not in keep_intact:
            element.clear()
def read_sheets(archive):
    """Yield ``(relationship id, title, state)`` for each sheet element."""
    root = fromstring(archive.read(ARC_WORKBOOK))
    sheet_tag = '{%s}sheet' % SHEET_MAIN_NS
    rel_attr = "{%s}id" % REL_NS
    for sheet in safe_iterator(root, sheet_tag):
        yield sheet.get(rel_attr), sheet.get('name'), sheet.get('state')
def read_named_ranges(xml_source, workbook):
    """Read named ranges, excluding poorly defined ranges.

    :param xml_source: workbook XML containing ``definedName`` elements
    :param workbook: workbook whose worksheets the ranges refer to
    :return: generator of ``NamedRange`` and
        ``NamedRangeContainingValue`` objects

    Names matching ``DISCARDED_RANGES`` or ``BUGGY_NAMED_RANGES`` are
    skipped, as are ranges that refer only to missing worksheets.  When a
    ``localSheetId`` is given, ``scope`` is set to the worksheet at that
    position in ``workbook.worksheets``.
    """
    sheetnames = set(sheet.title for sheet in workbook.worksheets)
    root = fromstring(xml_source)
    for name_node in safe_iterator(root, '{%s}definedName' % SHEET_MAIN_NS):
        range_name = name_node.get('name')
        if (DISCARDED_RANGES.search(range_name)
                or BUGGY_NAMED_RANGES.search(range_name)):
            continue

        node_text = name_node.text
        if node_text is not None and refers_to_range(node_text):
            destinations = split_named_range(node_text)
            # it can happen that a valid named range references
            # a missing worksheet, when Excel didn't properly maintain
            # the named range list
            destinations = [(workbook[sheet], cells)
                            for sheet, cells in destinations
                            if sheet in sheetnames]
            if not destinations:
                continue
            named_range = NamedRange(range_name, destinations)
        else:
            # Empty definitions and definitions that are not cell ranges
            # are kept as values.  Without this branch a non-range
            # definition reused the object built in the previous iteration
            # (yielding it twice) or raised on the first iteration.
            named_range = NamedRangeContainingValue(range_name, node_text)

        location_id = name_node.get("localSheetId")
        if location_id is not None:
            named_range.scope = workbook.worksheets[int(location_id)]
        yield named_range
def _get_row(self, element, min_col=1, max_col=None):
    """Yield the cells of a row between ``min_col`` and ``max_col``.

    Columns without a cell are filled with ``EMPTY_CELL``; trailing
    padding up to ``max_col`` is only added when ``max_col`` is given.
    Formula cells carry the formula prefixed with ``=`` unless the parent
    is in ``data_only`` mode.
    """
    col_counter = min_col
    for cell in safe_iterator(element, CELL_TAG):
        row, column = coordinate_to_tuple(cell.get('r'))

        if max_col is not None and column > max_col:
            break

        if min_col <= column:
            # pad row with missing cells, never starting before min_col
            for _ in range(max(col_counter, min_col), column):
                yield EMPTY_CELL

            data_type = cell.get('t', 'n')
            style_id = int(cell.get('s', 0))
            formula = cell.findtext(FORMULA_TAG)
            value_node = cell.find(VALUE_TAG)
            value = None if value_node is None else value_node.text
            if formula is not None and not self.parent.data_only:
                data_type = 'f'
                value = "=%s" % formula
            yield ReadOnlyCell(self, row, column, value, data_type, style_id)

        col_counter = column + 1

    if max_col is not None:
        remaining = max_col + 1 - col_counter
        for _ in range(remaining):
            yield EMPTY_CELL
def parse_cell_xfs(self):
    """Read styles from the shared style table.

    Each ``xf`` element of ``cellXfs`` is turned into a ``Style`` which is
    added to ``self.style_prop['list']``; the value returned by ``add`` is
    stored in ``self.style_prop['table']`` under the position of the
    ``xf`` element.  Nothing happens when ``cellXfs`` is missing.

    :raises MissingNumberFormat: for a number format id of 164 or above
        that is not in ``self.custom_num_formats``
    """
    def _flag(node, attr):
        """Read an XML boolean attribute.

        ``bool()`` of the raw string is true for "0" and "false"; only
        other non-empty values are treated as true here.
        """
        return node.get(attr) not in (None, '', '0', 'false', 'f')

    cell_xfs = self.root.find('{%s}cellXfs' % SHEET_MAIN_NS)
    styles_list = self.style_prop['list']

    if cell_xfs is None:  # can happen on bad OOXML writers (e.g. Gnumeric)
        return

    builtin_formats = NumberFormat._BUILTIN_FORMATS
    cell_xfs_nodes = safe_iterator(cell_xfs, '{%s}xf' % SHEET_MAIN_NS)
    for index, cell_xfs_node in enumerate(cell_xfs_nodes):
        _style = {}

        number_format_id = int(cell_xfs_node.get('numFmtId'))
        if number_format_id < 164:
            format_code = builtin_formats.get(number_format_id, 'General')
        else:
            format_code = self.custom_num_formats.get(number_format_id)
            if format_code is None:
                raise MissingNumberFormat('%s' % number_format_id)
        _style['number_format'] = NumberFormat(format_code=format_code)

        if _flag(cell_xfs_node, 'applyAlignment'):
            alignment = {}
            al = cell_xfs_node.find('{%s}alignment' % SHEET_MAIN_NS)
            if al is not None:
                for key in ('horizontal', 'vertical', 'indent'):
                    _value = al.get(key)
                    if _value is not None:
                        alignment[key] = _value
                alignment['wrap_text'] = _flag(al, 'wrapText')
                alignment['shrink_to_fit'] = _flag(al, 'shrinkToFit')
                text_rotation = al.get('textRotation')
                if text_rotation is not None:
                    alignment['text_rotation'] = int(text_rotation)
                # ignore justifyLastLine option when horizontal = distributed
            _style['alignment'] = Alignment(**alignment)

        if _flag(cell_xfs_node, 'applyFont'):
            _style['font'] = self.font_list[
                int(cell_xfs_node.get('fontId'))].copy()

        if _flag(cell_xfs_node, 'applyFill'):
            _style['fill'] = self.fill_list[
                int(cell_xfs_node.get('fillId'))].copy()

        if _flag(cell_xfs_node, 'applyBorder'):
            _style['border'] = self.border_list[
                int(cell_xfs_node.get('borderId'))].copy()

        if _flag(cell_xfs_node, 'applyProtection'):
            protection = {}
            prot = cell_xfs_node.find('{%s}protection' % SHEET_MAIN_NS)
            # Ignore if there are no protection sub-nodes
            if prot is not None:
                protection['locked'] = _flag(prot, 'locked')
                protection['hidden'] = _flag(prot, 'hidden')
            _style['protection'] = Protection(**protection)

        self.style_prop['table'][index] = styles_list.add(Style(**_style))
def parse_ranges(xml):
    """Yield an ``ExternalRange`` for each defined name of an external book.

    Nothing is yielded when the document has no ``externalBook`` child.
    """
    root = fromstring(xml)
    external_book = root.find('{%s}externalBook' % SHEET_MAIN_NS)
    if external_book is not None:
        container = external_book.find('{%s}definedNames' % SHEET_MAIN_NS)
        name_tag = '{%s}definedName' % SHEET_MAIN_NS
        for defined in safe_iterator(container, name_tag):
            yield ExternalRange(**defined.attrib)
def parse_row_dimensions(self, row):
    """Store the row's dimension on the worksheet and parse its cells.

    The ``RowDimension`` is built from the row's attributes plus the
    worksheet and replaces any entry already stored under its index.
    """
    options = dict(row.attrib, worksheet=self.ws)
    dimension = RowDimension(**options)
    self.ws.row_dimensions[dimension.index] = dimension

    for cell_node in safe_iterator(row, self.CELL_TAG):
        self.parse_cell(cell_node)
def parse_color_index(self):
    """Load the indexed colour list, if the stylesheet defines one.

    ``self.color_index`` is only assigned when both the ``colors`` and
    the ``indexedColors`` elements exist.
    """
    colors = self.root.find('{%s}colors' % SHEET_MAIN_NS)
    if colors is None:
        return
    indexed = colors.find('{%s}indexedColors' % SHEET_MAIN_NS)
    if indexed is None:
        return

    palette = []
    for entry in safe_iterator(indexed, '{%s}rgbColor' % SHEET_MAIN_NS):
        palette.append(entry.get('rgb'))
    self.color_index = palette
def read_properties(xml_source):
    """Populate a ``DocumentProperties`` object from properties XML.

    Every element yielded by ``safe_iterator`` for the parsed root sets
    the attribute named after its local tag to the element's text.
    """
    props = DocumentProperties()
    for element in safe_iterator(fromstring(xml_source)):
        setattr(props, localname(element), element.text)
    return props
def test_write_chart(self, bar_chart):
    """A written bar chart contains the chart and both axis elements."""
    writer = BarChartWriter(bar_chart)
    writer._write_chart()

    expected = ["{%s}barChart" % CHART_NS,
                "{%s}valAx" % CHART_NS,
                "{%s}catAx" % CHART_NS]
    written = []
    for node in safe_iterator(writer.root):
        written.append(node.tag)

    for name in expected:
        assert name in written
def read_rels(archive):
    """Return the workbook relationships keyed by relationship number.

    The key is the ``Id`` attribute with the text ``rId`` removed,
    converted to ``int``; the value is ``{'path': Target}``.
    """
    root = fromstring(archive.read(ARC_WORKBOOK_RELS))
    relationships = {}
    for rel in safe_iterator(root, '{%s}Relationship' % PKG_REL_NS):
        number = int(rel.get('Id').replace("rId", ""))
        relationships[number] = {'path': rel.get('Target')}
    return relationships
def parser(element):
    """Build a ``DataValidation`` from a dataValidation element.

    The element's attributes are passed as keyword arguments; the text of
    ``formula1`` and ``formula2`` elements is then set on the result.
    """
    validation = DataValidation(**element.attrib)
    for name in ("formula1", "formula2"):
        qualified = "{%s}%s" % (SHEET_MAIN_NS, name)
        for formula_node in safe_iterator(element, qualified):
            setattr(validation, name, formula_node.text)
    return validation
def read_sheets(archive):
    """Yield the attributes of each sheet element of the workbook.

    The namespaced relationship id is moved to the plain key ``id``; the
    element's own attribute mapping is modified and yielded.
    """
    root = fromstring(archive.read(ARC_WORKBOOK))
    rel_key = "{%s}id" % REL_NS
    for sheet in safe_iterator(root, '{%s}sheet' % SHEET_MAIN_NS):
        attributes = sheet.attrib
        attributes['id'] = attributes.pop(rel_key)
        yield attributes
def read_rels(archive):
    """Map relationship numbers to their target path.

    Reads ``ARC_WORKBOOK_RELS`` from *archive*.  Each ``Relationship``
    contributes ``{'path': Target}`` under its ``Id`` with ``rId``
    removed and converted to ``int``.
    """
    document = fromstring(archive.read(ARC_WORKBOOK_RELS))
    rel_tag = '{%s}Relationship' % PKG_REL_NS
    return dict(
        (int(node.get('Id').replace("rId", "")), {'path': node.get('Target')})
        for node in safe_iterator(document, rel_tag)
    )
def _parse_xfs(self, node):
    """Read the ``xf`` records below *node* into styles and style ids.

    For every record a ``Style`` is built from the parts the record
    applies, together with a ``StyleId`` built from the record's
    attributes.  The results are stored in ``self.shared_styles`` and
    ``self.cell_styles``.
    """
    builtin_formats = numbers.BUILTIN_FORMATS
    alignment_tag = '{%s}alignment' % SHEET_MAIN_NS
    protection_tag = '{%s}protection' % SHEET_MAIN_NS

    styles = []
    style_ids = []
    for xf in safe_iterator(node, '{%s}xf' % SHEET_MAIN_NS):
        style_args = {}
        id_args = dict(xf.attrib)

        fmt_id = int(xf.get("numFmtId", 0))
        font_id = int(xf.get("fontId", 0))
        fill_id = int(xf.get("fillId", 0))
        border_id = int(xf.get("borderId", 0))

        if fmt_id < 164:
            format_code = builtin_formats.get(fmt_id, 'General')
        else:
            try:
                format_code = self.number_formats[fmt_id - 164]
            except IndexError:
                # unknown custom id: fall back to the first custom format
                format_code = (self.number_formats[0]
                               if self.number_formats else None)
        style_args['number_format'] = format_code

        if bool_attrib(xf, 'applyAlignment'):
            alignment_node = xf.find(alignment_tag)
            if alignment_node is not None:
                alignment = Alignment(**alignment_node.attrib)
                id_args['alignmentId'] = self.alignments.add(alignment)
                style_args['alignment'] = alignment

        if bool_attrib(xf, 'applyFont'):
            style_args['font'] = self.font_list[font_id]

        if bool_attrib(xf, 'applyFill'):
            style_args['fill'] = self.fill_list[fill_id]

        if bool_attrib(xf, 'applyBorder'):
            style_args['border'] = self.border_list[border_id]

        if bool_attrib(xf, 'applyProtection'):
            protection_node = xf.find(protection_tag)
            if protection_node is not None:
                protection = Protection(**protection_node.attrib)
                id_args['protectionId'] = self.protections.add(protection)
                style_args['protection'] = protection

        styles.append(Style(**style_args))
        style_ids.append(StyleId(**id_args))

    self.shared_styles = styles
    self.cell_styles = IndexedList(style_ids)
def parse_row_dimensions(self, row):
    """Record row height and style, then parse the row's cells.

    A ``RowDimension`` is only created when the worksheet has none for
    the row; the height defaults to ``-1`` when ``ht`` is absent.
    """
    index = int(row.get('r'))
    height = row.get('ht', -1)
    if index not in self.ws.row_dimensions:
        self.ws.row_dimensions[index] = RowDimension(index, height=height)

    style_ref = row.get('s')
    # NOTE(review): any non-empty customFormat value, "0" included, counts
    # as set here.
    if row.get('customFormat') and style_ref:
        self.ws._styles[index] = self.style_table.get(int(style_ref))

    for cell_node in safe_iterator(row, self.CELL_TAG):
        self.parse_cell(cell_node)
def parse_row_dimensions(self, row):
    """Register the row's dimension and style, then parse its cells.

    An existing ``RowDimension`` for the row is left untouched.  The row
    style is stored only when ``customFormat`` and ``s`` are both set.
    """
    number = int(row.get('r'))
    height = row.get('ht')
    if number not in self.ws.row_dimensions:
        self.ws.row_dimensions[number] = RowDimension(number, height=height)

    style_ref = row.get('s')
    # NOTE(review): any non-empty customFormat value, "0" included, counts
    # as set here.
    has_custom_style = row.get('customFormat') and style_ref
    if has_custom_style:
        self.ws._styles[number] = self.style_table.get(int(style_ref))

    for cell_node in safe_iterator(row, self.CELL_TAG):
        self.parse_cell(cell_node)
def parse_custom_num_formats(self):
    """Collect the custom number formats of the stylesheet.

    ``self.custom_num_formats`` maps each ``numFmtId`` to its lower-cased
    ``formatCode``; it is an empty dict when ``numFmts`` is missing.
    """
    collected = {}
    container = self.root.find('{%s}numFmts' % SHEET_MAIN_NS)
    if container is not None:
        for node in safe_iterator(container, '{%s}numFmt' % SHEET_MAIN_NS):
            key = int(node.get('numFmtId'))
            code = node.get('formatCode').lower()
            collected[key] = code
    self.custom_num_formats = collected
def parse_row_dimensions(self, row):
    """Create the row's dimension if needed and parse its cells.

    Attributes in a custom namespace (names starting with ``{``) are
    ignored.  A dimension already stored for the row index is kept.
    """
    # ignore custom namespaces
    options = dict((name, value) for name, value in dict(row.attrib).items()
                   if not name.startswith('{'))
    options['worksheet'] = self.ws

    dimension = RowDimension(**options)
    if dimension.index not in self.ws.row_dimensions:
        self.ws.row_dimensions[dimension.index] = dimension

    for cell_node in safe_iterator(row, self.CELL_TAG):
        self.parse_cell(cell_node)
def test_write_chart(self, line_chart):
    """A written line chart contains the chart and both axis elements."""
    writer = LineChartWriter(line_chart)
    writer._write_chart()

    written = [node.tag for node in safe_iterator(writer.root)]
    expected = ('{%s}lineChart' % CHART_NS,
                '{%s}valAx' % CHART_NS,
                '{%s}catAx' % CHART_NS)
    for name in expected:
        assert name in written
def parse_row_dimensions(self, row):
    """Create the row's dimension and style entry, then parse its cells.

    Attributes in a custom namespace are ignored.  An existing dimension
    for the row index is kept.  The row style is looked up in
    ``self.style_table`` when ``customFormat`` is set and the dimension
    has a style.
    """
    # ignore custom namespaces
    options = dict((name, value) for name, value in dict(row.attrib).items()
                   if not name.startswith('{'))
    dimension = RowDimension(**options)

    if dimension.index not in self.ws.row_dimensions:
        self.ws.row_dimensions[dimension.index] = dimension

    # NOTE(review): any non-empty customFormat value, "0" included, counts
    # as set here.
    if row.get('customFormat') and dimension.style:
        self.ws._styles[dimension.index] = self.style_table.get(dimension.style)

    for cell_node in safe_iterator(row, self.CELL_TAG):
        self.parse_cell(cell_node)
def parse_cell_xfs(self):
    """Read styles from the shared style table.

    Each ``xf`` element of ``cellXfs`` produces a ``Style`` that is stored
    in ``self.style_prop['table']`` under the position of the element.
    Fonts, fills and borders are deep copies of the shared entries.
    Nothing happens when ``cellXfs`` is missing.

    :raises MissingNumberFormat: for a number format id of 164 or above
        that is not in ``self.custom_num_formats``
    """
    def _flag(node, attr):
        """Read an XML boolean attribute.

        ``bool()`` of the raw string is true for "0" and "false"; only
        other non-empty values are treated as true here.
        """
        return node.get(attr) not in (None, '', '0', 'false', 'f')

    cell_xfs = self.root.find('{%s}cellXfs' % SHEET_MAIN_NS)

    if cell_xfs is None:  # can happen on bad OOXML writers (e.g. Gnumeric)
        return

    builtin_formats = NumberFormat._BUILTIN_FORMATS
    cell_xfs_nodes = safe_iterator(cell_xfs, '{%s}xf' % SHEET_MAIN_NS)
    for index, cell_xfs_node in enumerate(cell_xfs_nodes):
        new_style = Style(static=True)

        number_format_id = int(cell_xfs_node.get('numFmtId'))
        if number_format_id < 164:
            new_style.number_format.format_code = \
                builtin_formats.get(number_format_id, 'General')
        else:
            fmt_code = self.custom_num_formats.get(number_format_id)
            if fmt_code is None:
                raise MissingNumberFormat('%s' % number_format_id)
            new_style.number_format.format_code = fmt_code

        if _flag(cell_xfs_node, 'applyAlignment'):
            alignment = cell_xfs_node.find('{%s}alignment' % SHEET_MAIN_NS)
            if alignment is not None:
                for key in ('horizontal', 'vertical', 'indent'):
                    _value = alignment.get(key)
                    if _value is not None:
                        setattr(new_style.alignment, key, _value)
                new_style.alignment.wrap_text = _flag(alignment, 'wrapText')
                new_style.alignment.shrink_to_fit = _flag(alignment,
                                                          'shrinkToFit')
                text_rotation = alignment.get('textRotation')
                if text_rotation is not None:
                    new_style.alignment.text_rotation = int(text_rotation)
                # ignore justifyLastLine option when horizontal = distributed

        if _flag(cell_xfs_node, 'applyFont'):
            new_style.font = deepcopy(
                self.font_list[int(cell_xfs_node.get('fontId'))])

        if _flag(cell_xfs_node, 'applyFill'):
            new_style.fill = deepcopy(
                self.fill_list[int(cell_xfs_node.get('fillId'))])

        if _flag(cell_xfs_node, 'applyBorder'):
            new_style.borders = deepcopy(
                self.border_list[int(cell_xfs_node.get('borderId'))])

        if _flag(cell_xfs_node, 'applyProtection'):
            protection = cell_xfs_node.find('{%s}protection' % SHEET_MAIN_NS)
            # Ignore if there are no protection sub-nodes
            if protection is not None:
                new_style.protection.locked = _flag(protection, 'locked')
                new_style.protection.hidden = _flag(protection, 'hidden')

        self.style_prop['table'][index] = new_style
def test_write_chart(self, pie_chart):
    """check if some characteristic tags of PieChart are there

    The written chart must contain the pie chart and ``varyColors``
    elements and must not contain a category axis.
    """
    cw = PieChartWriter(pie_chart)
    cw._write_chart()

    tagnames = ['{%s}pieChart' % CHART_NS,
                '{%s}varyColors' % CHART_NS]

    chart_tags = [e.tag for e in safe_iterator(cw.root)]
    for tag in tagnames:
        assert tag in chart_tags

    # The collected tags are namespace-qualified ('{uri}name'), so the axis
    # has to be looked up the same way; the prefixed form 'c:catAx' could
    # never match and made this check pass unconditionally.
    assert '{%s}catAx' % CHART_NS not in chart_tags
def _parse_xfs(self, node):
    """Read the ``xf`` records below *node* into styles and style ids.

    Custom number formats are registered with ``self.number_formats`` and
    the record's ``numFmtId`` is rewritten to the registered position plus
    164; all other ids are looked up in the builtin formats.  The results
    are stored in ``self.shared_styles`` and ``self.cell_styles``.
    """
    builtin_formats = numbers.BUILTIN_FORMATS
    alignment_tag = '{%s}alignment' % SHEET_MAIN_NS
    protection_tag = '{%s}protection' % SHEET_MAIN_NS

    styles = []
    style_ids = []
    for xf in safe_iterator(node, '{%s}xf' % SHEET_MAIN_NS):
        style_args = {}
        id_args = dict(xf.attrib)

        fmt_id = int(xf.get("numFmtId", 0))
        font_id = int(xf.get("fontId", 0))
        fill_id = int(xf.get("fillId", 0))
        border_id = int(xf.get("borderId", 0))

        # check for custom formats and normalise indices
        if fmt_id in self.custom_number_formats:
            format_code = self.custom_number_formats[fmt_id]
            id_args["numFmtId"] = self.number_formats.add(format_code) + 164
        else:
            format_code = builtin_formats[fmt_id]
        style_args['number_format'] = format_code

        if bool_attrib(xf, 'applyAlignment'):
            alignment_node = xf.find(alignment_tag)
            if alignment_node is not None:
                alignment = Alignment(**alignment_node.attrib)
                id_args['alignmentId'] = self.alignments.add(alignment)
                style_args['alignment'] = alignment

        if bool_attrib(xf, 'applyFont'):
            style_args['font'] = self.font_list[font_id]

        if bool_attrib(xf, 'applyFill'):
            style_args['fill'] = self.fill_list[fill_id]

        if bool_attrib(xf, 'applyBorder'):
            style_args['border'] = self.border_list[border_id]

        if bool_attrib(xf, 'applyProtection'):
            protection_node = xf.find(protection_tag)
            if protection_node is not None:
                protection = Protection(**protection_node.attrib)
                id_args['protectionId'] = self.protections.add(protection)
                style_args['protection'] = protection

        styles.append(Style(**style_args))
        style_ids.append(StyleId(**id_args))

    self.shared_styles = styles
    self.cell_styles = IndexedList(style_ids)
def parse_cell_xfs(self):
    """Build ``self.shared_styles`` from the ``cellXfs`` table.

    One ``Style`` is created per ``xf`` element from the parts the element
    applies.  ``self.shared_styles`` is left untouched when the table is
    missing.  Raises ``MissingNumberFormat`` for a format id of 164 or
    above that is not in ``self.custom_num_formats``.
    """
    cell_xfs = self.root.find('{%s}cellXfs' % SHEET_MAIN_NS)
    if cell_xfs is None:
        # can happen on bad OOXML writers (e.g. Gnumeric)
        return

    builtin_formats = numbers.BUILTIN_FORMATS
    alignment_tag = '{%s}alignment' % SHEET_MAIN_NS
    protection_tag = '{%s}protection' % SHEET_MAIN_NS

    collected = []
    for xf in safe_iterator(cell_xfs, '{%s}xf' % SHEET_MAIN_NS):
        options = {}

        raw_id = xf.get('numFmtId')
        if raw_id is not None:
            fmt_id = int(raw_id)
            if fmt_id < 164:
                options['number_format'] = builtin_formats.get(fmt_id,
                                                               'General')
            else:
                custom = self.custom_num_formats.get(fmt_id)
                if custom is None:
                    raise MissingNumberFormat('%s' % fmt_id)
                options['number_format'] = custom

        if bool_attrib(xf, 'applyAlignment'):
            al = xf.find(alignment_tag)
            settings = al.attrib if al is not None else {}
            options['alignment'] = Alignment(**settings)

        if bool_attrib(xf, 'applyFont'):
            options['font'] = self.font_list[int(xf.get('fontId'))].copy()

        if bool_attrib(xf, 'applyFill'):
            options['fill'] = self.fill_list[int(xf.get('fillId'))].copy()

        if bool_attrib(xf, 'applyBorder'):
            options['border'] = self.border_list[int(xf.get('borderId'))].copy()

        if bool_attrib(xf, 'applyProtection'):
            prot = xf.find(protection_tag)
            settings = dict(prot.attrib) if prot is not None else {}
            options['protection'] = Protection(**settings)

        collected.append(Style(**options))

    self.shared_styles = IndexedList(collected)
def read_rels(archive):
    """Read relationships for a workbook.

    :param archive: object whose ``read`` returns the XML stored under
        ``ARC_WORKBOOK_RELS``
    :return: generator of ``(relationship id, {'path': path})``

    Paths are normalised to start with ``xl``: a leading slash in front of
    ``xl`` is dropped, and ``xl/`` is prepended to paths that start with
    neither ``xl`` nor ``..``.
    """
    xml_source = archive.read(ARC_WORKBOOK_RELS)
    tree = fromstring(xml_source)
    for element in safe_iterator(tree, '{%s}Relationship' % PKG_REL_NS):
        rId = element.get('Id')
        pth = element.get("Target")
        # normalise path
        if pth.startswith("/xl"):
            # Drop only the leading slash.  ``replace("/xl", "xl")`` also
            # rewrote later occurrences, e.g. "/xl/xlsheets/a.xml" became
            # "xlxlsheets/a.xml".
            pth = pth[1:]
        elif not pth.startswith("xl") and not pth.startswith(".."):
            pth = "xl/" + pth
        yield rId, {'path': pth}
def parse_cell_xfs(self):
    """Read styles from the shared style table.

    Each ``xf`` element of ``cellXfs`` is turned into a ``Style`` which is
    added to ``self.style_prop['list']``; the value returned by ``add`` is
    stored in ``self.style_prop['table']`` under the position of the
    ``xf`` element.  Nothing happens when ``cellXfs`` is missing.

    :raises MissingNumberFormat: for a number format id of 164 or above
        that is not in ``self.custom_num_formats``
    """
    def _flag(node, attr):
        """Read an XML boolean attribute.

        ``bool()`` of the raw string is true for "0" and "false"; only
        other non-empty values are treated as true here.
        """
        return node.get(attr) not in (None, '', '0', 'false', 'f')

    cell_xfs = self.root.find('{%s}cellXfs' % SHEET_MAIN_NS)
    styles_list = self.style_prop['list']

    if cell_xfs is None:  # can happen on bad OOXML writers (e.g. Gnumeric)
        return

    builtin_formats = NumberFormat._BUILTIN_FORMATS
    cell_xfs_nodes = safe_iterator(cell_xfs, '{%s}xf' % SHEET_MAIN_NS)
    for index, cell_xfs_node in enumerate(cell_xfs_nodes):
        _style = {}

        number_format_id = int(cell_xfs_node.get('numFmtId'))
        if number_format_id < 164:
            format_code = builtin_formats.get(number_format_id, 'General')
        else:
            format_code = self.custom_num_formats.get(number_format_id)
            if format_code is None:
                raise MissingNumberFormat('%s' % number_format_id)
        _style['number_format'] = NumberFormat(format_code=format_code)

        if _flag(cell_xfs_node, 'applyAlignment'):
            alignment = {}
            al = cell_xfs_node.find('{%s}alignment' % SHEET_MAIN_NS)
            if al is not None:
                alignment = al.attrib
            _style['alignment'] = Alignment(**alignment)

        if _flag(cell_xfs_node, 'applyFont'):
            _style['font'] = self.font_list[
                int(cell_xfs_node.get('fontId'))].copy()

        if _flag(cell_xfs_node, 'applyFill'):
            _style['fill'] = self.fill_list[
                int(cell_xfs_node.get('fillId'))].copy()

        if _flag(cell_xfs_node, 'applyBorder'):
            _style['border'] = self.border_list[
                int(cell_xfs_node.get('borderId'))].copy()

        if _flag(cell_xfs_node, 'applyProtection'):
            protection = {}
            prot = cell_xfs_node.find('{%s}protection' % SHEET_MAIN_NS)
            if prot is not None:
                protection.update(prot.attrib)
            _style['protection'] = Protection(**protection)

        self.style_prop['table'][index] = styles_list.add(Style(**_style))
def parse_cell_xfs(self):
    """Convert the ``cellXfs`` table into ``self.shared_styles``.

    Every ``xf`` element yields one ``Style`` made of the number format
    (when ``numFmtId`` is present) and of the alignment, font, fill,
    border and protection the element applies.  Returns early, without
    assigning ``self.shared_styles``, when there is no ``cellXfs``.
    Raises ``MissingNumberFormat`` for an unknown custom format id.
    """
    table = self.root.find('{%s}cellXfs' % SHEET_MAIN_NS)
    if table is None:
        # can happen on bad OOXML writers (e.g. Gnumeric)
        return

    builtin_formats = numbers.BUILTIN_FORMATS
    styles = []
    for xf in safe_iterator(table, '{%s}xf' % SHEET_MAIN_NS):
        parts = {}

        num_fmt = xf.get('numFmtId')
        if num_fmt is not None:
            num_fmt = int(num_fmt)
            if num_fmt >= 164:
                code = self.custom_num_formats.get(num_fmt)
                if code is None:
                    raise MissingNumberFormat('%s' % num_fmt)
            else:
                code = builtin_formats.get(num_fmt, 'General')
            parts['number_format'] = code

        if bool_attrib(xf, 'applyAlignment'):
            alignment_node = xf.find('{%s}alignment' % SHEET_MAIN_NS)
            if alignment_node is None:
                parts['alignment'] = Alignment()
            else:
                parts['alignment'] = Alignment(**alignment_node.attrib)

        if bool_attrib(xf, 'applyFont'):
            font_index = int(xf.get('fontId'))
            parts['font'] = self.font_list[font_index].copy()

        if bool_attrib(xf, 'applyFill'):
            fill_index = int(xf.get('fillId'))
            parts['fill'] = self.fill_list[fill_index].copy()

        if bool_attrib(xf, 'applyBorder'):
            border_index = int(xf.get('borderId'))
            parts['border'] = self.border_list[border_index].copy()

        if bool_attrib(xf, 'applyProtection'):
            protection_node = xf.find('{%s}protection' % SHEET_MAIN_NS)
            protection_args = {}
            if protection_node is not None:
                protection_args.update(protection_node.attrib)
            parts['protection'] = Protection(**protection_args)

        styles.append(Style(**parts))

    self.shared_styles = IndexedList(styles)
def parse_font(self, font_node):
    """Create a ``Font`` from a font element.

    Every other element yielded by ``safe_iterator(font_node)`` sets the
    option named after its local tag to its ``val`` attribute, or ``True``
    when it has none.  Underline defaults to ``single`` and the colour is
    converted to a ``Color``.
    """
    options = {}
    for child in safe_iterator(font_node):
        if child is font_node:
            continue
        key = localname(child)
        options[key] = child.get("val", True)

    underline_node = font_node.find('{%s}u' % SHEET_MAIN_NS)
    if underline_node is not None:
        options['u'] = underline_node.get('val', 'single')

    color_node = font_node.find('{%s}color' % SHEET_MAIN_NS)
    if color_node is not None:
        options['color'] = Color(**dict(color_node.attrib))

    return Font(**options)
def read_rels(archive):
    """Read relationships for a workbook.

    :param archive: object whose ``read`` returns the XML stored under
        ``ARC_WORKBOOK_RELS``
    :return: generator of ``(relationship id, {'path': path, 'type': type})``

    Paths are normalised to start with ``xl``: a leading slash in front of
    ``xl`` is dropped, and ``xl/`` is prepended to paths that start with
    neither ``xl`` nor ``..``.
    """
    xml_source = archive.read(ARC_WORKBOOK_RELS)
    tree = fromstring(xml_source)
    for element in safe_iterator(tree, '{%s}Relationship' % PKG_REL_NS):
        rId = element.get('Id')
        pth = element.get("Target")
        typ = element.get('Type')
        # normalise path
        if pth.startswith("/xl"):
            # Drop only the leading slash.  ``replace("/xl", "xl")`` also
            # rewrote later occurrences, e.g. "/xl/xlsheets/a.xml" became
            # "xlxlsheets/a.xml".
            pth = pth[1:]
        elif not pth.startswith("xl") and not pth.startswith(".."):
            pth = "xl/" + pth
        yield rId, {'path': pth, 'type': typ}
def _parse_style_names(self):
    """Return the named styles of the stylesheet keyed by name.

    When a name occurs more than once the last definition wins.
    """
    container = self.root.find("{%s}cellStyles" % SHEET_MAIN_NS)
    by_name = {}
    for entry in safe_iterator(container, '{%s}cellStyle' % SHEET_MAIN_NS):
        label = entry.get("name")
        named = NamedStyle(name=label,
                           builtinId=entry.get("builtinId"),
                           hidden=entry.get("hidden"))
        named.xfId = int(entry.get("xfId"))
        by_name[label] = named
    return by_name
def test_write_chart(self, pie_chart):
    """check if some characteristic tags of PieChart are there

    The written chart must contain the pie chart and ``varyColors``
    elements and must not contain a category axis.
    """
    cw = PieChartWriter(pie_chart)
    cw._write_chart()

    tagnames = ['{%s}pieChart' % CHART_NS,
                '{%s}varyColors' % CHART_NS]

    chart_tags = [e.tag for e in safe_iterator(cw.root)]
    for tag in tagnames:
        assert tag in chart_tags

    # The collected tags are namespace-qualified ('{uri}name'), so the axis
    # has to be looked up the same way; the prefixed form 'c:catAx' could
    # never match and made this check pass unconditionally.
    assert '{%s}catAx' % CHART_NS not in chart_tags
def _parse_style_names(self):
    """Collect the ``cellStyle`` entries as ``NamedStyle`` objects.

    The result maps each style name to its ``NamedStyle``; a later entry
    with the same name replaces an earlier one.
    """
    cell_styles = self.root.find("{%s}cellStyles" % SHEET_MAIN_NS)
    style_tag = '{%s}cellStyle' % SHEET_MAIN_NS

    result = {}
    for node in safe_iterator(cell_styles, style_tag):
        style_name = node.get("name")
        builtin = node.get("builtinId")
        hidden = node.get("hidden")
        style = NamedStyle(name=style_name, builtinId=builtin, hidden=hidden)
        style.xfId = int(node.get("xfId"))
        result[style_name] = style
    return result
def _get_row(self, element, min_col=1, max_col=None, row_counter=None):
    """Yield the cells of a row between ``min_col`` and ``max_col``.

    Cells without a coordinate take ``row_counter`` and the running column
    counter as their position.  Columns without a cell are filled with
    ``EMPTY_CELL``; trailing padding is only added when ``max_col`` is
    given.  ``data_only`` is read from ``self.parent`` and defaults to
    ``False``.
    """
    def read_content(cell, data_type):
        """Return ``(data_type, value)`` for one cell element."""
        formula = cell.findtext(FORMULA_TAG)
        if formula is not None and not data_only:
            return 'f', "=%s" % formula
        if data_type == 'inlineStr':
            inline = cell.find(INLINE_TAG)
            if inline is None:
                return data_type, None
            return data_type, Text.from_tree(inline).content
        return data_type, cell.findtext(VALUE_TAG) or None

    col_counter = min_col
    data_only = getattr(self.parent, 'data_only', False)

    for cell in safe_iterator(element, CELL_TAG):
        coordinate = cell.get('r')
        if coordinate:
            row, column = coordinate_to_tuple(coordinate)
        else:
            row, column = row_counter, col_counter

        if max_col is not None and column > max_col:
            break

        if min_col <= column:
            # pad row with missing cells, never starting before min_col
            for _ in range(max(col_counter, min_col), column):
                yield EMPTY_CELL

            data_type = cell.get('t', 'n')
            style_id = int(cell.get('s', 0))
            data_type, value = read_content(cell, data_type)
            yield ReadOnlyCell(self, row, column, value, data_type, style_id)

        col_counter = column + 1

    if max_col is not None:
        for _ in range(col_counter, max_col + 1):
            yield EMPTY_CELL
def _get_row(self, element, min_col=1, max_col=None, row_counter=None):
    """Generate the cells of one row, padded to the requested window.

    A cell lacking the ``r`` attribute is placed at ``row_counter`` and
    the current column counter.  Gaps inside the window, and the columns
    up to ``max_col`` when it is given, are yielded as ``EMPTY_CELL``.
    Formulae are returned with a leading ``=`` unless the parent's
    ``data_only`` attribute is true.
    """
    next_col = min_col
    values_only = getattr(self.parent, 'data_only', False)

    for node in safe_iterator(element, CELL_TAG):
        reference = node.get('r')
        if not reference:
            row, column = row_counter, next_col
        else:
            row, column = coordinate_to_tuple(reference)

        if max_col is not None and column > max_col:
            break

        if column >= min_col:
            # pad row with missing cells
            gap_start = max(next_col, min_col)
            while gap_start < column:
                yield EMPTY_CELL
                gap_start += 1

            kind = node.get('t', 'n')
            style_id = int(node.get('s', 0))
            content = None
            formula = node.findtext(FORMULA_TAG)
            if formula is not None and not values_only:
                kind = 'f'
                content = "=%s" % formula
            elif kind == 'inlineStr':
                inline = node.find(INLINE_TAG)
                if inline is not None:
                    content = Text.from_tree(inline).content
            else:
                content = node.findtext(VALUE_TAG) or None

            yield ReadOnlyCell(self, row, column, content, kind, style_id)

        next_col = column + 1

    if max_col is not None:
        while next_col <= max_col:
            yield EMPTY_CELL
            next_col += 1