def _parse_value(self, element, data_type, value, style_id):
    """Convert a raw cell value according to its declared data type.

    :param element: the cell element; its ``r`` attribute is used in the
        warning message and its inline-string child is read for
        ``inlineStr`` cells
    :param data_type: the type code declared on the cell (``'n'``, ``'s'``,
        ``'b'``, ``'str'``, ``'d'`` or ``'inlineStr'``)
    :param value: the raw value text, or ``None`` when there is none
    :param style_id: style index, checked against ``self.date_formats``
    :return: a ``(data_type, value)`` tuple with the possibly updated type
        code and the converted value
    """
    if value is not None:
        if data_type == 'n':
            value = _cast_number(value)
            if style_id in self.date_formats:
                data_type = 'd'
                try:
                    value = from_excel(value, self.epoch)
                except ValueError:
                    # The coordinate is not passed in, so read it from the
                    # element; previously this referenced an undefined name
                    # and raised NameError instead of warning.
                    coordinate = element.get('r')
                    msg = (
                        "Cell {0} is marked as a date but the serial value "
                        "{1} is outside the limits for dates. The cell will "
                        "be treated as an error."
                    ).format(coordinate, value)
                    warn(msg)
                    data_type = "e"
                    value = "#VALUE!"
        elif data_type == 's':
            # the value is an index into the shared strings table
            value = self.shared_strings[int(value)]
        elif data_type == 'b':
            value = bool(int(value))
        elif data_type == "str":
            # the text is kept as is, only the type code is rewritten
            data_type = "s"
        elif data_type == 'd':
            value = from_ISO8601(value)
    elif data_type == 'inlineStr':
        # inline strings carry their text in a child element, not a value
        child = element.find(INLINE_STRING)
        if child is not None:
            data_type = 's'
            richtext = Text.from_tree(child)
            value = richtext.content

    return (data_type, value)
def parse_cell(self, element):
    """Turn one cell element into a plain dictionary description.

    Reads the ``t`` (type), ``r`` (coordinate) and ``s`` (style) attributes
    of *element*, advances ``self.col_counter`` and converts the raw value
    according to the declared type.

    :return: dict with the keys ``row``, ``column``, ``value``,
        ``data_type`` and ``style_id``
    """
    data_type = element.get('t', 'n')
    coordinate = element.get('r')
    self.col_counter += 1

    style_id = element.get('s', 0)
    if style_id:
        style_id = int(style_id)

    # Inline strings have no value child; everything else reads it, with
    # empty text normalised to None.
    value = None
    if data_type != "inlineStr":
        value = element.findtext(VALUE_TAG, None) or None

    if coordinate:
        row, column = coordinate_to_tuple(coordinate)
    else:
        # no explicit reference: use the running counters
        row, column = self.row_counter, self.col_counter

    if not self.data_only and element.find(FORMULA_TAG) is not None:
        data_type = 'f'
        value = self.parse_formula(element)

    elif value is not None:
        if data_type == 'n':
            value = _cast_number(value)
            if style_id in self.date_formats:
                try:
                    value = from_excel(value, self.epoch)
                except ValueError:
                    msg = (
                        "Cell {0} is marked as a date but the serial value "
                        "{1} is outside the limits for dates. The cell will "
                        "be treated as an error."
                    ).format(coordinate, value)
                    warn(msg)
                    data_type = "e"
                    value = "#VALUE!"
                else:
                    data_type = 'd'
        elif data_type == 'd':
            value = from_ISO8601(value)
        elif data_type == 'b':
            value = bool(int(value))
        elif data_type == 's':
            # the value is an index into the shared strings table
            value = self.shared_strings[int(value)]
        elif data_type == "str":
            data_type = "s"

    elif data_type == 'inlineStr':
        inline = element.find(INLINE_STRING)
        if inline is not None:
            data_type = 's'
            value = Text.from_tree(inline).content

    return {
        'row': row,
        'column': column,
        'value': value,
        'data_type': data_type,
        'style_id': style_id,
    }
def parse_cell(self, element):
    """Turn one cell element into a plain dictionary description.

    Reads the ``t`` (type), ``r`` (coordinate) and ``s`` (style) attributes
    of *element*, advances ``self.max_column`` and converts the raw value
    according to the declared type.

    :return: dict with the keys ``row``, ``column``, ``value``,
        ``data_type`` and ``style_id``
    """
    data_type = element.get('t', 'n')
    coordinate = element.get('r')
    self.max_column += 1

    style_id = element.get('s', 0)
    if style_id is not None:
        style_id = int(style_id)

    # inline strings have no value child to read
    value = None
    if data_type != "inlineStr":
        value = element.findtext(VALUE_TAG)

    if coordinate:
        row, column = coordinate_to_tuple(coordinate)
    else:
        # no explicit reference: use the running counters
        row, column = self.max_row, self.max_column

    if not self.data_only and element.find(FORMULA_TAG) is not None:
        data_type = 'f'
        value = self.parse_formula(element)

    elif value is not None:
        if data_type == 'n':
            value = _cast_number(value)
            if style_id in self.date_formats:
                data_type = 'd'
                value = from_excel(value, self.epoch)
        elif data_type == "str":
            # text that parses as a number is stored as a number,
            # anything else stays a string
            try:
                value = _cast_number(value)
            except ValueError:
                data_type = "s"
            else:
                data_type = "n"
        elif data_type == 'd':
            value = from_ISO8601(value)
        elif data_type == 'b':
            value = bool(int(value))
        elif data_type == 's':
            # the value is an index into the shared strings table
            value = self.shared_strings[int(value)]

    elif data_type == 'inlineStr':
        inline = element.find(INLINE_STRING)
        if inline is not None:
            data_type = 's'
            value = Text.from_tree(inline).content

    return {
        'row': row,
        'column': column,
        'value': value,
        'data_type': data_type,
        'style_id': style_id,
    }
def rich_content(cls, value):
    """Return the content of *value* as produced by ``Text.content``.

    :param value: either a ``Text`` instance (or subclass), or an iterable
        of segments that each expose a ``text`` attribute
    :return: ``value.content`` for a ``Text``; otherwise the content of a
        ``Text`` built from the segments whose ``text`` is truthy, or ``''``
        when no such segment exists
    """
    # isinstance rather than an exact type comparison, so Text subclasses
    # take the direct path instead of being iterated as segment lists
    if isinstance(value, Text):
        return value.content

    segments = [segment for segment in value if segment.text]
    if not segments:
        return ''
    return Text(r=segments).content
def read_string_table(xml_source):
    """Read in all shared strings in the table

    Every ``si`` element in the main sheet namespace is converted to its
    text content, with each occurrence of the substring ``x005F_`` removed.
    The strings are returned as an ``IndexedList`` in document order.
    """
    stream = _get_xml_iter(xml_source)
    si_tag = '{%s}si' % SHEET_MAIN_NS
    collected = []
    for _event, elem in iterparse(stream):
        if elem.tag != si_tag:
            continue
        content = Text.from_tree(elem).content
        collected.append(content.replace('x005F_', ''))
        # clear the element once its text has been extracted
        elem.clear()
    return IndexedList(collected)
def read_string_table(xml_source):
    """Read in all shared strings in the table

    Every ``si`` element in the main sheet namespace is converted to its
    text content, with each occurrence of the substring ``x005F_`` removed.
    The strings are returned as a plain list in document order.
    """
    si_tag = '{%s}si' % SHEET_MAIN_NS
    result = []
    for _event, elem in iterparse(xml_source):
        if elem.tag != si_tag:
            continue
        raw = Text.from_tree(elem).content
        # clear the element once its text has been extracted
        elem.clear()
        result.append(raw.replace('x005F_', ''))
    return result
def _get_row(self, element, min_col=1, max_col=None, row_counter=None):
    """Return cells from a particular row

    Generator over the cells found under *element*. Cells left of
    *min_col* are skipped, iteration stops at the first cell right of
    *max_col*, and ``EMPTY_CELL`` is yielded for every missing column in
    between. When *max_col* is given the row is also padded with
    ``EMPTY_CELL`` up to and including that column.

    :param row_counter: row number used for cells that carry no ``r``
        coordinate attribute
    """
    next_col = min_col
    values_only = getattr(self.parent, 'data_only', False)

    for node in safe_iterator(element, CELL_TAG):
        ref = node.get('r')
        if ref:
            row, column = coordinate_to_tuple(ref)
        else:
            # no explicit reference: fall back to the running position
            row, column = row_counter, next_col

        if max_col is not None and column > max_col:
            break

        if column >= min_col:
            # pad row with missing cells; the range is empty when there
            # is no gap before this cell
            for _ in range(max(next_col, min_col), column):
                yield EMPTY_CELL

            data_type = node.get('t', 'n')
            style_id = int(node.get('s', 0))
            formula = node.findtext(FORMULA_TAG)

            if formula is not None and not values_only:
                data_type = 'f'
                value = "=%s" % formula
            elif data_type == 'inlineStr':
                value = None
                inline = node.find(INLINE_TAG)
                if inline is not None:
                    value = Text.from_tree(inline).content
            else:
                # empty text is normalised to None
                value = node.findtext(VALUE_TAG) or None

            yield ReadOnlyCell(self, row, column, value, data_type, style_id)

        next_col = column + 1

    if max_col is not None:
        for _ in range(next_col, max_col + 1):
            yield EMPTY_CELL
def _get_row(self, element, min_col=1, max_col=None, row_counter=None):
    """Return cells from a particular row

    Generator: yields a ``ReadOnlyCell`` for each cell of *element* whose
    column lies between *min_col* and *max_col*, and ``EMPTY_CELL`` for
    columns in that window that have no cell. Without *max_col* there is
    no right-hand limit and no trailing padding.

    :param row_counter: row number used for cells that carry no ``r``
        coordinate attribute
    """
    data_only = getattr(self.parent, 'data_only', False)
    cursor = min_col  # column expected for the next cell

    for cell in safe_iterator(element, CELL_TAG):
        coordinate = cell.get('r')
        if coordinate:
            row, column = coordinate_to_tuple(coordinate)
        else:
            row, column = row_counter, cursor

        if max_col is not None and column > max_col:
            break

        if column < min_col:
            # left of the requested window: only advance the cursor
            cursor = column + 1
            continue

        # pad row with missing cells
        gap = max(cursor, min_col)
        while gap < column:
            yield EMPTY_CELL
            gap += 1

        data_type = cell.get('t', 'n')
        style_id = int(cell.get('s', 0))
        value = None

        formula = cell.findtext(FORMULA_TAG)
        if formula is not None and not data_only:
            data_type = 'f'
            value = "=%s" % formula
        elif data_type != 'inlineStr':
            # empty text is normalised to None
            value = cell.findtext(VALUE_TAG) or None
        else:
            child = cell.find(INLINE_TAG)
            if child is not None:
                value = Text.from_tree(child).content

        yield ReadOnlyCell(self, row, column, value, data_type, style_id)
        cursor = column + 1

    if max_col is not None:
        # fill the remainder of the window
        for _ in range(cursor, max_col + 1):
            yield EMPTY_CELL
def __init__(
    self,
    ref="",
    authorId=0,
    guid=None,
    shapeId=0,
    text=None,
    commentPr=None,
    author=None,
):
    """Store the given fields on the instance.

    When *text* is ``None`` a new, empty ``Text()`` is stored instead; all
    other arguments are stored unchanged.
    """
    self.ref = ref
    self.authorId = authorId
    self.guid = guid
    self.shapeId = shapeId
    self.text = Text() if text is None else text
    self.commentPr = commentPr
    self.author = author
def mid(cls, rich_text, head, tail):
    """Extract the characters *head* .. *tail* (both inclusive) of a rich text.

    Positions count characters across the concatenated ``text`` of the
    segments in ``rich_text.r``. Each segment that overlaps the range is
    shallow-copied, and copies at the edges of the range get their ``text``
    trimmed to the overlapping part.

    :return: ``(Text(r=copied_segments), plain_string)`` where
        ``plain_string`` is the concatenation of the extracted pieces
    """
    picked = []
    pieces = []
    stop = -1  # position of the last character of the previous segment

    for segment in rich_text.r:
        start = stop + 1
        stop += len(segment.text)

        if stop < head:
            # segment ends before the requested range begins
            continue

        more_follows = stop < tail
        clone = copy(segment)

        if start <= head <= stop:
            # segment containing the first requested character
            offset = head - start
            if more_follows:
                piece = segment.text[offset:]
            else:
                piece = segment.text[offset:tail - start + 1]
            clone.text = piece
        elif more_follows:
            # segment lies wholly inside the range: copy keeps its text
            piece = segment.text
        else:
            # segment containing the last requested character
            piece = segment.text[:tail - start + 1]
            clone.text = piece

        picked.append(clone)
        pieces.append(piece)

        if not more_follows:
            break

    return Text(r=picked), ''.join(pieces)
def __init__(
    self,
    ref="",
    authorId=0,
    guid=None,
    shapeId=0,
    text=None,
    commentPr=None,
    author=None,
    height=79,
    width=144,
):
    """Store the given fields on the instance.

    When *text* is ``None`` a new, empty ``Text()`` is stored instead; all
    other arguments, including *height* (default 79) and *width*
    (default 144), are stored unchanged.
    """
    self.ref = ref
    self.authorId = authorId
    self.guid = guid
    self.shapeId = shapeId
    self.text = Text() if text is None else text
    self.commentPr = commentPr
    self.author = author
    self.height = height
    self.width = width
def parse_cell(self, element):
    """Create the worksheet cell described by *element*.

    Reads the value, formula, type (``t``), coordinate (``r``) and style
    (``s``) of the element, builds a ``Cell`` registered in
    ``self.ws._cells`` and assigns the converted value to it. Formulae
    replace the cached value unless ``self.data_only`` is set.
    """
    value_node = element.find(self.VALUE_TAG)
    value = value_node.text if value_node is not None else None
    formula = element.find(self.FORMULA_TAG)
    data_type = element.get('t', 'n')
    coordinate = element.get('r')
    self._col_count += 1
    style_id = element.get('s')

    # assign formula to cell value unless only the data is desired
    if formula is not None and not self.data_only:
        data_type = 'f'
        value = ("=" + formula.text) if formula.text else "="

        formula_type = formula.get('t')
        if formula_type == "shared":
            si = formula.get('si')  # Shared group index for shared formulas

            # The spec (18.3.1.40) defines shared formulae in
            # terms of the following:
            #
            # `master`: "The first formula in a group of shared
            #            formulas"
            # `ref`: "Range of cells which the formula applies
            #        to." It's a required attribute on the master
            #        cell, forbidden otherwise.
            # `shared cell`: "A cell is shared only when si is
            #                 used and t is `shared`."
            #
            # Whether to use the cell's given formula or the
            # master's depends on whether the cell is shared,
            # whether it's in the ref, and whether it defines its
            # own formula, as follows:
            #
            #  Shared?   Has formula? | In ref    Not in ref
            # ========= ==============|======== ===============
            #   Yes          Yes      | master   impl. defined
            #    No          Yes      |  own         own
            #   Yes           No      | master   impl. defined
            #    No           No      |  ??          N/A
            #
            # The ?? is because the spec is silent on this issue,
            # though my inference is that the cell does not
            # receive a formula at all.
            #
            # For this implementation, we are using the master
            # formula in the two "impl. defined" cases and no
            # formula in the "??" case. This choice of
            # implementation allows us to disregard the `ref`
            # parameter altogether, and does not require
            # computing expressions like `C5 in A1:D6`.
            # Presumably, Excel does not generate spreadsheets
            # with such contradictions.
            masters = self.shared_formula_masters
            if si in masters:
                value = masters[si].translate_formula(coordinate)
            else:
                # first formula seen for this group becomes the master
                masters[si] = Translator(value, coordinate)
        elif formula_type:
            # any other typed formula: keep its attributes on the sheet
            self.ws.formula_attributes[coordinate] = dict(formula.attrib)

    style_array = None
    if style_id is not None:
        style_id = int(style_id)
        style_array = self.styles[style_id]

    if coordinate:
        row, column = coordinate_to_tuple(coordinate)
    else:
        # no explicit reference: use the running counters
        row, column = self._row_count, self._col_count

    cell = Cell(self.ws, row=row, col_idx=column, style_array=style_array)
    self.ws._cells[(row, column)] = cell

    if value is not None:
        if data_type == 'n':
            value = _cast_number(value)
            if is_date_format(cell.number_format):
                data_type = 'd'
                value = from_excel(value, self.epoch)
        elif data_type == 's':
            # the value is an index into the shared strings table
            value = self.shared_strings[int(value)]
        elif data_type == 'b':
            value = bool(int(value))
        elif data_type == 'd':
            value = from_ISO8601(value)
        elif data_type == 'str':
            data_type = 's'
    elif data_type == 'inlineStr':
        inline = element.find(self.INLINE_STRING)
        if inline is not None:
            data_type = 's'
            value = Text.from_tree(inline).content

    if self.guess_types or value is None:
        # go through the public ``value`` attribute
        cell.value = value
    else:
        # store the converted value and its type directly
        cell._value = value
        cell.data_type = data_type
def parse_cell(self, element):
    """Create the worksheet cell described by *element*.

    Reads the value, formula, type (``t``), coordinate (``r``) and style
    (``s``) of the element, builds a ``Cell`` registered in
    ``self.ws._cells`` and assigns the converted value to it. Formulae
    replace the cached value unless ``self.data_only`` is set.

    Numeric cells whose number format is a date format are converted with
    ``from_excel``. The reader's ``epoch`` attribute is passed along when
    the instance has one, so the conversion does not silently assume
    ``from_excel``'s default date system.
    """
    value = element.find(self.VALUE_TAG)
    if value is not None:
        value = value.text
    formula = element.find(self.FORMULA_TAG)
    data_type = element.get('t', 'n')
    coordinate = element.get('r')
    self._col_count += 1
    style_id = element.get('s')

    # assign formula to cell value unless only the data is desired
    if formula is not None and not self.data_only:
        data_type = 'f'
        if formula.text:
            value = "=" + formula.text
        else:
            value = "="
        formula_type = formula.get('t')
        if formula_type:
            if formula_type != "shared":
                self.ws.formula_attributes[coordinate] = dict(formula.attrib)

            else:
                si = formula.get('si')  # Shared group index for shared formulas

                # The spec (18.3.1.40) defines shared formulae in
                # terms of the following:
                #
                # `master`: "The first formula in a group of shared
                #            formulas"
                # `ref`: "Range of cells which the formula applies
                #        to." It's a required attribute on the master
                #        cell, forbidden otherwise.
                # `shared cell`: "A cell is shared only when si is
                #                 used and t is `shared`."
                #
                # Whether to use the cell's given formula or the
                # master's depends on whether the cell is shared,
                # whether it's in the ref, and whether it defines its
                # own formula, as follows:
                #
                #  Shared?   Has formula? | In ref    Not in ref
                # ========= ==============|======== ===============
                #   Yes          Yes      | master   impl. defined
                #    No          Yes      |  own         own
                #   Yes           No      | master   impl. defined
                #    No           No      |  ??          N/A
                #
                # The ?? is because the spec is silent on this issue,
                # though my inference is that the cell does not
                # receive a formula at all.
                #
                # For this implementation, we are using the master
                # formula in the two "impl. defined" cases and no
                # formula in the "??" case. This choice of
                # implementation allows us to disregard the `ref`
                # parameter altogether, and does not require
                # computing expressions like `C5 in A1:D6`.
                # Presumably, Excel does not generate spreadsheets
                # with such contradictions.
                if si in self.shared_formula_masters:
                    trans = self.shared_formula_masters[si]
                    value = trans.translate_formula(coordinate)
                else:
                    # first formula seen for this group becomes the master
                    self.shared_formula_masters[si] = Translator(value, coordinate)

    style_array = None
    if style_id is not None:
        style_id = int(style_id)
        style_array = self.styles[style_id]

    if coordinate:
        row, column = coordinate_to_tuple(coordinate)
    else:
        # no explicit reference: use the running counters
        row, column = self._row_count, self._col_count

    cell = Cell(self.ws, row=row, col_idx=column, style_array=style_array)
    self.ws._cells[(row, column)] = cell

    if value is not None:
        if data_type == 'n':
            value = _cast_number(value)
            if is_date_format(cell.number_format):
                data_type = 'd'
                # Use the reader's epoch when it has one; otherwise keep
                # the previous call so from_excel applies its own default.
                epoch = getattr(self, 'epoch', None)
                if epoch is None:
                    value = from_excel(value)
                else:
                    value = from_excel(value, epoch)
        elif data_type == 'b':
            value = bool(int(value))
        elif data_type == 's':
            # the value is an index into the shared strings table
            value = self.shared_strings[int(value)]
        elif data_type == 'str':
            data_type = 's'
        elif data_type == 'd':
            value = from_ISO8601(value)

    else:
        if data_type == 'inlineStr':
            child = element.find(self.INLINE_STRING)
            if child is not None:
                data_type = 's'
                richtext = Text.from_tree(child)
                value = richtext.content

    if self.guess_types or value is None:
        # go through the public ``value`` attribute
        cell.value = value
    else:
        # store the converted value and its type directly
        cell._value = value
        cell.data_type = data_type
def rich_content(cls, value):
    """Return the ``content`` of a ``Text`` built with *value* as its ``r`` argument."""
    wrapped = Text(r=value)
    return wrapped.content