def norm_date(d, prefer_format):
    """Parse a loosely formatted date string into a ``datetime``.

    Dots are turned into dashes and surrounding whitespace is stripped
    before parsing.  A truthy ``prefer_format`` is tried ahead of the
    built-in formats.  When ``prefer_format`` is falsy, text that parses
    both as month/day/year and as day/month/year is rejected with a
    ``ValueError``.  If no format matches, the text is converted with
    ``int`` and handed to ``from_excel``.
    """
    text = d.replace('.', '-').strip()
    known_formats = (
        '%Y-%m-%d',
        '%b-%d-%Y',
        '%m/%d/%Y',
        '%d/%m/%Y',
        '%B %d %Y',
        '%B %d %y',
        '%d %b %y',
        '%Y/%m/%d',
        '%d %B, %y',
        '%d %b-%y',
        '%d %B-%y',
        '%d %B, %Y',
        '%d-%b-%y',
        '%B %d %Y',
        '%d / %m / %Y',
        '%d//%m/%Y',
    )

    if prefer_format:
        candidates = (prefer_format,) + known_formats
    else:
        candidates = known_formats
        # With no hint from the caller, refuse text that reads equally
        # well as month-first and as day-first.
        reads_both_ways = True
        try:
            datetime.strptime(text, '%m/%d/%Y')
            datetime.strptime(text, '%d/%m/%Y')
        except ValueError:
            reads_both_ways = False
        if reads_both_ways:
            raise ValueError

    for pattern in candidates:
        try:
            parsed = datetime.strptime(text, pattern)
        except ValueError:
            continue
        return parsed

    # Nothing matched: treat the text as a serial number.
    return from_excel(int(text))
def _parse_value(self, element, data_type, value, style_id):
    """Convert a raw cell value into its final ``(data_type, value)`` pair.

    :param element: the cell element; consulted for the inline-string
        child and, when a date conversion fails, for its ``r`` attribute.
    :param data_type: the type code read from the cell (``'n'``, ``'s'``,
        ``'b'``, ``'str'``, ``'d'`` or ``'inlineStr'``).
    :param value: the raw text of the cell value, or ``None``.
    :param style_id: the style index of the cell; numeric cells whose
        style is in ``self.date_formats`` are converted to dates.
    :returns: a ``(data_type, value)`` tuple.  A numeric cell flagged as a
        date whose serial cannot be converted is returned as
        ``("e", "#VALUE!")`` after a warning has been issued.
    """
    if value is not None:
        if data_type == 'n':
            value = _cast_number(value)
            if style_id in self.date_formats:
                data_type = 'd'
                try:
                    value = from_excel(value, self.epoch)
                except ValueError:
                    # The cell reference is not passed in, so read it from
                    # the element itself.  Previously an undefined name was
                    # used here, which turned this warning path into a
                    # NameError.
                    coordinate = element.get('r') if element is not None else None
                    msg = (
                        "Cell {0} is marked as a date but the serial value {1} "
                        "is outside the limits for dates. The cell will be "
                        "treated as an error."
                    ).format(coordinate, value)
                    warn(msg)
                    data_type = "e"
                    value = "#VALUE!"
        elif data_type == 's':
            # Shared strings are stored as an index into the string table.
            value = self.shared_strings[int(value)]
        elif data_type == 'b':
            value = bool(int(value))
        elif data_type == "str":
            data_type = "s"
        elif data_type == 'd':
            value = from_ISO8601(value)
    elif data_type == 'inlineStr':
        # Inline strings carry their text in a child element.
        child = element.find(INLINE_STRING)
        if child is not None:
            data_type = 's'
            richtext = Text.from_tree(child)
            value = richtext.content
    return (data_type, value)
def value(self):
    """Return the value held in the cell.

    A stored value of ``None`` is returned unchanged.  When the cell is
    flagged as a date (``self.is_date``), the stored value is converted
    with ``from_excel`` using ``self.base_date``; otherwise the stored
    value is returned as it is.
    """
    stored = self._value
    if stored is None or not self.is_date:
        return stored
    return from_excel(stored, self.base_date)
def parse_cell(self, element):
    """Describe one cell element as a plain dict.

    Reads the ``t`` (type, default ``'n'``), ``r`` (reference) and ``s``
    (style index, default ``0``) attributes, increments
    ``self.col_counter`` and resolves the value according to the type.

    :returns: a dict with the keys ``row``, ``column``, ``value``,
        ``data_type`` and ``style_id``.
    """
    cell_type = element.get('t', 'n')
    ref = element.get('r')
    self.col_counter += 1

    style = element.get('s', 0)
    style = int(style) if style else style

    # Inline strings keep their text in a child element rather than in
    # the value tag; an empty value tag is treated as no value.
    if cell_type == "inlineStr":
        content = None
    else:
        content = element.findtext(VALUE_TAG, None) or None

    # Without an explicit reference, fall back on the running counters.
    if ref:
        row, column = coordinate_to_tuple(ref)
    else:
        row, column = self.row_counter, self.col_counter

    if not self.data_only and element.find(FORMULA_TAG) is not None:
        cell_type = 'f'
        content = self.parse_formula(element)
    elif content is None:
        if cell_type == 'inlineStr':
            node = element.find(INLINE_STRING)
            if node is not None:
                cell_type = 's'
                content = Text.from_tree(node).content
    elif cell_type == 's':
        # Shared strings are stored as an index into the string table.
        content = self.shared_strings[int(content)]
    elif cell_type == 'b':
        content = bool(int(content))
    elif cell_type == "str":
        cell_type = "s"
    elif cell_type == 'd':
        content = from_ISO8601(content)
    elif cell_type == 'n':
        content = _cast_number(content)
        if style in self.date_formats:
            cell_type = 'd'
            try:
                content = from_excel(content, self.epoch)
            except ValueError:
                # A serial outside the date range downgrades the cell to
                # an error value after warning about it.
                message = (
                    "Cell {0} is marked as a date but the serial value {1} "
                    "is outside the limits for dates. The cell will be "
                    "treated as an error."
                ).format(ref, content)
                warn(message)
                cell_type = "e"
                content = "#VALUE!"

    return {
        'row': row,
        'column': column,
        'value': content,
        'data_type': cell_type,
        'style_id': style,
    }
def norm_date(d):
    """Parse a loosely formatted date string into a ``datetime``.

    Dots are turned into dashes and surrounding whitespace is stripped.
    The text is tried as ``%Y-%m-%d``, ``%b-%d-%Y`` and ``%m/%d/%Y`` in
    that order; if none matches it is converted with ``int`` and handed
    to ``from_excel``.
    """
    text = d.replace('.', '-').strip()
    for pattern in ('%Y-%m-%d', '%b-%d-%Y', '%m/%d/%Y'):
        try:
            parsed = datetime.strptime(text, pattern)
        except ValueError:
            continue
        return parsed
    # No textual format matched: treat the text as a serial number.
    return from_excel(int(text))
def parse_cell(self, element):
    """Describe one cell element as a plain dict.

    Reads the ``t`` (type, default ``'n'``), ``r`` (reference) and ``s``
    (style index, default ``0``) attributes, increments
    ``self.max_column`` and resolves the value according to the type.
    Cells typed ``"str"`` are returned as numbers when their text casts
    cleanly with ``_cast_number`` and as strings otherwise.

    :returns: a dict with the keys ``row``, ``column``, ``value``,
        ``data_type`` and ``style_id``.
    """
    kind = element.get('t', 'n')
    where = element.get('r')
    self.max_column += 1

    style = element.get('s', 0)
    if style is not None:
        style = int(style)

    # Inline strings keep their text in a child element rather than in
    # the value tag.
    text = None if kind == "inlineStr" else element.findtext(VALUE_TAG)

    # Without an explicit reference, fall back on the running maxima.
    if where:
        row, column = coordinate_to_tuple(where)
    else:
        row, column = self.max_row, self.max_column

    formula_wanted = not self.data_only and element.find(FORMULA_TAG) is not None
    if formula_wanted:
        kind = 'f'
        text = self.parse_formula(element)
    elif text is None:
        if kind == 'inlineStr':
            inline = element.find(INLINE_STRING)
            if inline is not None:
                kind = 's'
                text = Text.from_tree(inline).content
    elif kind == 'n':
        text = _cast_number(text)
        if style in self.date_formats:
            kind = 'd'
            text = from_excel(text, self.epoch)
    elif kind == 's':
        # Shared strings are stored as an index into the string table.
        text = self.shared_strings[int(text)]
    elif kind == 'b':
        text = bool(int(text))
    elif kind == "str":
        # Prefer a number when the text casts cleanly; otherwise keep
        # the text and report it as a string.
        try:
            text = _cast_number(text)
        except ValueError:
            kind = "s"
        else:
            kind = "n"
    elif kind == 'd':
        text = from_ISO8601(text)

    return {
        'row': row,
        'column': column,
        'value': text,
        'data_type': kind,
        'style_id': style,
    }
def _make_cell_value(self, cell):
    """Return the cell's value, narrowing datetimes where possible.

    Non-datetime values are returned unchanged.  A datetime whose raw
    ``_value`` is 0 is returned as midnight ``time`` when ``from_excel(0)``
    maps to 1899-12-30; a datetime at midnight is returned as a ``date``;
    any other datetime is returned as it is.
    """
    if not isinstance(cell.value, datetime):
        return cell.value

    # openpyxl has a bug that treats '12:00:00 AM' as 0 seconds from the
    # 'Windows Epoch' of 1899-12-30, so a raw 0 really means a bare time.
    if cell._value == 0 and from_excel(0) == datetime(1899, 12, 30, 0, 0):
        return time(0, 0)

    if cell.value.time() == time(0, 0):
        return cell.value.date()
    return cell.value
def date_format(d):
    """Work out which format a loosely formatted date string uses.

    Dots are turned into dashes and surrounding whitespace is stripped.
    Text that parses both as month/day/year and as day/month/year is
    rejected with ``ValueError``.  Otherwise the first matching
    ``strptime`` format is returned.  If none matches, the text is
    checked with ``from_excel(int(...))`` and ``'excel'`` is returned.
    """
    text = d.replace('.', '-').strip()

    def matches(pattern):
        # True when the text parses cleanly with the given format.
        try:
            datetime.strptime(text, pattern)
        except ValueError:
            return False
        return True

    if matches('%m/%d/%Y') and matches('%d/%m/%Y'):
        raise ValueError

    known_formats = (
        '%Y-%m-%d',
        '%b-%d-%Y',
        '%m/%d/%Y',
        '%d/%m/%Y',
        '%B %d %Y',
        '%B %d %y',
    )
    for pattern in known_formats:
        if matches(pattern):
            return pattern

    # Called only for validation: errors from int() or from_excel()
    # propagate to the caller.
    from_excel(int(text))
    return 'excel'
def value(self):
    """Return the cell's stored value converted according to its type.

    ``None`` is returned for an empty cell.  Numeric cells with a date
    number format are converted with ``from_excel`` using
    ``self.base_date``; boolean cells compare the stored text with
    ``'1'``; inline and cached-formula strings are returned as unicode;
    shared strings are looked up by index in ``self.shared_strings``.
    Any other type returns the stored value untouched.
    """
    raw = self._value
    if raw is None:
        return None

    kind = self.data_type
    if kind == 'n':
        if is_date_format(self.number_format):
            return from_excel(raw, self.base_date)
        return raw
    if kind == 'b':
        return raw == '1'
    if kind in (Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
        return unicode(raw)
    if kind == 's':
        # The stored value is an index into the shared string table.
        return unicode(self.shared_strings[int(raw)])
    return raw
def norm_date(d): "handle some creative thinking about what constitutes a date" d = d.replace('.', '-').strip() if ' [this contract' in d.lower(): d = d.lower().split(' [this contract')[0] try: datetime.strptime(d, '%m/%d/%Y') datetime.strptime(d, '%d/%m/%Y') except ValueError: pass else: raise ValueError('ambiguous') for fmt in DATE_FORMATS: try: return datetime.strptime(d, fmt) except ValueError: pass return from_excel(int(d))
def parse_cell(self, element):
    """Build the worksheet cell described by a cell element.

    Reads the value and formula children and the ``t``, ``r`` and ``s``
    attributes, increments ``self._col_count``, creates a ``Cell`` and
    stores it in ``self.ws._cells`` under ``(row, column)``.  Formulae
    replace the value unless ``self.data_only`` is set; shared formulae
    are resolved through ``self.shared_formula_masters``.  Numeric cells
    with a date number format are converted with ``from_excel`` using
    ``self.epoch``.
    """
    value_node = element.find(self.VALUE_TAG)
    content = value_node.text if value_node is not None else None
    formula_node = element.find(self.FORMULA_TAG)
    cell_type = element.get('t', 'n')
    ref = element.get('r')
    self._col_count += 1
    style_ref = element.get('s')

    # assign formula to cell value unless only the data is desired
    if formula_node is not None and not self.data_only:
        cell_type = 'f'
        content = ("=" + formula_node.text) if formula_node.text else "="
        formula_kind = formula_node.get('t')
        if formula_kind and formula_kind != "shared":
            self.ws.formula_attributes[ref] = dict(formula_node.attrib)
        elif formula_kind:
            group = formula_node.get('si')  # Shared group index for shared formulas

            # The spec (18.3.1.40) defines shared formulae in
            # terms of the following:
            #
            # `master`: "The first formula in a group of shared
            #            formulas"
            # `ref`: "Range of cells which the formula applies
            #        to." It's a required attribute on the master
            #        cell, forbidden otherwise.
            # `shared cell`: "A cell is shared only when si is
            #                 used and t is `shared`."
            #
            # Whether to use the cell's given formula or the
            # master's depends on whether the cell is shared,
            # whether it's in the ref, and whether it defines its
            # own formula, as follows:
            #
            #  Shared?   Has formula? | In ref    Not in ref
            # ========= ==============|======== ===============
            #   Yes          Yes      | master   impl. defined
            #    No          Yes      |  own         own
            #   Yes           No      | master   impl. defined
            #    No           No      |  ??          N/A
            #
            # The ?? is because the spec is silent on this issue,
            # though my inference is that the cell does not
            # receive a formula at all.
            #
            # For this implementation, we are using the master
            # formula in the two "impl. defined" cases and no
            # formula in the "??" case. This choice of
            # implementation allows us to disregard the `ref`
            # parameter altogether, and does not require
            # computing expressions like `C5 in A1:D6`.
            # Presumably, Excel does not generate spreadsheets
            # with such contradictions.
            if group in self.shared_formula_masters:
                master = self.shared_formula_masters[group]
                content = master.translate_formula(ref)
            else:
                # First formula seen for this group becomes its master.
                self.shared_formula_masters[group] = Translator(content, ref)

    styles = None
    if style_ref is not None:
        style_ref = int(style_ref)
        styles = self.styles[style_ref]

    # Without an explicit reference, fall back on the running counters.
    if ref:
        row, column = coordinate_to_tuple(ref)
    else:
        row, column = self._row_count, self._col_count

    cell = Cell(self.ws, row=row, col_idx=column, style_array=styles)
    self.ws._cells[(row, column)] = cell

    if content is None:
        # Inline strings carry their text in a child element.
        if cell_type == 'inlineStr':
            inline = element.find(self.INLINE_STRING)
            if inline is not None:
                cell_type = 's'
                content = Text.from_tree(inline).content
    elif cell_type == 'n':
        content = _cast_number(content)
        if is_date_format(cell.number_format):
            cell_type = 'd'
            content = from_excel(content, self.epoch)
    elif cell_type == 'b':
        content = bool(int(content))
    elif cell_type == 's':
        # Shared strings are stored as an index into the string table.
        content = self.shared_strings[int(content)]
    elif cell_type == 'str':
        cell_type = 's'
    elif cell_type == 'd':
        content = from_ISO8601(content)

    if self.guess_types or content is None:
        # Go through the public setter.
        cell.value = content
        return
    # Assign the private value and the type directly.
    cell._value = content
    cell.data_type = cell_type
def validate(self, value):
    """Validate ``value`` after converting ``long`` input with ``from_excel``.

    Values that are not ``long`` instances are passed through unchanged
    to ``DateTimeValidator.DateTimeValidator.validate``, whose result is
    returned.
    """
    candidate = from_excel(value) if isinstance(value, long) else value
    return DateTimeValidator.DateTimeValidator.validate(self, candidate)
def parse_cell(self, element):
    """Build the worksheet cell described by a cell element.

    Reads the value and formula children and the ``t``, ``r`` and ``s``
    attributes, increments ``self._col_count``, creates a ``Cell`` and
    stores it in ``self.ws._cells`` under ``(row, column)``.  Formulae
    replace the value unless ``self.data_only`` is set; shared formulae
    are resolved through ``self.shared_formula_masters``.  Numeric cells
    with a date number format are converted by calling ``from_excel``
    with the value alone.
    """
    raw = element.find(self.VALUE_TAG)
    if raw is not None:
        raw = raw.text
    formula_el = element.find(self.FORMULA_TAG)
    kind = element.get('t', 'n')
    address = element.get('r')
    self._col_count += 1
    style_index = element.get('s')

    # assign formula to cell value unless only the data is desired
    use_formula = formula_el is not None and not self.data_only
    if use_formula:
        kind = 'f'
        raw = "="
        if formula_el.text:
            raw = "=" + formula_el.text

        flavour = formula_el.get('t')
        if flavour == "shared":
            shared_index = formula_el.get('si')  # Shared group index for shared formulas

            # The spec (18.3.1.40) defines shared formulae in
            # terms of the following:
            #
            # `master`: "The first formula in a group of shared
            #            formulas"
            # `ref`: "Range of cells which the formula applies
            #        to." It's a required attribute on the master
            #        cell, forbidden otherwise.
            # `shared cell`: "A cell is shared only when si is
            #                 used and t is `shared`."
            #
            # Whether to use the cell's given formula or the
            # master's depends on whether the cell is shared,
            # whether it's in the ref, and whether it defines its
            # own formula, as follows:
            #
            #  Shared?   Has formula? | In ref    Not in ref
            # ========= ==============|======== ===============
            #   Yes          Yes      | master   impl. defined
            #    No          Yes      |  own         own
            #   Yes           No      | master   impl. defined
            #    No           No      |  ??          N/A
            #
            # The ?? is because the spec is silent on this issue,
            # though my inference is that the cell does not
            # receive a formula at all.
            #
            # For this implementation, we are using the master
            # formula in the two "impl. defined" cases and no
            # formula in the "??" case. This choice of
            # implementation allows us to disregard the `ref`
            # parameter altogether, and does not require
            # computing expressions like `C5 in A1:D6`.
            # Presumably, Excel does not generate spreadsheets
            # with such contradictions.
            if shared_index in self.shared_formula_masters:
                translator = self.shared_formula_masters[shared_index]
                raw = translator.translate_formula(address)
            else:
                # First formula seen for this group becomes its master.
                self.shared_formula_masters[shared_index] = Translator(raw, address)
        elif flavour:
            # Any other formula type: keep its attributes on the sheet.
            self.ws.formula_attributes[address] = dict(formula_el.attrib)

    style_array = None
    if style_index is not None:
        style_index = int(style_index)
        style_array = self.styles[style_index]

    # Without an explicit reference, fall back on the running counters.
    if address:
        row, column = coordinate_to_tuple(address)
    else:
        row, column = self._row_count, self._col_count

    cell = Cell(self.ws, row=row, col_idx=column, style_array=style_array)
    self.ws._cells[(row, column)] = cell

    if raw is not None:
        if kind == 'n':
            raw = _cast_number(raw)
            if is_date_format(cell.number_format):
                kind = 'd'
                raw = from_excel(raw)
        elif kind == 'b':
            raw = bool(int(raw))
        elif kind == 's':
            # Shared strings are stored as an index into the string table.
            raw = self.shared_strings[int(raw)]
        elif kind == 'str':
            kind = 's'
        elif kind == 'd':
            raw = from_ISO8601(raw)
    elif kind == 'inlineStr':
        # Inline strings carry their text in a child element.
        inline_el = element.find(self.INLINE_STRING)
        if inline_el is not None:
            kind = 's'
            rich = Text.from_tree(inline_el)
            raw = rich.content

    if not self.guess_types and raw is not None:
        # Assign the private value and the type directly.
        cell._value = raw
        cell.data_type = kind
    else:
        # Go through the public setter.
        cell.value = raw
def validate(self, value):
    """Validate ``value``, first converting ``long`` input with ``from_excel``.

    The (possibly converted) value is handed to
    ``DateTimeValidator.DateTimeValidator.validate`` and that call's
    result is returned.
    """
    base_validate = DateTimeValidator.DateTimeValidator.validate
    if not isinstance(value, long):
        return base_validate(self, value)
    return base_validate(self, from_excel(value))
def perform_number_format(value, number_format):
    """Format ``value`` roughly the way Excel would for ``number_format``.

    This is a deliberately partial implementation that is used by the
    tests to match values.  It handles many of the formats for numbers
    (int/float), datetime, date, time and timedelta.

    :param value: the value to format.  Strings are returned unchanged.
    :param number_format: an Excel number format string.  ``'General'``
        returns ``value`` unchanged and ``'@'`` returns ``str(value)``.
    :returns: the formatted text, or ``value`` itself when no formatting
        applies.
    """
    if number_format == 'General' or isinstance(value, str):
        return value
    if number_format == '@':
        return str(value)

    # Literal text taken out of the format; replaced by '{n}' placeholders.
    grabit = []

    def grab_escapes(number_format):
        """Replace escaped/quoted literals with placeholders and drop [..] sections."""
        nonlocal grabit

        def sub_grabit(m):
            i = len(grabit)
            grabit.append(m.group(1))
            return f'{{{i}}}'

        nf = re.sub(r'\\(.)', sub_grabit, number_format)
        nf = re.sub(r'"([^"]*)"', sub_grabit, nf)
        nf = re.sub(r'\[(hh|h|mm|m|ss|s)\]', r'<\1>', nf)    # So we don't match the next rule with [h]
        nf = re.sub(r'\[[^\]]+\]', '', nf)    # Remove [Blue], [$-F800], etc
        nf = re.sub(r'<(hh|h|mm|m|ss|s)>', r'[\1]', nf)    # Put back the [h] etc
        return nf

    def restore_escapes(nf):
        """Substitute the literals collected by grab_escapes back in."""
        nonlocal grabit
        if len(grabit):
            nf = nf.format(*grabit)     # Put escaped chars back in
        return nf

    if TRACE:
        print(f'perform_number_format({value}, {number_format})')

    # A number under a date format is really a duration or a date serial.
    if (isinstance(value, int) or isinstance(value, float)) and is_date_format(number_format):
        if '[h' in number_format or '[m' in number_format or '[s' in number_format:
            value = timedelta(days=value)
        else:
            value = from_excel(value)

    if isinstance(value, int) or isinstance(value, float):
        # Note: This is NOT a full implementation of Excel int/float number formatting!
        # Sections are positive;negative;zero.
        format_split = number_format.split(';')
        number_format = format_split[0]
        prefix = ''
        suffix = ''
        if value < 0 and len(format_split) >= 2:
            number_format = format_split[1]
            value = abs(value)
        elif value == 0 and len(format_split) >= 3:
            number_format = format_split[2]
        if not number_format:
            return ''
        nf = grab_escapes(number_format)

        # Pick the Python presentation type.
        fmt = 'f'
        if isinstance(value, int):
            fmt = 'd'
        if '%' in nf:
            fmt = '%'
        elif 'E' in nf:
            fmt = 'E'
            nf = re.sub(r'E[+0#?]+', 'E', nf)

        comma = ''
        pound = ''
        c_ndx = nf.find(',')
        d_ndx = nf.find('.')
        p_ndx = nf.find('#')
        if c_ndx >= 0:
            if d_ndx >= 0 and c_ndx > d_ndx:
                # Commas after the decimal point scale the value, once
                # per comma.  str.find returns -1 when no comma is left,
                # so that is the stop condition; comparing against
                # len(nf) never terminated.
                # NOTE(review): Excel's trailing comma scales by 1000;
                # the factor of 100 is kept from the original - confirm.
                while c_ndx >= 0:
                    value /= 100
                    c_ndx = nf.find(',', c_ndx+1)
            else:
                comma = ','

        places = ''
        if d_ndx >= 0:
            if p_ndx > d_ndx:
                pound = '#'
                nf = nf.replace('#', '0')
            # Digits after the decimal point give the precision.
            places = f'.{nf[d_ndx+1:].count("0")}'
            if fmt == 'd':
                value = float(value)
                fmt = 'f'
        elif fmt == 'd':
            # Integer without a decimal point: zero-pad to the number of 0s.
            zeros = nf.count('0')
            if zeros:
                fmt = f'0{zeros}' + fmt
        else:
            places = '.0'

        nf = re.sub(r'_.', ' ', nf)
        nf = nf.replace('*', '')    # We can't really do this one

        # Split into literal prefix, digit placeholders, optional fraction, literal suffix.
        m = re.match(r'((?:[^0#.E%,?*]*{\d+}[^0#.E%,?*]*)|(?:[^0#.E%,?*]*))[0#.E%,?*]+(.*[/][0-9?#]+)?(.*)$', nf)
        prefix = restore_escapes(m.group(1))
        fraction = m.group(2)
        suffix = restore_escapes(m.group(3))
        if fraction:
            s_ndx = fraction.find('/')
            suf = restore_escapes(fraction[:s_ndx]).replace('?', '').replace('#', '').replace('0', '')
            fraction = fraction[s_ndx+1:]
            # NOTE(review): for a fixed (all-digit) denominator both of
            # the branches below run, and `value//int(fraction)` operates
            # on the fractional remainder - behaviour kept as is; confirm
            # the intended output for formats such as '# ?/8'.
            if fraction.isdigit():
                ival = int(value)
                value -= ival
                suf += f'{value//int(fraction)}/{fraction}'
                value = ival
            if fraction[0] != '?' or float(int(value)) != value:
                ival = int(value)
                value -= ival
                fr = Fraction.from_float(value).limit_denominator(10**(len(fraction))-1)
                suf += f'{fr.numerator}/{fr.denominator}'
            suffix = suf + suffix

        py_format = f'{prefix}{{0:{pound}{comma}{places}{fmt}}}{suffix}'
        value = py_format.format(value)
        if TRACE:
            print(f'perform_number_format: using {py_format} to produce {value}')
        return value

    # Dates and times use only the first format section.
    number_format = number_format.split(';')[0]

    # Normalise time and date values to datetime.
    if isinstance(value, tm):
        value = datetime(1, 1, 1, value.hour, value.minute, value.second)
    elif isinstance(value, date) and not isinstance(value, datetime):
        value = datetime(value.year, value.month, value.day)

    # Elapsed-time formats ([h], [m], [s]) work on a duration.
    if isinstance(value, datetime) and \
            ('[h' in number_format or '[m' in number_format or '[s' in number_format):
        value = timedelta(hours=value.hour, minutes=value.minute,
                          seconds=value.second + value.microsecond / 1000000.0)

    if isinstance(value, timedelta):
        total_seconds = int(value.total_seconds())
        hours = total_seconds // 3600
        total_minutes = total_seconds // 60
        minutes = (total_seconds % 3600) // 60
        seconds = total_seconds % 60
        nf = grab_escapes(number_format)
        # Bracketed codes are totals; plain mm/ss/m/s are the remainders.
        nf = (
            nf.replace('[hh]', f'{hours:02d}')
            .replace('[mm]', f'{total_minutes:02d}')
            .replace('[ss]', f'{total_seconds:02d}')
            .replace('[h]', str(hours))
            .replace('[m]', str(total_minutes))
            .replace('[s]', str(total_seconds))
            .replace('mm', f'{minutes:02d}')
            .replace('ss', f'{seconds:02d}')
            .replace('m', str(minutes))
            .replace('s', str(seconds))
        )
        nf = restore_escapes(nf)
        value = nf
        if TRACE:
            print(f'perform_number_format: timedelta produced {value} (grabit = {grabit})')

    if isinstance(value, datetime):
        if value.microsecond >= 500000:     # Round up 999999 ms to the next second
            value = value.replace(microsecond=0) + timedelta(seconds=1)
        fmt = grab_escapes(number_format)
        # '%D' stands in for '%d' until the single-letter codes are done.
        fmt = (
            fmt.replace('yyyy', '%Y')
            .replace('yy', '%y')
            .replace('dddd', '%A')
            .replace('ddd', '%a')
            .replace('dd', '%D')
            .replace('mmmm', '%B')
            .replace('mmm', '%b')
            .replace('AM/PM', '%p')
            .replace('ss', '%S')
        )
        h_ndx = fmt.find('h')
        if '%p' in fmt:
            fmt = fmt.replace('hh', '%I')
        else:
            fmt = fmt.replace('hh', '%H')

        # Now let's handle the hard ones: mm, m, d, h, a/p
        ap_ndx = fmt.find('a/p')
        if ap_ndx >= 0:
            fmt = fmt.replace('a/p', '%p')
        while True:
            m_ndx = fmt.find('mm')
            if m_ndx < 0:
                break
            if h_ndx >= 0 and m_ndx > h_ndx:     # it's minutes
                fmt = fmt[:m_ndx] + '%M' + fmt[m_ndx+2:]
                continue
            fmt = fmt[:m_ndx] + '%X' + fmt[m_ndx+2:]     # it's months (corrected below)
        while True:
            m_ndx = fmt.find('m')
            if m_ndx < 0:
                break
            if h_ndx >= 0 and m_ndx > h_ndx:     # it's minutes
                fmt = fmt[:m_ndx] + str(value.minute) + fmt[m_ndx+1:]
                continue
            fmt = fmt[:m_ndx] + str(value.month) + fmt[m_ndx+1:]     # it's months
        d_ndx = fmt.find('d')
        if d_ndx >= 0:
            fmt = fmt.replace('d', str(value.day))
        if h_ndx >= 0:
            if '%p' in fmt:
                # 12-hour clock without zero padding.
                hour = value.hour
                if hour > 12:
                    hour -= 12
                if hour == 0:
                    hour = 12
                fmt = fmt.replace('h', str(hour))
            else:
                fmt = fmt.replace('h', str(value.hour))
        # Swap the stand-ins for the real strftime codes.
        fmt = fmt.replace('%D', '%d').replace('%X', '%m')
        fmt = restore_escapes(fmt)
        value = value.strftime(fmt)
        if ap_ndx >= 0:
            value = value.replace('AM', 'a').replace('PM', 'p')
        if TRACE:
            print(f'perform_number_format: using {fmt} to produce {value} (grabit={grabit})')
    return value