def string_to_date_with_xls_validation(cls, date_str):
    """Parse a ``YYYY-MM-DD`` string, falling back to the raw text.

    The string is parsed into a ``date`` and then run through
    ``SharedDate().datetime_to_julian`` purely as a validation step; the
    converted number itself is discarded.

    :param date_str: text in ``%Y-%m-%d`` format
    :returns: the parsed ``date`` when the conversion succeeds, otherwise
        the original ``date_str`` unchanged
    :raises ValueError: if ``date_str`` does not match ``%Y-%m-%d`` (the
        parse happens outside the guarded section)
    """
    parsed = datetime.strptime(date_str, '%Y-%m-%d').date()
    try:
        SharedDate().datetime_to_julian(parsed)
    except ValueError:
        # The conversion rejected this date: keep it as plain text.
        return date_str
    return parsed
def __init__(self, parent_workbook, title):
    """Set up an empty sheet backed by three temporary files.

    :param parent_workbook: workbook owning this sheet; it must expose
        ``strings_table_builder``
    :param title: sheet title, forwarded to ``Worksheet.__init__``
    """
    Worksheet.__init__(self, parent_workbook, title)

    # Nothing has been written yet, so both extents start at zero.
    self._max_col = self._max_row = 0
    self._parent = parent_workbook

    # Names of the temporary files: header, row content, and the final
    # file (created without a suffix).
    self._fileobj_header_name = create_temporary_file(suffix='.header')
    self._fileobj_content_name = create_temporary_file(suffix='.content')
    self._fileobj_name = create_temporary_file()

    # NOTE(review): built with SharedDate's default arguments rather than
    # a base date taken from the workbook -- confirm this is intended.
    self._shared_date = SharedDate()
    self._string_builder = self._parent.strings_table_builder
def __init__(self, worksheet, column, row, value=None):
    """Create a cell at ``column``/``row`` of ``worksheet``.

    :param worksheet: owning worksheet; ``worksheet.parent`` must expose
        ``excel_base_date``
    :param column: column letter(s); stored upper-cased
    :param row: row index
    :param value: optional initial value, assigned through the ``value``
        attribute once the cell is fully initialised
    """
    self.column = column.upper()
    self.row = row
    self.parent = worksheet
    self.xf_index = 0
    self.merged = False
    self._comment = None
    self._hyperlink_rel = None
    self._shared_date = SharedDate(base_date=worksheet.parent.excel_base_date)
    # _value is the stored value, while value is the displayed value
    self._value = None
    self._data_type = self.TYPE_NULL
    # Assign the value last: the assignment may need ``self.parent`` and
    # ``self._shared_date``, so they have to exist first. Compare against
    # None so that 0, False and '' are stored instead of being dropped.
    if value is not None:
        self.value = value
def __init__(self, parent_workbook, title, worksheet_path, xml_source,
             string_table, style_table):
    """Record where the sheet lives and read its declared dimensions.

    :param parent_workbook: owning workbook; must expose
        ``excel_base_date``
    :param title: sheet title, forwarded to ``Worksheet.__init__``
    :param worksheet_path: stored as ``self.worksheet_path``
    :param xml_source: accepted for interface compatibility; this body
        reads ``self.xml_source`` instead of the argument
    :param string_table: stored as ``self._string_table``
    :param style_table: stored as ``self._style_table``
    """
    Worksheet.__init__(self, parent_workbook, title)
    self.worksheet_path = worksheet_path
    self._string_table = string_table
    self._style_table = style_table

    bounds = read_dimension(xml_source=self.xml_source)
    self.min_col, self.min_row, self.max_col, self.max_row = bounds

    self._shared_date = SharedDate(
        base_date=parent_workbook.excel_base_date)
def sd():
    """Return a new ``SharedDate`` built with its default arguments."""
    # Imported at call time, as in the original, so that loading this
    # file does not itself import openpyxl.date_time.
    import openpyxl.date_time as date_time_module
    return date_time_module.SharedDate()
class DumpWorksheet(Worksheet):
    """
    .. warning::

        You shouldn't initialize this yourself, use
        :class:`openpyxl.workbook.Workbook` constructor instead,
        with `optimized_write = True`.

    Rows are appended one at a time to a temporary content file; on
    :meth:`close` the header and content files are concatenated into a
    third temporary file whose name is exposed as :attr:`filename`.
    """

    def __init__(self, parent_workbook, title):
        """Set up an empty sheet backed by three temporary files.

        :param parent_workbook: owning workbook; must expose
            ``strings_table_builder`` and ``_local_data``
        :param title: sheet title, forwarded to ``Worksheet.__init__``
        """
        Worksheet.__init__(self, parent_workbook, title)

        self._max_col = 0
        self._max_row = 0
        self._parent = parent_workbook

        self._fileobj_header_name = create_temporary_file(suffix='.header')
        self._fileobj_content_name = create_temporary_file(suffix='.content')
        self._fileobj_name = create_temporary_file()

        # NOTE(review): built with SharedDate's default arguments rather
        # than a base date taken from the workbook -- confirm intended.
        self._shared_date = SharedDate()
        self._string_builder = self._parent.strings_table_builder

    def get_temporary_file(self, filename):
        """Return an open handle for ``filename``, reusing cached handles.

        A cache hit is moved to the newest position. On a miss the file
        is opened in ``'r+'`` mode and cached; when the cache then holds
        more than ``DESCRIPTORS_CACHE_SIZE`` entries, the oldest handle
        is closed and dropped.

        :param filename: name of one of this sheet's temporary files
        :raises WorkbookAlreadySaved: if ``filename`` is ``None``
        """
        cache = self._descriptors_cache
        if filename in cache:
            fobj = cache[filename]
            # re-insert the value so it does not get evicted
            # from cache soon
            del cache[filename]
            cache[filename] = fobj
            return fobj

        if filename is None:
            raise WorkbookAlreadySaved('this workbook has already been saved '
                                       'and cannot be modified or saved anymore.')

        fobj = open(filename, 'r+')
        cache[filename] = fobj
        if len(cache) > DESCRIPTORS_CACHE_SIZE:
            _evicted_name, evicted = cache.popitem(last=False)
            evicted.close()
        return fobj

    @property
    def _descriptors_cache(self):
        """Handle cache stored on the parent workbook's ``_local_data``.

        Created as an empty ``OrderedDict`` the first time it is needed.
        """
        try:
            return self._parent._local_data.cache
        except AttributeError:
            self._parent._local_data.cache = OrderedDict()
            return self._parent._local_data.cache

    @property
    def filename(self):
        """Name of the temporary file that receives the complete sheet."""
        return self._fileobj_name

    @property
    def _temp_files(self):
        """Tuple of the content, header and final temporary file names."""
        return (self._fileobj_content_name,
                self._fileobj_header_name,
                self._fileobj_name)

    def _unset_temp_files(self):
        """Forget all temporary file names.

        Afterwards :meth:`get_temporary_file` raises
        ``WorkbookAlreadySaved`` for them.
        """
        self._fileobj_header_name = None
        self._fileobj_content_name = None
        self._fileobj_name = None

    def write_header(self):
        """Write the opening worksheet XML, up to ``<sheetData>``."""
        fobj = self.get_temporary_file(filename=self._fileobj_header_name)
        doc = XMLGenerator(fobj, 'utf-8')

        start_tag(doc, 'worksheet', {
            'xmlns': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
            'xmlns:r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'})
        start_tag(doc, 'sheetPr')
        tag(doc, 'outlinePr', {'summaryBelow': '1', 'summaryRight': '1'})
        end_tag(doc, 'sheetPr')
        tag(doc, 'dimension', {'ref': 'A1:%s' % (self.get_dimensions())})
        start_tag(doc, 'sheetViews')
        start_tag(doc, 'sheetView', {'workbookViewId': '0'})
        tag(doc, 'selection', {'activeCell': 'A1', 'sqref': 'A1'})
        end_tag(doc, 'sheetView')
        end_tag(doc, 'sheetViews')
        tag(doc, 'sheetFormatPr', {'defaultRowHeight': '15'})
        # Left open on purpose: _close_content() emits the end tags.
        start_tag(doc, 'sheetData')

    def close(self):
        """Finish the content and assemble header + content into the final file."""
        self._close_content()
        self._fileobj = self.get_temporary_file(filename=self._fileobj_name)

        self._write_fileobj(self._fileobj_header_name)
        self._write_fileobj(self._fileobj_content_name)

        self._fileobj.close()

    def _write_fileobj(self, fobj_name):
        """Copy the whole of ``fobj_name`` into ``self._fileobj``.

        The source handle is closed afterwards.
        """
        fobj = self.get_temporary_file(filename=fobj_name)
        fobj.flush()
        fobj.seek(0)

        while True:
            chunk = fobj.read(4096)
            if not chunk:
                break
            self._fileobj.write(chunk)

        fobj.close()
        self._fileobj.flush()

    def _close_content(self):
        """Append the closing ``sheetData`` and ``worksheet`` tags."""
        doc = self._get_content_generator()
        end_tag(doc, 'sheetData')
        end_tag(doc, 'worksheet')

    def get_dimensions(self):
        """Return the bottom-right coordinate written so far.

        Returns ``'A1'`` while no row or no column has been recorded.
        """
        if not self._max_col or not self._max_row:
            return 'A1'
        return '%s%d' % (get_column_letter(self._max_col), (self._max_row))

    def _get_content_generator(self):
        """ XXX: this is ugly, but it allows to resume writing the file
        even after the handle is closed"""
        # A freshly created XMLGenerator would start writing at the
        # beginning of the file, erasing previously entered rows, so move
        # to the end of the file before adding new tags.
        handle = self.get_temporary_file(filename=self._fileobj_content_name)
        handle.seek(0, 2)

        # Use the same encoding as write_header(): both parts are
        # concatenated into one file by close(), and XMLGenerator would
        # otherwise fall back to its iso-8859-1 default.
        doc = XMLGenerator(out=handle, encoding='utf-8')
        return doc

    def append(self, row):
        """Write ``row`` as the next row of the sheet.

        ``None`` entries are skipped but still occupy their column.
        Booleans, numbers and dates are written inline, values whose
        first item equals ``'='`` are written as formulas, and everything
        else goes through the shared string builder.

        :param row: iterable containing values to append
        :type row: iterable
        """
        doc = self._get_content_generator()

        self._max_row += 1
        span = len(row)
        self._max_col = max(self._max_col, span)
        row_idx = self._max_row

        attrs = {'r': '%d' % row_idx, 'spans': '1:%d' % span}
        start_tag(doc, 'row', attrs)

        for col_idx, cell in enumerate(row):
            if cell is None:
                continue

            coordinate = '%s%d' % (get_column_letter(col_idx + 1), row_idx)
            attributes = {'r': coordinate}

            # bool is tested first; it is a subclass of int and would
            # presumably also match NUMERIC_TYPES -- confirm.
            if isinstance(cell, bool):
                dtype = 'boolean'
            elif isinstance(cell, NUMERIC_TYPES):
                dtype = 'numeric'
            elif isinstance(cell, (datetime.datetime, datetime.date)):
                dtype = 'datetime'
                cell = self._shared_date.datetime_to_julian(cell)
                attributes['s'] = STYLES[dtype]['style']
            elif cell and cell[0] == '=':
                dtype = 'formula'
            else:
                dtype = 'string'
                # Replace the text by whatever the builder returns for it.
                cell = self._string_builder.add(cell)

            if dtype != 'formula':
                attributes['t'] = STYLES[dtype]['type']

            start_tag(doc, 'c', attributes)
            if dtype == 'formula':
                # Strip the leading '=' and leave the value element empty.
                tag(doc, 'f', body='%s' % cell[1:])
                tag(doc, 'v')
            elif dtype == 'boolean':
                tag(doc, 'v', body='%d' % cell)
            else:
                tag(doc, 'v', body='%s' % cell)
            end_tag(doc, 'c')

        end_tag(doc, 'row')
class Cell(object):
    """Describes cell associated properties.

    Properties of interest include style, type, value, and address.
    """

    __slots__ = ('column',
                 'row',
                 '_value',
                 '_data_type',
                 'parent',
                 'xf_index',
                 '_hyperlink_rel',
                 '_shared_date',
                 'merged',
                 '_comment')

    ERROR_CODES = {'#NULL!': 0,
                   '#DIV/0!': 1,
                   '#VALUE!': 2,
                   '#REF!': 3,
                   '#NAME?': 4,
                   '#NUM!': 5,
                   '#N/A': 6}

    TYPE_STRING = 's'
    TYPE_FORMULA = 'f'
    TYPE_NUMERIC = 'n'
    TYPE_BOOL = 'b'
    # Same code as TYPE_STRING.
    TYPE_NULL = 's'
    TYPE_INLINE = 'inlineStr'
    TYPE_ERROR = 'e'
    TYPE_FORMULA_CACHE_STRING = 'str'

    VALID_TYPES = [TYPE_STRING, TYPE_FORMULA, TYPE_NUMERIC, TYPE_BOOL,
                   TYPE_NULL, TYPE_INLINE, TYPE_ERROR,
                   TYPE_FORMULA_CACHE_STRING]

    RE_PATTERNS = {
        'percentage': re.compile(r'^\-?[0-9]*\.?[0-9]*\s?\%$'),
        'time': re.compile(r'^(\d|[0-1]\d|2[0-3]):[0-5]\d(:[0-5]\d)?$'),
        'numeric': re.compile(r'^-?([\d]|[\d]+\.[\d]*|\.[\d]+|[1-9][\d]+\.?[\d]*)((E|e)-?[\d]+)?$'),
    }

    def __init__(self, worksheet, column, row, value=None):
        """Create a cell at ``column``/``row`` of ``worksheet``.

        :param worksheet: owning worksheet; ``worksheet.parent`` must
            expose ``excel_base_date``
        :param column: column letter(s); stored upper-cased
        :param row: row index
        :param value: optional initial value, bound via :attr:`value`
        """
        self.column = column.upper()
        self.row = row
        self.parent = worksheet
        self.xf_index = 0
        self.merged = False
        self._comment = None
        self._hyperlink_rel = None
        self._shared_date = SharedDate(base_date=worksheet.parent.excel_base_date)
        # _value is the stored value, while value is the displayed value
        self._value = None
        self._data_type = self.TYPE_NULL
        # Bind the value last: binding may read self.parent (style,
        # encoding) and self._shared_date, which must exist by then.
        # Compare against None so 0, False and '' are not dropped.
        if value is not None:
            self.value = value

    @property
    def encoding(self):
        """Encoding of the parent worksheet."""
        return self.parent.encoding

    def __repr__(self):
        return unicode("<Cell %s.%s>") % (self.parent.title, self.get_coordinate())

    def check_string(self, value):
        """Check string coding, length, and line break character

        :raises IllegalCharacterError: if the (truncated) text contains a
            character matched by ``ILLEGAL_CHARACTERS_RE``
        """
        # convert to unicode string
        if not isinstance(value, unicode):
            value = unicode(value, self.encoding)
        value = unicode(value)
        # string must never be longer than 32,767 characters
        # truncate if necessary
        value = value[:32767]
        # search(), not match(): an illegal character anywhere in the
        # text must be rejected, not only one in first position.
        if ILLEGAL_CHARACTERS_RE.search(value):
            raise IllegalCharacterError
        # we require that newline is represented as "\n" in core,
        # not as "\r\n"
        value = value.replace('\r\n', '\n')
        return value

    def check_numeric(self, value):
        """Cast value to int or float if necessary

        :raises ValueError: if the value is neither int- nor
            float-convertible
        """
        if not isinstance(value, NUMERIC_TYPES):
            try:
                value = int(value)
            except ValueError:
                value = float(value)
        return value

    def check_error(self, value):
        """Try to convert the value to text, falling back to ``#N/A``."""
        try:
            return unicode(value)
        except Exception:
            # Deliberate best effort: any conversion failure maps to #N/A.
            return unicode('#N/A')

    def set_explicit_value(self, value=None, data_type=TYPE_STRING):
        """Coerce values according to their explicit type

        Valid types without a coercion entry (currently only
        ``TYPE_FORMULA_CACHE_STRING``) update the data type but leave the
        stored value untouched.

        :raises DataTypeException: if ``data_type`` is not in
            ``VALID_TYPES``
        """
        type_coercion_map = {
            self.TYPE_INLINE: self.check_string,
            self.TYPE_STRING: self.check_string,
            self.TYPE_FORMULA: self.check_string,
            self.TYPE_NUMERIC: self.check_numeric,
            self.TYPE_BOOL: bool,
            self.TYPE_ERROR: self.check_error}
        try:
            self._value = type_coercion_map[data_type](value)
        except KeyError:
            if data_type not in self.VALID_TYPES:
                msg = 'Invalid data type: %s' % data_type
                raise DataTypeException(msg)
        self._data_type = data_type

    # preserve old method name
    set_value_explicit = set_explicit_value

    def data_type_for_value(self, value):
        """Given a value, infer the correct data type"""
        if value is None:
            data_type = self.TYPE_NULL
        elif value is True or value is False:
            data_type = self.TYPE_BOOL
        elif isinstance(value, NUMERIC_TYPES):
            data_type = self.TYPE_NUMERIC
        elif isinstance(value, (datetime.datetime, datetime.date,
                                datetime.time, datetime.timedelta)):
            # Date-like values are stored as numbers.
            data_type = self.TYPE_NUMERIC
        elif not value:
            data_type = self.TYPE_STRING
        elif isinstance(value, basestring) and value[0] == '=':
            data_type = self.TYPE_FORMULA
        elif isinstance(value, unicode) and self.RE_PATTERNS['numeric'].match(value):
            data_type = self.TYPE_NUMERIC
        elif not isinstance(value, unicode) and self.RE_PATTERNS['numeric'].match(str(value)):
            data_type = self.TYPE_NUMERIC
        elif isinstance(value, basestring) and value.strip() in self.ERROR_CODES:
            data_type = self.TYPE_ERROR
        elif isinstance(value, list):
            data_type = self.TYPE_ERROR
        else:
            data_type = self.TYPE_STRING
        return data_type

    def bind_value(self, value):
        """Given a value, infer type and display options.

        Returns ``True`` when one of the special cases (``None``,
        percentage, time of day, date-like object) handled the value, and
        ``None`` after the generic fallback.
        """
        self._data_type = self.data_type_for_value(value)
        if value is None:
            self.set_explicit_value('', self.TYPE_NULL)
            return True
        elif self._data_type == self.TYPE_STRING:
            # percentage detection
            if isinstance(value, unicode):
                percentage_search = self.RE_PATTERNS['percentage'].match(value)
            else:
                percentage_search = self.RE_PATTERNS['percentage'].match(str(value))
            if percentage_search and value.strip() != '%':
                value = float(value.replace('%', '')) / 100.0
                self.set_explicit_value(value, self.TYPE_NUMERIC)
                self._set_number_format(NumberFormat.FORMAT_PERCENTAGE)
                return True
            # time detection
            if isinstance(value, unicode):
                time_search = self.RE_PATTERNS['time'].match(value)
            else:
                time_search = self.RE_PATTERNS['time'].match(str(value))
            if time_search:
                sep_count = value.count(':')  # pylint: disable=E1103
                if sep_count == 1:
                    hours, minutes = [int(bit) for bit in value.split(':')]  # pylint: disable=E1103
                    seconds = 0
                elif sep_count == 2:
                    hours, minutes, seconds = \
                        [int(bit) for bit in value.split(':')]  # pylint: disable=E1103
                # Express the time of day as a fraction of one day.
                days = (hours / 24.0) + (minutes / 1440.0) + \
                    (seconds / 86400.0)
                self.set_explicit_value(days, self.TYPE_NUMERIC)
                self._set_number_format(NumberFormat.FORMAT_DATE_TIME3)
                return True
        if self._data_type == self.TYPE_NUMERIC:
            # date detection
            # if the value is a date, but not a date time, make it a
            # datetime, and set the time part to 0
            if isinstance(value, datetime.date) and not \
                    isinstance(value, datetime.datetime):
                value = datetime.datetime.combine(value, datetime.time())
            if isinstance(value, (datetime.datetime, datetime.time, datetime.timedelta)):
                if isinstance(value, datetime.datetime):
                    self._set_number_format(NumberFormat.FORMAT_DATE_YYYYMMDD2)
                elif isinstance(value, datetime.time):
                    self._set_number_format(NumberFormat.FORMAT_DATE_TIME6)
                elif isinstance(value, datetime.timedelta):
                    self._set_number_format(NumberFormat.FORMAT_DATE_TIMEDELTA)
                # Convert with the same SharedDate the ``value`` getter
                # uses for decoding, so a stored date reads back unchanged
                # whatever the workbook's base date is.
                value = self._shared_date.datetime_to_julian(date=value)
                self.set_explicit_value(value, self.TYPE_NUMERIC)
                return True
        self.set_explicit_value(value, self._data_type)

    @property
    def value(self):
        """Get or set the value held in the cell.

        :rtype: depends on the value (string, float, int or
            :class:`datetime.datetime`)
        """
        value = self._value
        if self.is_date():
            value = self._shared_date.from_julian(value)
        return value

    @value.setter
    def value(self, value):
        """Set the value and infer type and display options."""
        self.bind_value(value)

    @property
    def internal_value(self):
        """Always returns the value for excel."""
        return self._value

    @property
    def hyperlink(self):
        """Return the hyperlink target or an empty string"""
        rel = self._hyperlink_rel
        return (rel.target or '') if rel is not None else ''

    @hyperlink.setter
    def hyperlink(self, val):
        """Set value and display for hyperlinks in a cell.

        Automatically sets the `value` of the cell with link text when
        the cell holds no value yet, but you can modify it afterwards by
        setting the `value` property, and the hyperlink will remain.
        """
        if self._hyperlink_rel is None:
            self._hyperlink_rel = self.parent.create_relationship("hyperlink")
        self._hyperlink_rel.target = val
        self._hyperlink_rel.target_mode = "External"
        if self._value is None:
            self.value = val

    @property
    def hyperlink_rel_id(self):
        """Return the id pointed to by the hyperlink, or None"""
        rel = self._hyperlink_rel
        return (rel.id or None) if rel is not None else None

    def _set_number_format(self, format_code):
        """Set a new formatting code for numeric values"""
        self.style.number_format.format_code = format_code

    @property
    def has_style(self):
        """Check if the parent worksheet has a style for this cell"""
        return self.get_coordinate() in self.parent._styles  # pylint: disable=W0212

    @property
    def style(self):
        """Returns the :class:`openpyxl.style.Style` object for this cell"""
        return self.parent.get_style(self.get_coordinate())

    @property
    def data_type(self):
        """Return the data type represented by this cell"""
        return self._data_type

    def get_coordinate(self):
        """Return the coordinate string for this cell (e.g. 'B12')

        :rtype: string
        """
        return '%s%s' % (self.column, self.row)

    @property
    def address(self):
        """Return the coordinate string for this cell (e.g. 'B12')

        :rtype: string
        """
        return self.get_coordinate()

    def offset(self, row=0, column=0):
        """Returns a cell location relative to this cell.

        :param row: number of rows to offset
        :type row: int

        :param column: number of columns to offset
        :type column: int

        :rtype: :class:`openpyxl.cell.Cell`
        """
        offset_column = get_column_letter(column_index_from_string(
            column=self.column) + column)
        offset_row = self.row + row
        return self.parent.cell('%s%s' % (offset_column, offset_row))

    def is_date(self):
        """Returns whether the value is *probably* a date or not

        :rtype: bool
        """
        return (self.has_style
                and self.style.number_format.is_date_format()
                and isinstance(self._value, NUMERIC_TYPES))

    @property
    def anchor(self):
        """ returns the expected position of a cell in pixels from the top-left
            of the sheet. For example, A1 anchor should be (0,0).

            :rtype: tuple(int, int)
        """
        left_columns = (column_index_from_string(self.column, True) - 1)
        column_dimensions = self.parent.column_dimensions
        left_anchor = 0
        default_width = points_to_pixels(DEFAULT_COLUMN_WIDTH)

        for col_idx in range(left_columns):
            letter = get_column_letter(col_idx + 1)
            if letter in column_dimensions:
                cdw = column_dimensions.get(letter).width
                if cdw > 0:
                    left_anchor += points_to_pixels(cdw)
                    continue
            # No usable explicit width: fall back to the default.
            left_anchor += default_width

        row_dimensions = self.parent.row_dimensions
        top_anchor = 0
        top_rows = (self.row - 1)
        default_height = points_to_pixels(DEFAULT_ROW_HEIGHT)

        for row_idx in range(1, top_rows + 1):
            if row_idx in row_dimensions:
                rdh = row_dimensions[row_idx].height
                if rdh > 0:
                    top_anchor += points_to_pixels(rdh)
                    continue
            # No usable explicit height: fall back to the default.
            top_anchor += default_height

        return (left_anchor, top_anchor)

    @property
    def comment(self):
        """ Returns the comment associated with this cell

            :rtype: :class:`openpyxl.comments.Comment`
        """
        return self._comment

    @comment.setter
    def comment(self, value):
        """Attach ``value`` as this cell's comment, or detach with ``None``.

        :raises AttributeError: if ``value`` already belongs to a cell
            and is not this cell's current comment
        """
        if value is not None and value._parent is not None and value is not self.comment:
            raise AttributeError(
                "Comment already assigned to %s in worksheet %s. Cannot assign a comment to more than one cell" %
                (value._parent.get_coordinate(), value._parent.parent.title)
            )

        # Ensure the number of comments for the parent worksheet is up-to-date
        if value is None and self._comment is not None:
            self.parent._comment_count -= 1
        if value is not None and self._comment is None:
            self.parent._comment_count += 1

        # orphan the old comment
        if self._comment is not None:
            self._comment._parent = None

        self._comment = value
        if value is not None:
            self._comment._parent = self
class IterableWorksheet(Worksheet):
    """Read-only worksheet that parses its XML lazily, row by row."""

    def __init__(self, parent_workbook, title, worksheet_path,
                 xml_source, string_table, style_table):
        """Record where the sheet lives and read its declared dimensions.

        :param parent_workbook: owning workbook; must expose
            ``excel_base_date``
        :param title: sheet title, forwarded to ``Worksheet.__init__``
        :param worksheet_path: path handed to ``parent._archive.open``
            whenever the XML is needed
        :param xml_source: accepted for interface compatibility; the
            :attr:`xml_source` property is used instead
        :param string_table: shared strings, indexed by integer
        :param style_table: styles, indexed by integer style id
        """
        Worksheet.__init__(self, parent_workbook, title)
        self.worksheet_path = worksheet_path
        self._string_table = string_table
        self._style_table = style_table

        min_col, min_row, max_col, max_row = read_dimension(xml_source=self.xml_source)
        self.min_col = min_col
        self.min_row = min_row
        self.max_row = max_row
        self.max_col = max_col

        self._shared_date = SharedDate(base_date=parent_workbook.excel_base_date)

    @property
    def xml_source(self):
        """Open and return the worksheet XML from the parent's archive."""
        return self.parent._archive.open(self.worksheet_path)

    @xml_source.setter
    def xml_source(self, value):
        """Base class is always supplied XML source, IteratableWorksheet obtains it on demand."""
        pass

    def __getitem__(self, key):
        """Return a row generator for a range string or a slice of two
        coordinates (``ws['A1':'C4']`` is treated as ``'A1:C4'``)."""
        if isinstance(key, slice):
            # Format both ends of the slice; passing the slice object
            # itself leaves '{1}' without an argument.
            key = "{0}:{1}".format(key.start, key.stop)
        return self.iter_rows(key)

    def iter_rows(self, range_string='', row_offset=0, column_offset=1):
        """ Returns a squared range based on the `range_string` parameter,
        using generators.

        Without a range string the sheet's declared dimensions are used.

        :param range_string: range of cells (e.g. 'A1:C4')
        :type range_string: string

        :param row_offset: additional rows (e.g. 4)
        :type row: int

        :param column_offset: additional columns (e.g. 3)
        :type column: int

        :rtype: generator

        """
        if range_string:
            min_col, min_row, max_col, max_row = get_range_boundaries(
                range_string, row_offset, column_offset)
        else:
            min_col = column_index_from_string(self.min_col)
            # get_squared_range() treats max_col as exclusive.
            max_col = column_index_from_string(self.max_col) + 1
            min_row = self.min_row
            # NOTE(review): the reason for the +6 margin is not visible
            # here -- kept as-is, confirm before changing.
            max_row = self.max_row + 6

        return self.get_squared_range(min_col, min_row, max_col, max_row)

    def get_squared_range(self, min_col, min_row, max_col, max_row):
        """Yield one tuple of cells per row of the requested rectangle.

        Columns run from ``min_col`` up to but excluding ``max_col``.
        Cells absent from the XML, and whole rows skipped between two
        parsed rows, are filled in through ``get_missing_cells``. Raw
        values are converted according to the cell's data type.
        """
        expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)]
        current_row = min_row
        style_table = self._style_table

        for row, cells in groupby(self.get_cells(min_row, min_col,
                                                 max_row, max_col),
                                  operator.attrgetter('row')):
            full_row = []

            # Emit placeholder rows for any gap before this row.
            if current_row < row:
                for gap_row in xrange(current_row, row):
                    dummy_cells = get_missing_cells(gap_row, expected_columns)
                    yield tuple([dummy_cells[column] for column in expected_columns])

            temp_cells = list(cells)
            retrieved_columns = dict([(c.column, c) for c in temp_cells])
            missing_columns = list(set(expected_columns) - set(retrieved_columns.keys()))
            replacement_columns = get_missing_cells(row, missing_columns)

            for column in expected_columns:
                if column not in retrieved_columns:
                    full_row.append(replacement_columns[column])
                    continue

                cell = retrieved_columns[column]

                if cell.style_id is not None:
                    style = style_table[int(cell.style_id)]
                    cell = cell._replace(number_format=style.number_format.format_code)  # pylint: disable-msg=W0212

                if cell.internal_value is not None:
                    # Equality, not ``in``: TYPE_STRING is a plain string,
                    # so ``in`` would be a substring test.
                    if cell.data_type == Cell.TYPE_STRING:
                        # The raw value is an index into the string table.
                        cell = cell._replace(internal_value=unicode(self._string_table[int(cell.internal_value)]))  # pylint: disable-msg=W0212
                    elif cell.data_type == Cell.TYPE_BOOL:
                        cell = cell._replace(internal_value=cell.internal_value == '1')
                    elif cell.is_date:
                        cell = cell._replace(internal_value=self._shared_date.from_julian(float(cell.internal_value)))
                    elif cell.data_type == Cell.TYPE_NUMERIC:
                        cell = cell._replace(internal_value=float(cell.internal_value))
                    elif cell.data_type in (Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
                        cell = cell._replace(internal_value=unicode(cell.internal_value))
                full_row.append(cell)

            current_row = row + 1
            yield tuple(full_row)

    def get_cells(self, min_row, min_col, max_row, max_col):
        """Yield a ``RawCell`` for each ``<c>`` element inside the bounds.

        All four bounds are inclusive here. When a cell has a formula and
        the parent is not ``data_only``, the formula text (prefixed with
        ``=``) is yielded instead of the stored value.
        """
        # Build the qualified tag names once instead of per element.
        cell_tag = '{%s}c' % SHEET_MAIN_NS
        value_tag = '{%s}v' % SHEET_MAIN_NS
        formula_tag = '{%s}f' % SHEET_MAIN_NS

        p = iterparse(self.xml_source)
        for _event, element in p:
            if element.tag == cell_tag:
                coord = element.get('r')
                column_str, row = RE_COORDINATE.match(coord).groups()
                row = int(row)
                column = column_index_from_string(column_str)

                if min_col <= column <= max_col and min_row <= row <= max_row:
                    data_type = element.get('t', 'n')
                    style_id = element.get('s')
                    formula = element.findtext(formula_tag)
                    value = element.findtext(value_tag)
                    if formula is not None and not self.parent.data_only:
                        data_type = Cell.TYPE_FORMULA
                        value = "=" + formula
                    yield RawCell(row, column_str, coord, value, data_type, style_id, None)

            # sub-elements of cells should be skipped: they must stay
            # intact until their enclosing <c> element has been read
            if element.tag == value_tag or element.tag == formula_tag:
                continue
            element.clear()

    def cell(self, *args, **kwargs):
        # TODO return an individual cell
        raise NotImplementedError("use 'iter_rows()' instead")

    def range(self, *args, **kwargs):
        # TODO return a range of cells, basically get_squared_range with same interface as Worksheet
        raise NotImplementedError("use 'iter_rows()' instead")

    def rows(self):
        """Return a generator over all rows (same as ``iter_rows()``)."""
        return self.iter_rows()

    def calculate_dimension(self):
        """Return the declared dimensions as a range string."""
        return '%s%s:%s%s' % (self.min_col, self.min_row, self.max_col, self.max_row)

    def get_highest_column(self):
        """Return the numeric index of the declared last column."""
        return column_index_from_string(self.max_col)

    def get_highest_row(self):
        """Return the declared last row."""
        return self.max_row