def get_text(rich_node):
    """Return the text held by the ``t`` child of *rich_node* as unicode.

    Surrounding whitespace is stripped unless the ``t`` element carries
    the XML ``space`` attribute with the value ``preserve``.  A ``t``
    element whose text is empty or ``None`` yields an empty string.

    NOTE(review): if ``find`` returns ``None`` (no ``t`` child) the
    attribute access below raises AttributeError -- confirm that callers
    only pass nodes that contain a ``t`` element.
    """
    node = rich_node.find('{%s}t' % SHEET_MAIN_NS)
    content = node.text
    if not content:
        content = unicode('')
    keep_whitespace = node.get('{%s}space' % XML_NS) == 'preserve'
    if not keep_whitespace:
        content = content.strip()
    return unicode(content)
def get_text(xmlns, rich_node):
    """Return the text held by the ``t`` child of *rich_node* as unicode.

    :param xmlns: namespace used to build the qualified ``t`` tag name
    :param rich_node: element that is searched for the ``t`` child

    Surrounding whitespace is stripped unless the ``t`` element carries
    the ``space`` attribute (in the ``xml`` namespace) set to
    ``preserve``.  Empty or ``None`` text yields an empty string.

    NOTE(review): if ``find`` returns ``None`` (no ``t`` child) the
    attribute access below raises AttributeError -- confirm that callers
    only pass nodes that contain a ``t`` element.
    """
    t_tag = QName(xmlns, 't').text
    node = rich_node.find(t_tag)
    content = node.text
    if not content:
        content = unicode('')
    space_attribute = QName(NAMESPACES['xml'], 'space').text
    if node.get(space_attribute) != 'preserve':
        content = content.strip()
    return unicode(content)
def load_workbook(filename, use_iterators=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode
        c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raise: OpenModeError when a file object is not opened in binary mode
    :raise: InvalidFileException when the archive cannot be opened, cannot
        be repaired, or a KeyError is raised while loading it

    .. note::

        When using lazy load, all worksheets will be
        :class:`openpyxl.reader.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    try:
        # Python 2
        is_file_instance = isinstance(filename, file)
    except NameError:
        # Python 3
        from io import BufferedReader
        is_file_instance = isinstance(filename, BufferedReader)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if "b" not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, "r", ZIP_DEFLATED)
    except BadZipfile:
        # Give the archive one repair attempt before rejecting the file
        try:
            repair_central_directory(filename)
            archive = ZipFile(filename, "r", ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (BadZipfile, RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    wb = Workbook()

    if use_iterators:
        wb._set_optimized_read()

    try:
        _load_workbook(wb, archive, filename, use_iterators)
    except KeyError:
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))
    finally:
        # Always release the archive, including when loading fails;
        # previously a failed load left the ZipFile open.
        archive.close()

    return wb
def get_squared_range(self, min_col, min_row, max_col, max_row):
    """Yield one tuple of cells per row for the requested rectangle.

    Cells come from ``self.get_cells(...)`` grouped by their ``row``
    attribute.  Rows skipped between two grouped rows, and columns absent
    from a grouped row, are filled with whatever ``get_missing_cells``
    returns for them (presumably placeholder cells -- verify against that
    helper).  Nothing is yielded for rows after the last grouped row.

    :param min_col: first column index (inclusive)
    :param min_row: first row index; gap filling starts here
    :param max_col: column index bound; exclusive when building the
        expected column letters (``xrange(min_col, max_col)``)
    :param max_row: row bound, forwarded unchanged to ``get_cells``
    """
    # Column letters every yielded tuple must contain, in order.
    expected_columns = [
        get_column_letter(ci) for ci in xrange(min_col, max_col)
    ]
    current_row = min_row
    style_table = self._style_table
    for row, cells in groupby(
            self.get_cells(min_row, min_col, max_row, max_col),
            operator.attrgetter('row')):
        full_row = []
        if current_row < row:
            # Emit filler rows for every row number skipped by the source.
            for gap_row in xrange(current_row, row):
                dummy_cells = get_missing_cells(gap_row, expected_columns)
                yield tuple(
                    [dummy_cells[column] for column in expected_columns])
                current_row = row
        temp_cells = list(cells)
        # Index the cells that are present by column letter.
        retrieved_columns = dict([(c.column, c) for c in temp_cells])
        missing_columns = list(
            set(expected_columns) - set(retrieved_columns.keys()))
        replacement_columns = get_missing_cells(row, missing_columns)
        for column in expected_columns:
            if column in retrieved_columns:
                cell = retrieved_columns[column]
                if cell.style_id is not None:
                    # Resolve the style index to its number format code.
                    style = style_table[int(cell.style_id)]
                    cell = cell._replace(
                        number_format=style.number_format.format_code
                    )  #pylint: disable-msg=W0212
                if cell.internal_value is not None:
                    # Branch order matters: the date check runs before the
                    # plain numeric conversion.
                    # NOTE(review): the string check uses ``in`` rather
                    # than ``==`` -- confirm this is intended.
                    if cell.data_type in Cell.TYPE_STRING:
                        # The raw value is an index into the string table.
                        cell = cell._replace(internal_value=unicode(
                            self._string_table[int(cell.internal_value)])
                        )  #pylint: disable-msg=W0212
                    elif cell.data_type == Cell.TYPE_BOOL:
                        cell = cell._replace(
                            internal_value=cell.internal_value == '1')
                    elif cell.is_date:
                        cell = cell._replace(
                            internal_value=self._shared_date.from_julian(
                                float(cell.internal_value)))
                    elif cell.data_type == Cell.TYPE_NUMERIC:
                        cell = cell._replace(
                            internal_value=float(cell.internal_value))
                    elif cell.data_type in (
                            Cell.TYPE_INLINE,
                            Cell.TYPE_FORMULA_CACHE_STRING):
                        cell = cell._replace(
                            internal_value=unicode(cell.internal_value))
                full_row.append(cell)
            else:
                full_row.append(replacement_columns[column])
        # The next expected row is the one right after the row just built.
        current_row = row + 1
        yield tuple(full_row)
def load_workbook(filename, use_iterators=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode
        c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raise: OpenModeError when a file object is not opened in binary mode
    :raise: InvalidFileException when the archive cannot be opened, cannot
        be repaired, or a KeyError is raised while loading it

    .. note::

        When using lazy load, all worksheets will be
        :class:`openpyxl.reader.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    try:
        # Python 2
        is_file_instance = isinstance(filename, file)
    except NameError:
        # Python 3
        from io import BufferedReader
        is_file_instance = isinstance(filename, BufferedReader)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # One repair attempt on the central directory, then give up
        try:
            repair_central_directory(filename)
            archive = ZipFile(filename, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (BadZipfile, RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    wb = Workbook()

    if use_iterators:
        wb._set_optimized_read()

    try:
        _load_workbook(wb, archive, filename, use_iterators)
    except KeyError:
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))
    finally:
        # Close the archive on every exit path; a failed load used to
        # leave the ZipFile handle open.
        archive.close()

    return wb
def get_squared_range(p, min_col, min_row, max_col, max_row, string_table, style_table, shared_date):
    """Yield one tuple of cells per row for the requested rectangle.

    ``get_rows`` supplies ``(row, cells)`` pairs.  Rows skipped between two
    supplied rows, and columns absent from a supplied row, are filled with
    whatever ``get_missing_cells`` returns for them (presumably placeholder
    cells -- verify against that helper).  Nothing is yielded for rows
    after the last supplied row.

    :param p: passed through unchanged as the first argument of ``get_rows``
    :param min_col: first column index (inclusive)
    :param min_row: first row index; gap filling starts here
    :param max_col: column index bound; exclusive when building the
        expected column letters (``xrange(min_col, max_col)``)
    :param max_row: row bound, forwarded unchanged to ``get_rows``
    :param string_table: indexed with ``int(cell.internal_value)`` for
        string cells
    :param style_table: indexed with ``int(cell.style_id)``
    :param shared_date: object whose ``from_julian`` converts date values
    """
    # Column letters every yielded tuple must contain, in order.
    expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)]
    current_row = min_row
    for row, cells in get_rows(p, min_row=min_row, max_row=max_row, min_column=min_col, max_column=max_col):
        full_row = []
        if current_row < row:
            # Emit filler rows for every row number skipped by the source.
            for gap_row in xrange(current_row, row):
                dummy_cells = get_missing_cells(gap_row, expected_columns)
                yield tuple([dummy_cells[column] for column in expected_columns])
                current_row = row
        temp_cells = list(cells)
        # Index the cells that are present by column letter.
        retrieved_columns = dict([(c.column, c) for c in temp_cells])
        missing_columns = list(set(expected_columns) - set(retrieved_columns.keys()))
        replacement_columns = get_missing_cells(row, missing_columns)
        for column in expected_columns:
            if column in retrieved_columns:
                cell = retrieved_columns[column]
                if cell.style_id is not None:
                    # Resolve the style index to its number format code.
                    style = style_table[int(cell.style_id)]
                    cell = cell._replace(number_format=style.number_format.format_code) #pylint: disable-msg=W0212
                if cell.internal_value is not None:
                    # Branch order matters: the date check runs before the
                    # plain numeric conversion.
                    # NOTE(review): the string check uses ``in`` rather
                    # than ``==`` -- confirm this is intended.
                    if cell.data_type in Cell.TYPE_STRING:
                        # The raw value is an index into the string table.
                        cell = cell._replace(internal_value=unicode(string_table[int(cell.internal_value)])) #pylint: disable-msg=W0212
                    elif cell.data_type == Cell.TYPE_BOOL:
                        cell = cell._replace(internal_value=cell.internal_value == '1')
                    elif cell.is_date:
                        cell = cell._replace(internal_value=shared_date.from_julian(float(cell.internal_value)))
                    elif cell.data_type == Cell.TYPE_NUMERIC:
                        cell = cell._replace(internal_value=float(cell.internal_value))
                    elif cell.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
                        cell = cell._replace(internal_value=unicode(cell.internal_value))
                full_row.append(cell)
            else:
                full_row.append(replacement_columns[column])
        # The next expected row is the one right after the row just built.
        current_row = row + 1
        yield tuple(full_row)
def check_string(self, value):
    """Return *value* as a unicode string that is safe to store.

    Non-unicode input is decoded with ``self.encoding``.  The result is
    cut to at most 32,767 characters and every ``"\\r\\n"`` pair is then
    replaced by ``"\\n"``.

    Only the two-character sequence is rewritten; a lone ``"\\r"`` is left
    untouched.  Truncation happens before the replacement, so a
    ``"\\r\\n"`` pair split by the cut keeps its ``"\\r"``.
    """
    if isinstance(value, unicode):
        text = value
    else:
        # anything else is decoded with the configured encoding
        text = unicode(value, self.encoding)
    text = unicode(text)
    # string must never be longer than 32,767 characters
    truncated = text[:32767]
    # newlines are represented as "\n" in core
    return truncated.replace('\r\n', '\n')
def check_string(self, value):
    """Coerce *value* to unicode, cap its length and normalise CRLF.

    Input that is not already unicode is decoded using ``self.encoding``.
    The text is limited to its first 32,767 characters, after which each
    ``"\\r\\n"`` is turned into ``"\\n"``.  A bare ``"\\r"`` is not
    converted.
    """
    already_unicode = isinstance(value, unicode)
    if not already_unicode:
        value = unicode(value, self.encoding)
    # truncate first (32,767 character limit), then unify line breaks
    capped = unicode(value)[:32767]
    return capped.replace("\r\n", "\n")
def test_get_xml_iter():
    """Exercise ``_get_xml_iter`` with each supported kind of source.

    Covered inputs, in order: a native string, a unicode string, a
    temporary file object and a member opened from a zip archive.
    """
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile
    from zipfile import ZipFile

    FUT = _get_xml_iter

    # string input
    native_text = ""
    result = FUT(native_text)
    assert isinstance(result, BytesIO), type(result)

    # unicode input
    unicode_text = unicode(native_text)
    result = FUT(unicode_text)
    assert isinstance(result, BytesIO), type(result)

    # file object input
    # NOTE(review): ``tempfile`` must be a type supplied by the enclosing
    # module for this isinstance check to work -- verify the module import.
    handle = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    result = FUT(handle)
    assert isinstance(result, tempfile), type(result)
    handle.close()

    # zip member input
    backing = TemporaryFile()
    zipped = ZipFile(backing, mode="w")
    zipped.writestr("test", "whatever")
    result = FUT(zipped.open("test"))
    assert hasattr(result, "read")
    zipped.close()
def test_get_xml_iter():
    """Check the stream type ``_get_xml_iter`` returns per input kind.

    1. string, 2. unicode string, 3. file object, 4. zip archive member.
    """
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile
    from zipfile import ZipFile

    FUT = _get_xml_iter

    # 1 + 2: string sources
    plain = ""
    produced = FUT(plain)
    assert isinstance(produced, BytesIO), type(produced)
    as_unicode = unicode(plain)
    produced = FUT(as_unicode)
    assert isinstance(produced, BytesIO), type(produced)

    # 3: file object
    # NOTE(review): ``tempfile`` is expected to be a type provided by the
    # surrounding module; confirm, since only TemporaryFile is imported here.
    temp_handle = TemporaryFile(
        mode="rb+", prefix="openpyxl.", suffix=".unpack.temp")
    produced = FUT(temp_handle)
    assert isinstance(produced, tempfile), type(produced)
    temp_handle.close()

    # 4: member of a zip archive
    container = TemporaryFile()
    archive = ZipFile(container, mode="w")
    archive.writestr("test", "whatever")
    produced = FUT(archive.open("test"))
    assert hasattr(produced, "read")
    archive.close()
def test_read_complex_formulae():
    """Check how normal, unicode, shared and array formulae are read."""
    workbook_path = os.path.join(DATADIR, 'reader', 'formulae.xlsx')
    wb = load_workbook(workbook_path)
    ws = wb.get_active_sheet()

    # Test normal formulae
    for coordinate in ('A1', 'A2'):
        assert ws.cell(coordinate).data_type != 'f'
    plain_formulae = (
        ('A3', '=12345'),
        ('A4', '=A2+A3'),
        ('A5', '=SUM(A2:A4)'),
    )
    for coordinate, formula in plain_formulae:
        assert ws.cell(coordinate).data_type == 'f'
        assert coordinate not in ws.formula_attributes
        assert ws.cell(coordinate).value == formula

    # Test unicode
    expected = '=IF(ISBLANK(B16), "Düsseldorf", B16)'
    # Hack to prevent pytest doing its own unicode conversion
    try:
        expected = unicode(expected, "UTF8")
    except TypeError:
        pass
    assert ws['A16'].value == expected

    # Test shared formulae: the first cell carries the range and the text
    assert ws.cell('B7').data_type == 'f'
    assert ws.formula_attributes['B7']['t'] == 'shared'
    assert ws.formula_attributes['B7']['si'] == '0'
    assert ws.formula_attributes['B7']['ref'] == 'B7:E7'
    assert ws.cell('B7').value == '=B4*2'
    # ... the remaining cells only reference the shared index
    for coordinate in ('C7', 'D7', 'E7'):
        assert ws.cell(coordinate).data_type == 'f'
        assert ws.formula_attributes[coordinate]['t'] == 'shared'
        assert ws.formula_attributes[coordinate]['si'] == '0'
        assert 'ref' not in ws.formula_attributes[coordinate]
        assert ws.cell(coordinate).value == '='

    # Test array formulae
    array_attributes = ws.formula_attributes
    assert ws.cell('C10').data_type == 'f'
    # NOTE(review): this checks that the substring 'ref' is absent from the
    # ref *value*, which looks unintended -- kept as in the original.
    assert 'ref' not in array_attributes['C10']['ref']
    assert array_attributes['C10']['t'] == 'array'
    assert 'si' not in array_attributes['C10']
    assert array_attributes['C10']['ref'] == 'C10:C14'
    assert ws.cell('C10').value == '=SUM(A10:A14*B10:B14)'
    assert ws.cell('C11').data_type != 'f'
def get_string(xmlns, string_index_node):
    """Return the full text stored in one string index node.

    :param xmlns: namespace used to build the qualified ``r`` tag name
    :param string_index_node: element holding either rich-text runs
        (``r`` children) or plain text

    When ``r`` children exist their texts are concatenated in document
    order; otherwise the node itself is read with :func:`get_text`.
    """
    runs = string_index_node.findall(QName(xmlns, 'r').text)
    if not runs:
        return get_text(xmlns, string_index_node)
    pieces = [get_text(xmlns, run) for run in runs]
    return unicode(''.join(pieces))
def get_string(string_index_node):
    """Return the full text stored in one string index node.

    If the node has ``r`` children (looked up in ``SHEET_MAIN_NS``), the
    text of every child is read with :func:`get_text` and the pieces are
    joined in document order.  Without such children the node itself is
    passed to :func:`get_text`.
    """
    run_tag = '{%s}r' % SHEET_MAIN_NS
    runs = string_index_node.findall(run_tag)
    if not runs:
        return get_text(string_index_node)
    pieces = [get_text(run) for run in runs]
    return unicode(''.join(pieces))
class NamedRange(object):
    """A named group of cells.

    ``scope`` is a worksheet object, or ``None`` (the default) for names
    with workbook scope.  ``destinations`` is iterated as pairs of
    ``(sheet, name)`` when the range is rendered as text.
    """
    __slots__ = ('name', 'destinations', 'scope')

    # templates used by __str__ and __repr__
    str_format = unicode('%s!%s')
    repr_format = unicode('<%s "%s">')

    def __init__(self, name, destinations, scope=None):
        self.name = name
        self.destinations = destinations
        self.scope = scope

    def __str__(self):
        """Return every destination as ``sheet!name``, comma separated."""
        rendered = []
        for sheet, name in self.destinations:
            rendered.append(self.str_format % (sheet, name))
        return ','.join(rendered)

    def __repr__(self):
        """Return ``<ClassName "destinations">`` for debugging."""
        class_name = self.__class__.__name__
        return self.repr_format % (class_name, str(self))
def assert_equals_file_content(reference_file, fixture, filetype='xml'):
    """Assert that *fixture* matches the content of *reference_file*.

    :param reference_file: path of the file holding the expected content
    :param fixture: either a path to an existing file, whose content is
        read, or the content itself
    :param filetype: when ``'xml'`` both sides are parsed, re-indented with
        ``pretty_indent`` and serialised again before being compared

    The two sides are compared line by line with ``difflib.unified_diff``;
    any difference fails the assertion with the diff in the message.
    """
    def _read(path):
        # read a whole file, always releasing the handle
        handle = open(path)
        try:
            return handle.read()
        finally:
            handle.close()

    def _normalise_xml(text):
        # parse, re-indent and serialise so formatting noise is ignored
        root = fromstring(text)
        pretty_indent(root)
        buffer = BytesIO()
        ElementTree(root).write(buffer)
        return buffer.getvalue()

    if os.path.isfile(fixture):
        fixture_content = _read(fixture)
    else:
        fixture_content = fixture
    expected_content = _read(reference_file)

    if filetype == 'xml':
        fixture_content = _normalise_xml(fixture_content)
        expected_content = _normalise_xml(expected_content)

    fixture_lines = unicode(fixture_content).split('\n')
    expected_lines = unicode(expected_content).split('\n')
    differences = list(difflib.unified_diff(expected_lines, fixture_lines))
    if not differences:
        return

    report = BytesIO()
    pprint(differences, stream=report)
    assert False, 'Differences found : %s' % report.getvalue()
def test_get_xml_iter():
    """Check the stream type ``_get_xml_iter`` returns for strings and files."""
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile

    FUT = _get_xml_iter

    # native string source
    text = ""
    outcome = FUT(text)
    assert isinstance(outcome, BytesIO), type(outcome)

    # unicode string source
    unicode_text = unicode(text)
    outcome = FUT(unicode_text)
    assert isinstance(outcome, BytesIO), type(outcome)

    # file object source
    # NOTE(review): ``tempfile`` must be a type supplied by the enclosing
    # module for this isinstance check to work -- verify the module import.
    scratch = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    outcome = FUT(scratch)
    assert isinstance(outcome, tempfile), type(outcome)
    scratch.close()
class Worksheet(object):
    """Represents a worksheet.

    Do not create worksheets yourself,
    use :func:`openpyxl.workbook.Workbook.create_sheet` instead
    """
    repr_format = unicode('<Worksheet "%s">')

    BREAK_NONE = 0
    BREAK_ROW = 1
    BREAK_COLUMN = 2

    SHEETSTATE_VISIBLE = 'visible'
    SHEETSTATE_HIDDEN = 'hidden'
    SHEETSTATE_VERYHIDDEN = 'veryHidden'

    # Paper size
    PAPERSIZE_LETTER = '1'
    PAPERSIZE_LETTER_SMALL = '2'
    PAPERSIZE_TABLOID = '3'
    PAPERSIZE_LEDGER = '4'
    PAPERSIZE_LEGAL = '5'
    PAPERSIZE_STATEMENT = '6'
    PAPERSIZE_EXECUTIVE = '7'
    PAPERSIZE_A3 = '8'
    PAPERSIZE_A4 = '9'
    PAPERSIZE_A4_SMALL = '10'
    PAPERSIZE_A5 = '11'

    # Page orientation
    ORIENTATION_PORTRAIT = 'portrait'
    ORIENTATION_LANDSCAPE = 'landscape'

    def __init__(self, parent_workbook, title='Sheet'):
        """Create an empty sheet attached to *parent_workbook*.

        An empty *title* is replaced by ``Sheet<N>`` where N is one more
        than the number of worksheets the parent currently holds.
        """
        self._parent = parent_workbook
        self._title = ''
        if not title:
            self.title = 'Sheet%d' % (1 + len(self._parent.worksheets))
        else:
            self.title = title
        self.row_dimensions = {}
        self.column_dimensions = {}
        self._cells = {}
        self._styles = {}
        self._charts = []
        self._images = []
        self._merged_cells = []
        self.relationships = []
        self._data_validations = []
        self.selected_cell = 'A1'
        self.active_cell = 'A1'
        self.sheet_state = self.SHEETSTATE_VISIBLE
        self.page_setup = PageSetup()
        self.page_margins = PageMargins()
        self.header_footer = HeaderFooter()
        self.sheet_view = SheetView()
        self.protection = SheetProtection()
        self.show_gridlines = True
        self.print_gridlines = False
        self.show_summary_below = True
        self.show_summary_right = True
        self.default_row_dimension = RowDimension()
        self.default_column_dimension = ColumnDimension()
        self._auto_filter = None
        self._freeze_panes = None
        self.paper_size = None
        self.orientation = None

    def __repr__(self):
        return self.repr_format % self.title

    @property
    def parent(self):
        """The workbook this sheet was created with."""
        return self._parent

    @property
    def encoding(self):
        """The encoding of the parent workbook."""
        return self._parent.encoding

    def garbage_collect(self):
        """Delete cells that are not storing a value."""
        # A cell is dropped when it is not merged, holds '' or None, and
        # either has no style entry or its style hashes to the default.
        delete_list = [coordinate for coordinate, cell in \
            iteritems(self._cells) if (not cell.merged and cell.value in ('', None) and \
            (coordinate not in self._styles or
            hash(cell.style) == _DEFAULTS_STYLE_HASH))]
        for coordinate in delete_list:
            del self._cells[coordinate]

    def get_cell_collection(self):
        """Return an unordered list of the cells in this worksheet."""
        return self._cells.values()

    def _set_title(self, value):
        """Set a sheet title, ensuring it is valid.

        :raise: SheetTitleException on a forbidden character or when the
            (possibly de-duplicated) title is longer than 31 characters
        """
        bad_title_char_re = re.compile(r'[\\*?:/\[\]]')
        if bad_title_char_re.search(value):
            msg = 'Invalid character found in sheet title'
            raise SheetTitleException(msg)

        # check if sheet_name already exists
        # do this *before* length check
        if self._parent.get_sheet_by_name(value):
            # use name, but append with lowest possible integer
            i = 1
            while self._parent.get_sheet_by_name('%s%d' % (value, i)):
                i += 1
            value = '%s%d' % (value, i)
        if len(value) > 31:
            msg = 'Maximum 31 characters allowed in sheet title'
            raise SheetTitleException(msg)
        self._title = value

    def _get_title(self):
        """Return the title for this sheet."""
        return self._title

    title = property(_get_title, _set_title, doc='Get or set the title of the worksheet. '
                     'Limited to 31 characters, no special characters.')

    def _set_auto_filter(self, range):
        """Store the auto-filter range as an upper-case string or None."""
        # Normalize range to a str or None
        if not range:
            range = None
        elif isinstance(range, basestring):
            # basestring (as in append) so unicode strings are accepted
            # too instead of being mistaken for a range of cells
            range = range.upper()
        else:
            # Assume a range
            range = range[0][0].address + ':' + range[-1][-1].address
        self._auto_filter = range

    def _get_auto_filter(self):
        return self._auto_filter

    auto_filter = property(_get_auto_filter, _set_auto_filter,
                           doc='get or set auto filtering on columns')

    def _set_freeze_panes(self, topLeftCell):
        """Store the freeze position; 'A1' or an empty value clears it."""
        if not topLeftCell:
            topLeftCell = None
        elif isinstance(topLeftCell, basestring):
            # basestring (as in append) so unicode strings are accepted
            # too instead of being mistaken for a cell object
            topLeftCell = topLeftCell.upper()
        else:
            # Assume a cell
            topLeftCell = topLeftCell.address
        if topLeftCell == 'A1':
            topLeftCell = None
        self._freeze_panes = topLeftCell

    def _get_freeze_panes(self):
        return self._freeze_panes

    freeze_panes = property(_get_freeze_panes, _set_freeze_panes,
                            doc="Get or set frozen panes")

    def cell(self, coordinate=None, row=None, column=None):
        """Returns a cell object based on the given coordinates.

        Usage: cell(coordinate='A15') **or** cell(row=15, column=1)

        If `coordinate` is not given, then row *and* column must be given.
        Row and column are zero-based: one is added to each before the
        coordinate string is built.

        Cells are kept in a dictionary which is empty at the worksheet
        creation.  Calling `cell` creates the cell in memory when they
        are first accessed, to reduce memory usage.

        :param coordinate: coordinates of the cell (e.g. 'B12')
        :type coordinate: string

        :param row: row index of the cell (e.g. 4)
        :type row: int

        :param column: column index of the cell (e.g. 3)
        :type column: int

        :raise: InsufficientCoordinatesException when coordinate or
            (row and column) are not given

        :rtype: :class:`openpyxl.cell.Cell`

        """
        if not coordinate:
            if (row is None or column is None):
                msg = "You have to provide a value either for " \
                      "'coordinate' or for 'row' *and* 'column'"
                raise InsufficientCoordinatesException(msg)
            else:
                coordinate = '%s%s' % (get_column_letter(column + 1), row + 1)
        else:
            coordinate = coordinate.replace('$', '')

        return self._get_cell(coordinate)

    def _get_cell(self, coordinate):
        """Return the cell at *coordinate*, creating it on first access.

        Creating a cell also registers default dimensions for its column
        and row when none exist yet.
        """
        if coordinate not in self._cells:
            column, row = coordinate_from_string(coordinate)
            new_cell = openpyxl.cell.Cell(self, column, row)
            self._cells[coordinate] = new_cell
            if column not in self.column_dimensions:
                self.column_dimensions[column] = ColumnDimension(column)
            if row not in self.row_dimensions:
                self.row_dimensions[row] = RowDimension(row)
        return self._cells[coordinate]

    def get_highest_row(self):
        """Returns the maximum row index containing data

        :rtype: int
        """
        if self.row_dimensions:
            return max(self.row_dimensions.keys())
        else:
            return 1

    def get_highest_column(self):
        """Get the largest value for column currently stored.

        :rtype: int
        """
        if self.column_dimensions:
            return max([
                column_index_from_string(column_index)
                for column_index in self.column_dimensions
            ])
        else:
            return 1

    def calculate_dimension(self):
        """Return the minimum bounding range for all cells containing data."""
        return 'A1:%s%d' % (get_column_letter(
            self.get_highest_column()), self.get_highest_row())

    def range(self, range_string, row=0, column=0):
        """Returns a 2D array of cells, with optional row and column offsets.

        :param range_string: cell range string or `named range` name
        :type range_string: string

        :param row: number of rows to offset
        :type row: int

        :param column: number of columns to offset
        :type column: int

        :raise: NamedRangeException when the name is unknown, refers to a
            value, or points at another worksheet

        :rtype: tuples of tuples of :class:`openpyxl.cell.Cell`

        """
        if ':' in range_string:
            # R1C1 range
            result = []
            min_range, max_range = range_string.split(':')
            min_col, min_row = coordinate_from_string(min_range)
            max_col, max_row = coordinate_from_string(max_range)
            if column:
                min_col = get_column_letter(
                    column_index_from_string(min_col) + column)
                max_col = get_column_letter(
                    column_index_from_string(max_col) + column)
            min_col = column_index_from_string(min_col)
            max_col = column_index_from_string(max_col)
            # Convert each column index to its letter only once
            cache_cols = {}
            for col in xrange(min_col, max_col + 1):
                cache_cols[col] = get_column_letter(col)
            rows = xrange(min_row + row, max_row + row + 1)
            cols = xrange(min_col, max_col + 1)
            for row in rows:
                new_row = []
                for col in cols:
                    new_row.append(self.cell('%s%s' % (cache_cols[col], row)))
                result.append(tuple(new_row))
            return tuple(result)
        else:
            # Try a single cell first, then fall back to a named range
            try:
                return self.cell(coordinate=range_string, row=row,
                                 column=column)
            except CellCoordinatesException:
                pass

            # named range
            named_range = self._parent.get_named_range(range_string)
            if named_range is None:
                msg = '%s is not a valid range name' % range_string
                raise NamedRangeException(msg)
            if isinstance(named_range, NamedRangeContainingValue):
                msg = '%s refers to a value, not a range' % range_string
                raise NamedRangeException(msg)

            result = []
            for destination in named_range.destinations:
                worksheet, cells_range = destination

                if worksheet is not self:
                    msg = 'Range %s is not defined on worksheet %s' % \
                        (cells_range, self.title)
                    raise NamedRangeException(msg)

                content = self.range(cells_range)

                if isinstance(content, tuple):
                    for cells in content:
                        result.extend(cells)
                else:
                    result.append(content)

            if len(result) == 1:
                return result[0]
            else:
                return tuple(result)

    def get_style(self, coordinate):
        """Return the style object for the specified cell."""
        if coordinate not in self._styles:
            self._styles[coordinate] = Style()
        return self._styles[coordinate]

    def set_printer_settings(self, paper_size, orientation):
        """Set printer settings """
        self.paper_size = paper_size
        assert orientation in (
            self.ORIENTATION_PORTRAIT,
            self.ORIENTATION_LANDSCAPE), "Values should be %s or %s" % (
                self.ORIENTATION_PORTRAIT, self.ORIENTATION_LANDSCAPE)
        self.orientation = orientation

    def create_relationship(self, rel_type):
        """Add a relationship for this sheet."""
        rel = Relationship(rel_type)
        self.relationships.append(rel)
        rel_id = self.relationships.index(rel)
        rel.id = 'rId' + str(rel_id + 1)
        return self.relationships[rel_id]

    def add_data_validation(self, data_validation):
        """ Add a data-validation object to the sheet.  The data-validation
            object defines the type of data-validation to be applied and the
            cell or range of cells it should apply to.
        """
        data_validation._sheet = self
        self._data_validations.append(data_validation)

    def add_chart(self, chart):
        """ Add a chart to the sheet """
        chart._sheet = self
        self._charts.append(chart)

    def add_image(self, img):
        """ Add an image to the sheet """
        img._sheet = self
        self._images.append(img)

    def merge_cells(self, range_string=None, start_row=None,
                    start_column=None, end_row=None, end_column=None):
        """ Set merge on a cell range.  Range is a cell range (e.g. A1:E1)

        Either *range_string* or all four zero-based indexes must be given.

        :raise: InsufficientCoordinatesException when neither form is
            complete or *range_string* is not of the form ``start:end``
        """
        if not range_string:
            if start_row is None or start_column is None or end_row is None or end_column is None:
                msg = "You have to provide a value either for "\
                      "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'"
                raise InsufficientCoordinatesException(msg)
            else:
                range_string = '%s%s:%s%s' % (
                    get_column_letter(start_column + 1), start_row + 1,
                    get_column_letter(end_column + 1), end_row + 1)
        elif len(range_string.split(':')) != 2:
            msg = "Range must be a cell range (e.g. A1:E1)"
            raise InsufficientCoordinatesException(msg)
        else:
            range_string = range_string.replace('$', '')

        # Make sure top_left cell exists - is this necessary?
        min_col, min_row = coordinate_from_string(range_string.split(':')[0])
        max_col, max_row = coordinate_from_string(range_string.split(':')[1])
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col)
        # Blank out the rest of the cells in the range
        for col in xrange(min_col, max_col + 1):
            for row in xrange(min_row, max_row + 1):
                if not (row == min_row and col == min_col):
                    # PHPExcel adds cell and specifically blanks it out if it doesn't exist
                    self._get_cell('%s%s' % (get_column_letter(col), row)).value = None
                    self._get_cell('%s%s' % (get_column_letter(col), row)).merged = True

        if range_string not in self._merged_cells:
            self._merged_cells.append(range_string)

    def unmerge_cells(self, range_string=None, start_row=None,
                      start_column=None, end_row=None, end_column=None):
        """ Remove merge on a cell range.  Range is a cell range (e.g. A1:E1)

        :raise: InsufficientCoordinatesException when the arguments are
            incomplete, malformed, or the range is not currently merged
        """
        if not range_string:
            if start_row is None or start_column is None or end_row is None or end_column is None:
                msg = "You have to provide a value either for "\
                      "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'"
                raise InsufficientCoordinatesException(msg)
            else:
                range_string = '%s%s:%s%s' % (
                    get_column_letter(start_column + 1), start_row + 1,
                    get_column_letter(end_column + 1), end_row + 1)
        elif len(range_string.split(':')) != 2:
            msg = "Range must be a cell range (e.g. A1:E1)"
            raise InsufficientCoordinatesException(msg)
        else:
            range_string = range_string.replace('$', '')

        if range_string in self._merged_cells:
            self._merged_cells.remove(range_string)
            min_col, min_row = coordinate_from_string(
                range_string.split(':')[0])
            max_col, max_row = coordinate_from_string(
                range_string.split(':')[1])
            min_col = column_index_from_string(min_col)
            max_col = column_index_from_string(max_col)
            # Mark cell as unmerged
            for col in xrange(min_col, max_col + 1):
                for row in xrange(min_row, max_row + 1):
                    if not (row == min_row and col == min_col):
                        self._get_cell(
                            '%s%s' % (get_column_letter(col), row)).merged = False
        else:
            msg = 'Cell range %s not known as merged.' % range_string
            raise InsufficientCoordinatesException(msg)

    def append(self, list_or_dict):
        """Appends a group of values at the bottom of the current sheet.

        * If it's a list: all values are added in order,
          starting from the first column
        * If it's a dict: values are assigned to the columns indicated
          by the keys (numbers or letters)

        :param list_or_dict: list or dict containing values to append
        :type list_or_dict: list/tuple or dict

        Usage:

        * append(['This is A1', 'This is B1', 'This is C1'])
        * **or** append({'A' : 'This is A1', 'C' : 'This is C1'})
        * **or** append({0 : 'This is A1', 2 : 'This is C1'})

        :raise: TypeError when list_or_dict is neither a list/tuple nor a dict

        """
        # The target row is the number of row dimensions registered so far
        row_idx = len(self.row_dimensions)
        if isinstance(list_or_dict, (list, tuple)):
            for col_idx, content in enumerate(list_or_dict):
                self.cell(row=row_idx, column=col_idx).value = content
        elif isinstance(list_or_dict, dict):
            for col_idx, content in iteritems(list_or_dict):
                if isinstance(col_idx, basestring):
                    col_idx = column_index_from_string(col_idx) - 1
                self.cell(row=row_idx, column=col_idx).value = content
        else:
            raise TypeError('list_or_dict must be a list or a dict')

    @property
    def rows(self):
        """All cells of the calculated dimension, one tuple per row."""
        return self.range(self.calculate_dimension())

    @property
    def columns(self):
        """All cells up to the highest row and column, one tuple per column."""
        max_row = self.get_highest_row()
        cols = []
        for col_idx in range(self.get_highest_column()):
            col = get_column_letter(col_idx + 1)
            res = self.range('%s1:%s%d' % (col, col, max_row))
            cols.append(tuple([x[0] for x in res]))
        return tuple(cols)

    def point_pos(self, left=0, top=0):
        """ tells which cell is under the given coordinates (in pixels)
        counting from the top-left corner of the sheet.
        Can be used to locate images and charts on the worksheet """
        current_col = 1
        current_row = 1
        column_dimensions = self.column_dimensions
        row_dimensions = self.row_dimensions
        default_width = points_to_pixels(DEFAULT_COLUMN_WIDTH)
        default_height = points_to_pixels(DEFAULT_ROW_HEIGHT)
        left_pos = 0
        top_pos = 0

        # Walk columns until the accumulated width passes `left`
        while left_pos <= left:
            letter = get_column_letter(current_col)
            current_col += 1
            if letter in column_dimensions:
                cdw = column_dimensions[letter].width
                if cdw > 0:
                    left_pos += points_to_pixels(cdw)
                    continue
            left_pos += default_width

        # Walk rows until the accumulated height passes `top`
        while top_pos <= top:
            row = current_row
            current_row += 1
            if row in row_dimensions:
                rdh = row_dimensions[row].height
                if rdh > 0:
                    top_pos += points_to_pixels(rdh)
                    continue
            top_pos += default_height

        return (letter, row)
def check_error(self, value):
    """Return *value* converted to unicode, or ``'#N/A'`` on failure.

    :param value: the object to convert
    :rtype: unicode

    Any ordinary exception raised by the conversion is replaced by the
    ``'#N/A'`` marker.  Exceptions that do not derive from ``Exception``
    (``KeyboardInterrupt``, ``SystemExit``) are deliberately allowed to
    propagate rather than being swallowed.
    """
    try:
        return unicode(value)
    except Exception:
        # Conversion failed: fall back to the N/A marker
        return unicode('#N/A')
def __repr__(self):
    """Return a debugging label of the form ``<Cell title.coordinate>``.

    The title is read from ``self.parent`` and the coordinate from
    ``self.get_coordinate()``.
    """
    template = unicode("<Cell %s.%s>")
    sheet_title = self.parent.title
    coordinate = self.get_coordinate()
    return template % (sheet_title, coordinate)
def load_workbook(filename, use_iterators=False, keep_vba=False, guess_types=True, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode
        c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from
        the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the
        formula (default) or the value stored the last time Excel read
        the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raise: OpenModeError when a file object is not opened in binary mode
    :raise: InvalidFileException when the archive cannot be opened, cannot
        be repaired, or a KeyError is raised while loading it

    .. note::

        When using lazy load, all worksheets will be
        :class:`openpyxl.reader.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    is_file_instance = isinstance(filename, file)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # One repair attempt; the repaired source replaces the original
        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (BadZipfile, RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    wb = Workbook(guess_types=guess_types, data_only=data_only)

    if use_iterators:
        wb._set_optimized_read()
        if not guess_types:
            warnings.warn('please note that data types are not guessed '
                          'when using iterator reader, so you do not need '
                          'to use guess_types=False')

    loaded = False
    try:
        _load_workbook(wb, archive, filename, use_iterators, keep_vba)
        loaded = True
    except KeyError:
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))
    finally:
        # The archive is left open only after a successful load with
        # keep_vba set, exactly as before; every failure path now closes
        # it instead of leaking the handle.
        if not (loaded and keep_vba):
            archive.close()

    return wb
def load_workbook(filename, use_iterators=False, keep_vba=False, guess_types=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode
        c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from
        the file
    :type guess_types: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raise: OpenModeError when a file object is not opened in binary mode
    :raise: InvalidFileException when the archive cannot be opened, cannot
        be repaired, or a KeyError is raised while loading it

    .. note::

        When using lazy load, all worksheets will be
        :class:`openpyxl.reader.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    try:
        # Python 2
        is_file_instance = isinstance(filename, file)
    except NameError:
        # Python 3
        from io import BufferedReader
        is_file_instance = isinstance(filename, BufferedReader)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # One repair attempt; the repaired source replaces the original
        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (BadZipfile, RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    wb = Workbook(guess_types=guess_types)

    if use_iterators:
        wb._set_optimized_read()
        if not guess_types:
            warnings.warn('please note that data types are not guessed '
                          'when using iterator reader, so you do not need '
                          'to use guess_types=False')

    loaded = False
    try:
        _load_workbook(wb, archive, filename, use_iterators, keep_vba)
        loaded = True
    except KeyError:
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))
    finally:
        # The archive is left open only after a successful load with
        # keep_vba set, exactly as before; every failure path now closes
        # it instead of leaking the handle.
        if not (loaded and keep_vba):
            archive.close()

    return wb