def unmerge_cells(self, range_string=None, start_row=None, start_column=None, end_row=None, end_column=None): """ Remove merge on a cell range. Range is a cell range (e.g. A1:E1) """ if not range_string: if start_row is None or start_column is None or end_row is None or end_column is None: msg = "You have to provide a value either for "\ "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'" raise InsufficientCoordinatesException(msg) else: range_string = '%s%s:%s%s' % (get_column_letter(start_column + 1), start_row + 1, get_column_letter(end_column + 1), end_row + 1) elif len(range_string.split(':')) != 2: msg = "Range must be a cell range (e.g. A1:E1)" raise InsufficientCoordinatesException(msg) else: range_string = range_string.replace('$', '') if range_string in self._merged_cells: self._merged_cells.remove(range_string) min_col, min_row = coordinate_from_string(range_string.split(':')[0]) max_col, max_row = coordinate_from_string(range_string.split(':')[1]) min_col = column_index_from_string(min_col) max_col = column_index_from_string(max_col) # Mark cell as unmerged for col in xrange(min_col, max_col + 1): for row in xrange(min_row, max_row + 1): if not (row == min_row and col == min_col): self._get_cell('%s%s' % (get_column_letter(col), row)).merged = False else: msg = 'Cell range %s not known as merged.' % range_string raise InsufficientCoordinatesException(msg)
def test_dump_sheet(): test_filename = _get_test_filename() wb = Workbook(optimized_write=True) ws = wb.create_sheet() letters = [get_column_letter(x + 1) for x in xrange(20)] expected_rows = [] for row in xrange(20): expected_rows.append( ['%s%d' % (letter, row + 1) for letter in letters]) for row in xrange(20): expected_rows.append([(row + 1) for letter in letters]) for row in xrange(10): expected_rows.append([ datetime(2010, ((x % 12) + 1), row + 1) for x in range(len(letters)) ]) for row in xrange(20): expected_rows.append( ['=%s%d' % (letter, row + 1) for letter in letters]) for row in expected_rows: ws.append(row) wb.save(test_filename) wb2 = load_workbook(test_filename) ws = wb2.worksheets[0] for ex_row, ws_row in zip(expected_rows[:-20], ws.rows): for ex_cell, ws_cell in zip(ex_row, ws_row): assert ex_cell == ws_cell.value os.remove(test_filename)
def get_squared_range(self, min_col, min_row, max_col, max_row): """ The source worksheet file may have columns or rows missing. Missing cells will be created. """ if max_col is not None: expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)] else: expected_columns = [] row_counter = min_row # get cells row by row for row, cells in groupby(self.get_cells(min_row, min_col, max_row, max_col), operator.attrgetter('row')): full_row = [] if row_counter < row: # Rows requested before those in the worksheet for gap_row in xrange(row_counter, row): yield tuple(EMPTY_CELL for column in expected_columns) row_counter = row if expected_columns: retrieved_columns = dict([(c.column, c) for c in cells]) for column in expected_columns: if column in retrieved_columns: cell = retrieved_columns[column] full_row.append(cell) else: # create missing cell full_row.append(EMPTY_CELL) else: full_row = tuple(cells) row_counter = row + 1 yield tuple(full_row)
def merge_cells(self, range_string=None, start_row=None, start_column=None, end_row=None, end_column=None): """ Set merge on a cell range. Range is a cell range (e.g. A1:E1) """ if not range_string: if start_row is None or start_column is None or end_row is None or end_column is None: msg = "You have to provide a value either for "\ "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'" raise InsufficientCoordinatesException(msg) else: range_string = '%s%s:%s%s' % (get_column_letter(start_column + 1), start_row + 1, get_column_letter(end_column + 1), end_row + 1) elif len(range_string.split(':')) != 2: msg = "Range must be a cell range (e.g. A1:E1)" raise InsufficientCoordinatesException(msg) else: range_string = range_string.replace('$', '') # Make sure top_left cell exists - is this necessary? min_col, min_row = coordinate_from_string(range_string.split(':')[0]) max_col, max_row = coordinate_from_string(range_string.split(':')[1]) min_col = column_index_from_string(min_col) max_col = column_index_from_string(max_col) # Blank out the rest of the cells in the range for col in xrange(min_col, max_col + 1): for row in xrange(min_row, max_row + 1): if not (row == min_row and col == min_col): # PHPExcel adds cell and specifically blanks it out if it doesn't exist self._get_cell('%s%s' % (get_column_letter(col), row)).value = None self._get_cell('%s%s' % (get_column_letter(col), row)).merged = True if range_string not in self._merged_cells: self._merged_cells.append(range_string)
def test_dump_sheet(): test_filename = _get_test_filename() wb = Workbook(optimized_write=True) ws = wb.create_sheet() letters = [get_column_letter(x + 1) for x in xrange(20)] expected_rows = [] for row in xrange(20): expected_rows.append(['%s%d' % (letter, row + 1) for letter in letters]) for row in xrange(20): expected_rows.append([(row + 1) for letter in letters]) for row in xrange(10): expected_rows.append([datetime(2010, ((x % 12) + 1), row + 1) for x in range(len(letters))]) for row in xrange(20): expected_rows.append(['=%s%d' % (letter, row + 1) for letter in letters]) for row in expected_rows: ws.append(row) wb.save(test_filename) wb2 = load_workbook(test_filename) ws = wb2.worksheets[0] for ex_row, ws_row in zip(expected_rows[:-20], ws.rows): for ex_cell, ws_cell in zip(ex_row, ws_row): assert ex_cell == ws_cell.value os.remove(test_filename)
def get_squared_range(self, min_col, min_row, max_col, max_row): expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)] current_row = min_row style_table = self._style_table for row, cells in groupby(self.get_cells(min_row, min_col, max_row, max_col), operator.attrgetter('row')): full_row = [] if current_row < row: for gap_row in xrange(current_row, row): dummy_cells = get_missing_cells(gap_row, expected_columns) yield tuple([dummy_cells[column] for column in expected_columns]) current_row = row temp_cells = list(cells) retrieved_columns = dict([(c.column, c) for c in temp_cells]) missing_columns = list(set(expected_columns) - set(retrieved_columns.keys())) replacement_columns = get_missing_cells(row, missing_columns) for column in expected_columns: if column in retrieved_columns: cell = retrieved_columns[column] if cell.style_id is not None: style = style_table[int(cell.style_id)] cell = cell._replace(number_format=style.number_format.format_code) #pylint: disable-msg=W0212 if cell.internal_value is not None: if cell.data_type in Cell.TYPE_STRING: cell = cell._replace(internal_value=unicode(self._string_table[int(cell.internal_value)])) #pylint: disable-msg=W0212 elif cell.data_type == Cell.TYPE_BOOL: cell = cell._replace(internal_value=cell.internal_value == '1') elif cell.is_date: cell = cell._replace(internal_value=self._shared_date.from_julian(float(cell.internal_value))) elif cell.data_type == Cell.TYPE_NUMERIC: cell = cell._replace(internal_value=float(cell.internal_value)) elif cell.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING): cell = cell._replace(internal_value=unicode(cell.internal_value)) full_row.append(cell) else: full_row.append(replacement_columns[column]) current_row = row + 1 yield tuple(full_row)
def unmerge_cells(self, range_string=None, start_row=None, start_column=None, end_row=None, end_column=None): """ Remove merge on a cell range. Range is a cell range (e.g. A1:E1) """ if not range_string: if start_row is None or start_column is None or end_row is None or end_column is None: msg = "You have to provide a value either for "\ "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'" raise InsufficientCoordinatesException(msg) else: range_string = '%s%s:%s%s' % ( get_column_letter(start_column + 1), start_row + 1, get_column_letter(end_column + 1), end_row + 1) elif len(range_string.split(':')) != 2: msg = "Range must be a cell range (e.g. A1:E1)" raise InsufficientCoordinatesException(msg) else: range_string = range_string.replace('$', '') if range_string in self._merged_cells: self._merged_cells.remove(range_string) min_col, min_row = coordinate_from_string( range_string.split(':')[0]) max_col, max_row = coordinate_from_string( range_string.split(':')[1]) min_col = column_index_from_string(min_col) max_col = column_index_from_string(max_col) # Mark cell as unmerged for col in xrange(min_col, max_col + 1): for row in xrange(min_row, max_row + 1): if not (row == min_row and col == min_col): self._get_cell( '%s%s' % (get_column_letter(col), row)).merged = False else: msg = 'Cell range %s not known as merged.' % range_string raise InsufficientCoordinatesException(msg)
class TestFloats(TestWorksheet): sheet_name = 'Sheet2 - Numbers' query_range = 'K1:K30' expected = expected = [[(x + 1) / 100.0] for x in xrange(30)] def test_read_fast_integrated(self): wb = self._open_wb() ws = wb.get_sheet_by_name(name=self.sheet_name) for row, expected_row in izip(ws.iter_rows(self.query_range), self.expected): row_values = [x.value for x in row] assert row_values == expected_row
def test_illegal_chacters(): from openpyxl.exceptions import IllegalCharacterError from openpyxl.compat import xrange from itertools import chain ws = build_dummy_worksheet() cell = Cell(ws, 'A', 1) # The bytes 0x00 through 0x1F inclusive must be manually escaped in values. illegal_chrs = chain(xrange(9), xrange(11, 13), xrange(14, 32)) for i in illegal_chrs: with pytest.raises(IllegalCharacterError): cell.value = chr(i) with pytest.raises(IllegalCharacterError): cell.value = "A {0} B".format(chr(i)) cell.value = chr(33) cell.value = chr(9) # Tab cell.value = chr(10) # Newline cell.value = chr(13) # Carriage return cell.value = " Leading and trailing spaces are legal "
def merge_cells(self, range_string=None, start_row=None, start_column=None, end_row=None, end_column=None): """ Set merge on a cell range. Range is a cell range (e.g. A1:E1) """ if not range_string: if start_row is None or start_column is None or end_row is None or end_column is None: msg = "You have to provide a value either for "\ "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'" raise InsufficientCoordinatesException(msg) else: range_string = '%s%s:%s%s' % ( get_column_letter(start_column + 1), start_row + 1, get_column_letter(end_column + 1), end_row + 1) elif len(range_string.split(':')) != 2: msg = "Range must be a cell range (e.g. A1:E1)" raise InsufficientCoordinatesException(msg) else: range_string = range_string.replace('$', '') # Make sure top_left cell exists - is this necessary? min_col, min_row = coordinate_from_string(range_string.split(':')[0]) max_col, max_row = coordinate_from_string(range_string.split(':')[1]) min_col = column_index_from_string(min_col) max_col = column_index_from_string(max_col) # Blank out the rest of the cells in the range for col in xrange(min_col, max_col + 1): for row in xrange(min_row, max_row + 1): if not (row == min_row and col == min_col): # PHPExcel adds cell and specifically blanks it out if it doesn't exist self._get_cell('%s%s' % (get_column_letter(col), row)).value = None self._get_cell('%s%s' % (get_column_letter(col), row)).merged = True if range_string not in self._merged_cells: self._merged_cells.append(range_string)
def range(self, range_string, row=0, column=0): """Returns a 2D array of cells, with optional row and column offsets. :param range_string: cell range string or `named range` name :type range_string: string :param row: number of rows to offset :type row: int :param column: number of columns to offset :type column: int :rtype: tuples of tuples of :class:`openpyxl.cell.Cell` """ if ":" in range_string: # R1C1 range result = [] min_range, max_range = range_string.split(":") min_col, min_row = coordinate_from_string(min_range) max_col, max_row = coordinate_from_string(max_range) if column: min_col = get_column_letter(column_index_from_string(min_col) + column) max_col = get_column_letter(column_index_from_string(max_col) + column) min_col = column_index_from_string(min_col) max_col = column_index_from_string(max_col) cache_cols = {} for col in xrange(min_col, max_col + 1): cache_cols[col] = get_column_letter(col) rows = xrange(min_row + row, max_row + row + 1) cols = xrange(min_col, max_col + 1) for row in rows: new_row = [] for col in cols: new_row.append(self.cell("%s%s" % (cache_cols[col], row))) result.append(tuple(new_row)) return tuple(result) else: try: return self.cell(coordinate=range_string, row=row, column=column) except CellCoordinatesException: pass # named range named_range = self._parent.get_named_range(range_string) if named_range is None: msg = "%s is not a valid range name" % range_string raise NamedRangeException(msg) if isinstance(named_range, NamedRangeContainingValue): msg = "%s refers to a value, not a range" % range_string raise NamedRangeException(msg) result = [] for destination in named_range.destinations: worksheet, cells_range = destination if worksheet is not self: msg = "Range %s is not defined on worksheet %s" % (cells_range, self.title) raise NamedRangeException(msg) content = self.range(cells_range) if isinstance(content, tuple): for cells in content: result.extend(cells) else: result.append(content) if len(result) == 1: return result[0] else: return tuple(result)
# these indicies corrospond to A -> ZZZ and include all allowed # columns if not 1 <= col_idx <= 18278: raise ValueError("Invalid column index {0}".format(col_idx)) letters = [] while col_idx > 0: col_idx, remainder = divmod(col_idx, 26) # check for exact division and borrow if needed if remainder == 0: remainder = 26 col_idx -= 1 letters.append(chr(remainder+64)) return ''.join(reversed(letters)) _COL_STRING_CACHE = dict((get_column_letter(i), i) for i in xrange(1, 18279)) def column_index_from_string(str_col, cache=_COL_STRING_CACHE): # we use a function argument to get indexed name lookup col = cache.get(str_col.upper()) if col is None: raise ValueError("{0} is not a valid column name".format(str_col)) return col del _COL_STRING_CACHE PERCENT_REGEX = re.compile(r'^\-?(?P<number>[0-9]*\.?[0-9]*\s?)\%$') TIME_REGEX = re.compile(r""" ^(?: # HH:MM and HH:MM:SS (?P<hour>[0-1]{0,1}[0-9]{2}): (?P<minute>[0-5][0-9]):? (?P<second>[0-5][0-9])?$)
def range(self, range_string, row=0, column=0): """Returns a 2D array of cells, with optional row and column offsets. :param range_string: cell range string or `named range` name :type range_string: string :param row: number of rows to offset :type row: int :param column: number of columns to offset :type column: int :rtype: tuples of tuples of :class:`openpyxl.cell.Cell` """ if ':' in range_string: # R1C1 range result = [] min_range, max_range = range_string.split(':') min_col, min_row = coordinate_from_string(min_range) max_col, max_row = coordinate_from_string(max_range) if column: min_col = get_column_letter( column_index_from_string(min_col) + column) max_col = get_column_letter( column_index_from_string(max_col) + column) min_col = column_index_from_string(min_col) max_col = column_index_from_string(max_col) cache_cols = {} for col in xrange(min_col, max_col + 1): cache_cols[col] = get_column_letter(col) rows = xrange(min_row + row, max_row + row + 1) cols = xrange(min_col, max_col + 1) for row in rows: new_row = [] for col in cols: new_row.append(self.cell('%s%s' % (cache_cols[col], row))) result.append(tuple(new_row)) return tuple(result) else: try: return self.cell(coordinate=range_string, row=row, column=column) except CellCoordinatesException: pass # named range named_range = self._parent.get_named_range(range_string) if named_range is None: msg = '%s is not a valid range name' % range_string raise NamedRangeException(msg) if isinstance(named_range, NamedRangeContainingValue): msg = '%s refers to a value, not a range' % range_string raise NamedRangeException(msg) result = [] for destination in named_range.destinations: worksheet, cells_range = destination if worksheet is not self: msg = 'Range %s is not defined on worksheet %s' % \ (cells_range, self.title) raise NamedRangeException(msg) content = self.range(cells_range) if isinstance(content, tuple): for cells in content: result.extend(cells) else: result.append(content) if len(result) == 1: return result[0] else: return tuple(result)
from openpyxl.styles import is_date_format from openpyxl.date_time import SharedDate from openpyxl.reader.worksheet import read_dimension from openpyxl.xml.ooxml import ( PACKAGE_WORKSHEETS, SHEET_MAIN_NS ) TYPE_NULL = Cell.TYPE_NULL MISSING_VALUE = None RE_COORDINATE = re.compile('^([A-Z]+)([0-9]+)$') SHARED_DATE = SharedDate() _COL_CONVERSION_CACHE = dict((get_column_letter(i), i) for i in xrange(1, 18279)) def column_index_from_string(str_col, _col_conversion_cache=_COL_CONVERSION_CACHE): # we use a function argument to get indexed name lookup return _col_conversion_cache[str_col] del _COL_CONVERSION_CACHE RAW_ATTRIBUTES = ['row', 'column', 'coordinate', 'internal_value', 'data_type', 'style_id', 'number_format'] BaseRawCell = namedtuple('RawCell', RAW_ATTRIBUTES) class RawCell(BaseRawCell): """Optimized version of the :class:`openpyxl.cell.Cell`, using named tuples.
# these indicies corrospond to A -> ZZZ and include all allowed # columns if not 1 <= col_idx <= 18278: raise ValueError("Invalid column index {0}".format(col_idx)) letters = [] while col_idx > 0: col_idx, remainder = divmod(col_idx, 26) # check for exact division and borrow if needed if remainder == 0: remainder = 26 col_idx -= 1 letters.append(chr(remainder + 64)) return ''.join(reversed(letters)) _COL_STRING_CACHE = dict((get_column_letter(i), i) for i in xrange(1, 18279)) def column_index_from_string(str_col, cache=_COL_STRING_CACHE): # we use a function argument to get indexed name lookup col = cache.get(str_col.upper()) if col is None: raise ValueError("{0} is not a valid column name".format(str_col)) return col del _COL_STRING_CACHE PERCENT_REGEX = re.compile(r'^\-?(?P<number>[0-9]*\.?[0-9]*\s?)\%$') TIME_REGEX = re.compile( r"""