def get_text(rich_node):
    """Return the text of the ``t`` child element of *rich_node*.

    Leading and trailing whitespace is removed unless the element's
    ``space`` attribute (in the ``XML_NS`` namespace) is ``'preserve'``.
    Formatting information on the node is ignored.
    """
    t_element = rich_node.find('{%s}t' % SHEET_MAIN_NS)
    content = t_element.text
    if not content:
        # missing or empty text collapses to an empty unicode string
        content = unicode('')
    keep_whitespace = t_element.get('{%s}space' % XML_NS) == 'preserve'
    if not keep_whitespace:
        content = content.strip()
    return unicode(content)
def get_text(rich_node):
    """Read rich text, discarding formatting if not disallowed.

    The text of the ``t`` child of *rich_node* is returned as ``unicode``.
    Surrounding whitespace is stripped unless the element's ``space``
    attribute (in the ``XML_NS`` namespace) is ``'preserve'``.

    The escaped underscore ``_x005F_`` is turned back into ``_`` so that
    text such as ``_x005F_x000D_`` reads ``_x000D_``.
    """
    text_node = rich_node.find('{%s}t' % SHEET_MAIN_NS)
    text = text_node.text or unicode('')

    if text_node.get('{%s}space' % XML_NS) != 'preserve':
        text = text.strip()

    # fix XML escaping sequence for '_x': only the full escape '_x005F_'
    # stands for an underscore.  Matching the bare 'x005F_' (as before)
    # also removed that text when it was not part of an escape.
    text = text.replace('_x005F_', '_')
    return unicode(text)
def value(self):
    """Return the stored value converted according to the cell's type.

    ``None`` stays ``None``; booleans compare the raw value with ``'1'``;
    dates go through ``from_excel``; inline and cached-formula strings are
    returned as ``unicode``; shared strings are looked up by index.
    Anything else is returned as stored.
    """
    raw = self._value
    if raw is None:
        return None

    kind = self.data_type
    if kind == Cell.TYPE_BOOL:
        return raw == '1'
    if self.is_date:
        return from_excel(raw, self.base_date)
    if kind in (Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
        return unicode(raw)
    if kind == Cell.TYPE_STRING:
        # the raw value is an index into the shared string table
        return unicode(self.shared_strings[int(raw)])
    return raw
def check_string(self, value):
    """Validate and normalise a string value.

    ``None`` is returned unchanged.  Other values are converted to
    ``unicode`` (decoding with ``self.encoding`` when needed) and cut to
    32,767 characters.

    :raises IllegalCharacterError: if ``ILLEGAL_CHARACTERS_RE`` matches
        anywhere in the truncated value.
    """
    if value is None:
        return None

    # convert to unicode string
    text = value if isinstance(value, unicode) else unicode(value, self.encoding)
    # string must never be longer than 32,767 characters: truncate if necessary
    text = unicode(text)[:32767]

    if ILLEGAL_CHARACTERS_RE.search(text) is not None:
        raise IllegalCharacterError
    return text
def value(self):
    """Return the stored value converted according to ``data_type``.

    * ``'n'``: converted with ``from_excel`` when the number format is a
      date format, otherwise returned as stored
    * ``'b'``: ``True`` when the raw value is ``'1'``
    * inline / cached-formula strings: returned as ``unicode``
    * ``'s'``: looked up in ``shared_strings`` by index
    """
    raw = self._value
    if raw is None:
        return None

    kind = self.data_type
    if kind == 'n':
        if is_date_format(self.number_format):
            return from_excel(raw, self.base_date)
        return raw
    if kind == 'b':
        return raw == '1'
    if kind in (Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
        return unicode(raw)
    if kind == 's':
        return unicode(self.shared_strings[int(raw)])
    return raw
def check_string(self, value):
    """Check string coding, length, and line break character.

    ``None`` is passed through unchanged.  Any other value is converted to
    ``unicode`` (non-unicode input is decoded with ``self.encoding``),
    truncated to 32,767 characters and has CRLF line breaks replaced by a
    single line feed.

    :raises IllegalCharacterError: if ``ILLEGAL_CHARACTERS_RE`` matches
        anywhere in the truncated value.
    """
    if value is None:
        return None
    # convert to unicode string
    if not isinstance(value, unicode):
        value = unicode(value, self.encoding)
    value = unicode(value)
    # string must never be longer than 32,767 characters
    # truncate if necessary
    value = value[:32767]
    # search() scans the whole value; match() would only look at its start
    # and let illegal characters further in slip through.
    if ILLEGAL_CHARACTERS_RE.search(value):
        raise IllegalCharacterError
    # we require that newline is represented as "\n" in core,
    # not as "\r\n" or "\r"
    value = value.replace('\r\n', '\n')
    return value
def value(self):
    """Convert the raw stored value to its Python representation.

    Numeric cells with a date number format are converted with
    ``from_excel``; boolean cells compare the raw value with ``'1'``;
    inline and cached-formula strings become ``unicode``; shared-string
    cells are resolved through ``shared_strings``.  ``None`` and all other
    types are returned as stored.
    """
    stored = self._value
    if stored is None:
        return None

    cell_type = self.data_type
    if cell_type == 'n':
        is_date = is_date_format(self.number_format)
        return from_excel(stored, self.base_date) if is_date else stored

    if cell_type == 'b':
        result = stored == '1'
    elif cell_type in (Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
        result = unicode(stored)
    elif cell_type == 's':
        result = unicode(self.shared_strings[int(stored)])
    else:
        result = stored
    return result
def to_tree(self, tagname):
    """Return an XML element named *tagname* whose text is ``unicode(self)``."""
    node = Element(tagname)
    node.text = unicode(self)
    return node
def set_formula(self, addr, formula):
    """Assign *formula* to the cell at *addr* and rebuild the graph.

    For a range, the first cell receives *formula* as given and every
    other cell receives the formula translated to its own address.  The
    graph and cellmap are then regenerated from the affected cells and the
    cell at *addr* is evaluated with ``should_eval`` temporarily forced to
    ``'always'``.

    :raises Exception: if *addr* is not a key of ``self.cellmap``.
    """
    if addr not in self.cellmap:
        raise Exception('Cell %s not in cellmap' % addr)
    target = self.cellmap[addr]

    seeds = [target]

    if not target.is_range:
        target.formula = formula
    else:
        # for each cell of the range, translate the formula
        for position, member in enumerate(target.range.cells):
            if position == 0:
                member.formula = formula
                # the Translator needs a reference without sheet
                translator = Translator(unicode('=' + formula),
                                        member.address().split('!')[1])
            else:
                # the Translator needs a reference without sheet
                shifted = translator.translate_formula(member.address().split('!')[1])
                member.formula = shifted[1:]  # to get rid of the '='

            seeds.append(member)

    cellmap, G = graph_from_seeds(seeds, self)
    self.cellmap = cellmap
    self.G = G

    # force one evaluation of the edited cell, then restore its setting
    previous_mode = self.cellmap[addr].should_eval
    self.cellmap[addr].should_eval = 'always'
    self.evaluate(addr)
    self.cellmap[addr].should_eval = previous_mode

    print("Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap)))
def test_get_xml_iter():
    """``_get_xml_iter`` must cope with the supported kinds of XML source.

    Covered inputs:
    #1 file object
    #2 stream (file-like)
    #3 string
    #4 zipfile
    """
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile
    FUT = _get_xml_iter

    s = ""
    stream = FUT(s)
    assert isinstance(stream, BytesIO), type(stream)

    u = unicode(s)
    stream = FUT(u)
    assert isinstance(stream, BytesIO), type(stream)

    f = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    stream = FUT(f)
    # `tempfile` is a module, so isinstance(stream, tempfile) raised
    # TypeError instead of checking anything.  A file object is expected
    # to be handed back unchanged.
    assert stream == f
    f.close()

    from zipfile import ZipFile
    t = TemporaryFile()
    z = ZipFile(t, mode="w")
    z.writestr("test", "whatever")
    stream = FUT(z.open("test"))
    assert hasattr(stream, "read")
    z.close()
def _add_table_headers(ws):
    """
    Make sure every table of *ws* has its tableColumns and register a
    relationship for it.

    Tables without tableColumns are initialised; when such a table has a
    header row, the column names are taken from the first row of the
    table's range.  A warning is issued for every heading cell that is not
    of string type.  Returns the list of related table parts.
    """
    parts = TablePartList()

    for tbl in ws._tables:
        if not tbl.tableColumns:
            tbl._initialise_columns()
            if tbl.headerRowCount:
                header_cells = ws[tbl.ref][0]
                for header, column in zip(header_cells, tbl.tableColumns):
                    if header.data_type != "s":
                        warn(
                            "File may not be readable: column headings must be strings."
                        )
                    column.name = unicode(header.value)

        link = Relationship(Type=tbl._rel_type, Target="")
        ws._rels.append(link)
        tbl._rel_id = link.Id
        parts.append(Related(id=link.Id))

    return parts
def test_xfs_fonts(self):
    """Writing cell xfs for a bold 12pt style must reference and emit the font."""
    bold_style = Style(font=Font(size=12, bold=True))
    self.worksheet.cell('A1').style = bold_style

    writer = StyleWriter(self.workbook)
    # one shared placeholder stands in for number formats, fills and borders
    placeholder = DummyElement()
    fonts_node = Element("fonts")
    writer._write_cell_xfs(placeholder, fonts_node, placeholder, placeholder)

    root_xml = unicode(tostring(writer._root))
    for attribute in ("""applyFont="1" """, """fontId="1" """):
        assert attribute in root_xml

    expected = """ <fonts count="2"> <font> <sz val="12.0" /> <color rgb="00000000"></color> <name val="Calibri" /> <family val="2" /> <b></b> </font> </fonts> """
    diff = compare_xml(tostring(fonts_node), expected)
    assert diff is None, diff
def test_get_xml_iter():
    """Exercise ``_get_xml_iter`` with each supported kind of source.

    #1 file object
    #2 stream (file-like)
    #3 string
    #4 zipfile
    """
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile

    get_iter = _get_xml_iter

    # byte string and its unicode conversion both end up wrapped in BytesIO
    raw = b""
    for source in (raw, unicode(raw)):
        wrapped = get_iter(source)
        assert isinstance(wrapped, BytesIO), type(wrapped)

    # a real file object is handed back as is
    handle = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    wrapped = get_iter(handle)
    assert wrapped == handle
    handle.close()

    # a member opened from a zip archive only needs to be readable
    backing = TemporaryFile()
    archive = ZipFile(backing, mode="w")
    archive.writestr("test", "whatever")
    wrapped = get_iter(archive.open("test"))
    assert hasattr(wrapped, "read")

    try:
        archive.close()
    except IOError:
        # you can't just close zipfiles in Windows
        archive.close()  # python 2.7
def __str__(self):
    """
    Pack parts into a single string

    Each part that has text is prefixed with ``&`` and its key, expanded
    control codes such as ``&[Page]`` are replaced by their mini-format
    equivalents, and the result is passed through ``escape``.
    """
    TRANSFORM = {
        '&[Tab]': '&A',
        '&[Pages]': '&N',
        '&[Date]': '&D',
        '&[Path]': '&Z',
        '&[Page]': '&P',
        '&[Time]': '&T',
        '&[File]': '&F',
        '&[Picture]': '&G',
    }

    # escape keys and create regex
    pattern = re.compile(
        "|".join("({0})".format(re.escape(code)) for code in TRANSFORM)
    )

    pieces = [
        u"&{0}{1}".format(key, unicode(part))
        for key, part in zip(self.__keys, [self.left, self.center, self.right])
        if part.text is not None
    ]

    # swap every expanded control for its mini-format equivalent
    packed = pattern.sub(lambda found: TRANSFORM[found.group(0)], "".join(pieces))
    return escape(packed)
def __str__(self):
    """
    Pack parts into a single string

    Parts without text are skipped; the others contribute ``&<key>``
    followed by their unicode form.  Expanded control codes are replaced by
    the short form before the text is escaped.
    """
    TRANSFORM = {
        '&[Tab]': '&A', '&[Pages]': '&N', '&[Date]': '&D', '&[Path]': '&Z',
        '&[Page]': '&P', '&[Time]': '&T', '&[File]': '&F', '&[Picture]': '&G',
    }

    # escape keys and create regex
    alternatives = []
    for code in TRANSFORM:
        alternatives.append("({0})".format(re.escape(code)))
    SUBS_REGEX = re.compile("|".join(alternatives))

    def to_short_form(match):
        """
        Callback for re.sub
        Replace expanded control with mini-format equivalent
        """
        return TRANSFORM[match.group(0)]

    chunks = []
    for key, part in zip(self.__keys, [self.left, self.center, self.right]):
        if part.text is None:
            continue
        chunks.append(u"&{0}{1}".format(key, unicode(part)))

    joined = "".join(chunks)
    return escape(SUBS_REGEX.sub(to_short_form, joined))
def test_get_xml_iter():
    """``_get_xml_iter`` must cope with the supported kinds of XML source.

    Covered inputs:
    #1 file object
    #2 stream (file-like)
    #3 string
    #4 zipfile
    """
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile
    FUT = _get_xml_iter

    s = b""
    stream = FUT(s)
    assert isinstance(stream, BytesIO), type(stream)

    u = unicode(s)
    stream = FUT(u)
    assert isinstance(stream, BytesIO), type(stream)

    f = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    stream = FUT(f)
    # `tempfile` is a module, so isinstance(stream, tempfile) raised
    # TypeError instead of checking anything.  A file object is expected
    # to be handed back unchanged.
    assert stream == f
    f.close()

    from zipfile import ZipFile
    t = TemporaryFile()
    z = ZipFile(t, mode="w")
    z.writestr("test", "whatever")
    stream = FUT(z.open("test"))
    assert hasattr(stream, "read")
    z.close()
def check_string(self, value):
    """Validate a string value and normalise its line breaks.

    ``None`` is returned unchanged.  Other values are converted to
    ``unicode`` (decoding with ``self.encoding`` when needed), cut to
    32,767 characters and have CRLF replaced by a single line feed.

    :raises IllegalCharacterError: if ``ILLEGAL_CHARACTERS_RE`` matches
        anywhere in the truncated value.
    """
    if value is None:
        return None

    # convert to unicode string
    if isinstance(value, unicode):
        text = value
    else:
        text = unicode(value, self.encoding)
    text = unicode(text)

    # string must never be longer than 32,767 characters
    # truncate if necessary
    text = text[:32767]

    if ILLEGAL_CHARACTERS_RE.search(text) is not None:
        raise IllegalCharacterError

    # we require that newline is represented as "\n" in core,
    # not as "\r\n" or "\r"
    return text.replace('\r\n', '\n')
def test_read_complex_formulae():
    """Plain, unicode, shared and array formulae are read from formulae.xlsx."""
    null_file = os.path.join(DATADIR, 'reader', 'formulae.xlsx')
    wb = load_workbook(null_file)
    ws = wb.get_active_sheet()
    attrs = ws.formula_attributes

    # Test normal formulae
    for coordinate in ('A1', 'A2'):
        assert ws.cell(coordinate).data_type != 'f'
    plain = (('A3', '=12345'), ('A4', '=A2+A3'), ('A5', '=SUM(A2:A4)'))
    for coordinate, formula in plain:
        assert ws.cell(coordinate).data_type == 'f'
        assert coordinate not in attrs
        assert ws.cell(coordinate).value == formula

    # Test unicode
    expected = '=IF(ISBLANK(B16), "Düsseldorf", B16)'
    # Hack to prevent pytest doing its own unicode conversion
    try:
        expected = unicode(expected, "UTF8")
    except TypeError:
        pass
    assert ws['A16'].value == expected

    # Test shared formulae: the first cell carries the range and formula
    assert ws.cell('B7').data_type == 'f'
    assert attrs['B7']['t'] == 'shared'
    assert attrs['B7']['si'] == '0'
    assert attrs['B7']['ref'] == 'B7:E7'
    assert ws.cell('B7').value == '=B4*2'
    # the following cells only point at the shared formula
    for coordinate in ('C7', 'D7', 'E7'):
        assert ws.cell(coordinate).data_type == 'f'
        assert attrs[coordinate]['t'] == 'shared'
        assert attrs[coordinate]['si'] == '0'
        assert 'ref' not in attrs[coordinate]
        assert ws.cell(coordinate).value == '='

    # Test array formulae
    assert ws.cell('C10').data_type == 'f'
    # NOTE(review): this is a substring test on the 'ref' value itself
    assert 'ref' not in attrs['C10']['ref']
    assert attrs['C10']['t'] == 'array'
    assert 'si' not in attrs['C10']
    assert attrs['C10']['ref'] == 'C10:C14'
    assert ws.cell('C10').value == '=SUM(A10:A14*B10:B14)'
    assert ws.cell('C11').data_type != 'f'
def test_read_complex_formulae(datadir):
    """Plain, unicode, shared and array formulae are read from formulae.xlsx."""
    datadir.join("reader").chdir()
    wb = load_workbook('formulae.xlsx')
    ws = wb.get_active_sheet()
    attrs = ws.formula_attributes

    # Test normal formulae
    for coordinate in ('A1', 'A2'):
        assert ws.cell(coordinate).data_type != 'f'
    plain = (('A3', '=12345'), ('A4', '=A2+A3'), ('A5', '=SUM(A2:A4)'))
    for coordinate, formula in plain:
        assert ws.cell(coordinate).data_type == 'f'
        assert coordinate not in attrs
        assert ws.cell(coordinate).value == formula

    # Test unicode
    expected = '=IF(ISBLANK(B16), "Düsseldorf", B16)'
    # Hack to prevent pytest doing its own unicode conversion
    try:
        expected = unicode(expected, "UTF8")
    except TypeError:
        pass
    assert ws['A16'].value == expected

    # Test shared formulae: the first cell carries the range and formula
    assert ws.cell('B7').data_type == 'f'
    assert attrs['B7']['t'] == 'shared'
    assert attrs['B7']['si'] == '0'
    assert attrs['B7']['ref'] == 'B7:E7'
    assert ws.cell('B7').value == '=B4*2'
    # the following cells only point at the shared formula
    for coordinate in ('C7', 'D7', 'E7'):
        assert ws.cell(coordinate).data_type == 'f'
        assert attrs[coordinate]['t'] == 'shared'
        assert attrs[coordinate]['si'] == '0'
        assert 'ref' not in attrs[coordinate]
        assert ws.cell(coordinate).value == '='

    # Test array formulae
    assert ws.cell('C10').data_type == 'f'
    # NOTE(review): this is a substring test on the 'ref' value itself
    assert 'ref' not in attrs['C10']['ref']
    assert attrs['C10']['t'] == 'array'
    assert 'si' not in attrs['C10']
    assert attrs['C10']['ref'] == 'C10:C14'
    assert ws.cell('C10').value == '=SUM(A10:A14*B10:B14)'
    assert ws.cell('C11').data_type != 'f'
def test_read_complex_formulae(datadir):
    """Plain, unicode, shared and array formulae are read from formulae.xlsx."""
    datadir.join("reader").chdir()
    wb = load_workbook("formulae.xlsx")
    ws = wb.get_active_sheet()
    formula_attrs = ws.formula_attributes

    # Test normal formulae
    for ref in ("A1", "A2"):
        assert ws.cell(ref).data_type != "f"
    for ref, text in (("A3", "=12345"), ("A4", "=A2+A3"), ("A5", "=SUM(A2:A4)")):
        assert ws.cell(ref).data_type == "f"
        assert ref not in formula_attrs
        assert ws.cell(ref).value == text

    # Test unicode
    expected = '=IF(ISBLANK(B16), "Düsseldorf", B16)'
    # Hack to prevent pytest doing its own unicode conversion
    try:
        expected = unicode(expected, "UTF8")
    except TypeError:
        pass
    assert ws["A16"].value == expected

    # Test shared formulae: the first cell carries the range and formula
    assert ws.cell("B7").data_type == "f"
    assert formula_attrs["B7"]["t"] == "shared"
    assert formula_attrs["B7"]["si"] == "0"
    assert formula_attrs["B7"]["ref"] == "B7:E7"
    assert ws.cell("B7").value == "=B4*2"
    # the following cells only point at the shared formula
    for ref in ("C7", "D7", "E7"):
        assert ws.cell(ref).data_type == "f"
        assert formula_attrs[ref]["t"] == "shared"
        assert formula_attrs[ref]["si"] == "0"
        assert "ref" not in formula_attrs[ref]
        assert ws.cell(ref).value == "="

    # Test array formulae
    assert ws.cell("C10").data_type == "f"
    # NOTE(review): this is a substring test on the 'ref' value itself
    assert "ref" not in formula_attrs["C10"]["ref"]
    assert formula_attrs["C10"]["t"] == "array"
    assert "si" not in formula_attrs["C10"]
    assert formula_attrs["C10"]["ref"] == "C10:C14"
    assert ws.cell("C10").value == "=SUM(A10:A14*B10:B14)"
    assert ws.cell("C11").data_type != "f"
def get_string(string_index_node):
    """Return the text of one string index entry.

    When the node has ``r`` (rich text run) children, the text of every run
    is concatenated; otherwise the text is read from the node itself.
    """
    runs = string_index_node.findall('{%s}r' % SHEET_MAIN_NS)
    if not runs:
        return get_text(string_index_node)

    fragments = [get_text(run) for run in runs]
    return unicode(''.join(fragments))
def get_squared_range(self, min_col, min_row, max_col, max_row):
    """Yield one tuple of cells per row for the requested rectangle.

    Cells come from ``self.get_cells`` grouped by row.  Rows skipped between
    two returned rows, and columns missing inside a row, are filled with the
    placeholders produced by ``get_missing_cells``.  Nothing is yielded for
    rows after the last row returned by ``get_cells``.

    Found cells are converted before being yielded: the number format is
    taken from the style table and the internal value is converted
    according to the cell's data type.
    """
    # NOTE(review): xrange excludes max_col itself - presumably callers pass
    # an exclusive upper bound; verify against the caller.
    expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)]
    current_row = min_row
    style_table = self._style_table
    for row, cells in groupby(self.get_cells(min_row, min_col, max_row, max_col), operator.attrgetter('row')):
        full_row = []
        if current_row < row:
            # emit placeholder rows for the gap before this row
            for gap_row in xrange(current_row, row):
                dummy_cells = get_missing_cells(gap_row, expected_columns)
                yield tuple([dummy_cells[column] for column in expected_columns])
                current_row = row
        # groupby hands out a one-shot iterator, so materialise it first
        temp_cells = list(cells)
        retrieved_columns = dict([(c.column, c) for c in temp_cells])
        missing_columns = list(set(expected_columns) - set(retrieved_columns.keys()))
        replacement_columns = get_missing_cells(row, missing_columns)
        for column in expected_columns:
            if column in retrieved_columns:
                cell = retrieved_columns[column]
                if cell.style_id is not None:
                    style = style_table[int(cell.style_id)]
                    cell = cell._replace(number_format=style.number_format.format_code) #pylint: disable-msg=W0212
                if cell.internal_value is not None:
                    # NOTE(review): `in` is a containment test here; if
                    # Cell.TYPE_STRING is a plain string this is a substring
                    # check rather than equality - confirm intent.
                    if cell.data_type in Cell.TYPE_STRING:
                        # internal value is an index into the string table
                        cell = cell._replace(internal_value=unicode(self._string_table[int(cell.internal_value)])) #pylint: disable-msg=W0212
                    elif cell.data_type == Cell.TYPE_BOOL:
                        cell = cell._replace(internal_value=cell.internal_value == '1')
                    elif cell.is_date:
                        cell = cell._replace(internal_value=self._shared_date.from_julian(float(cell.internal_value)))
                    elif cell.data_type == Cell.TYPE_NUMERIC:
                        cell = cell._replace(internal_value=float(cell.internal_value))
                    elif cell.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
                        cell = cell._replace(internal_value=unicode(cell.internal_value))
                full_row.append(cell)
            else:
                full_row.append(replacement_columns[column])
        # the next expected row is the one right after the row just built
        current_row = row + 1
        yield tuple(full_row)
def doTransfer(file_name, db_name):
    """Copy every worksheet of an Excel workbook into a SQLite database.

    One table is created per sheet, named after the slugified sheet title.
    The first row of the sheet provides the column names (slugified, all
    ``TEXT``) and an ``ID INTEGER PRIMARY KEY AUTOINCREMENT`` column is
    added.  Every following row is inserted with its values converted to
    stripped text; empty cells are stored as ``''``.

    :param file_name: complete path to the Excel workbook
    :param db_name: SQLite database name passed to ``sqlite3.connect``
    """
    def quote_identifier(name):
        # Identifiers cannot be bound as parameters, so quote them: this
        # keeps names with hyphens, leading digits or keywords valid SQL.
        return '"' + name.replace('"', '""') + '"'

    con = sqlite3.connect(db_name)
    try:
        wb = load_workbook(filename=file_name)
        for sheet in wb.sheetnames:
            ws = wb[sheet]
            table = quote_identifier(str(slugify(sheet)))
            columns = [quote_identifier(slugify(cell.value))
                       for cell in next(ws.rows)]

            query = ('CREATE TABLE ' + table
                     + '(ID INTEGER PRIMARY KEY AUTOINCREMENT'
                     + ''.join(', ' + column + ' TEXT' for column in columns)
                     + ');')
            con.execute(query)

            records = []
            for index, row in enumerate(ws):
                if index == 0:
                    # header row: already used for the column names
                    continue
                # Test for None itself: comparing the text with 'None'
                # also blanked cells whose content really is "None".
                records.append(tuple(
                    '' if cell.value is None else unicode(cell.value).strip()
                    for cell in row))

            insert = ('INSERT INTO ' + table + '(' + ', '.join(columns)
                      + ') VALUES(' + ', '.join('?' for _ in columns) + ')')
            con.executemany(insert, records)
        con.commit()
    finally:
        # release the database handle even when reading or inserting fails
        con.close()
class NamedRange(object):
    """A named group of cells

    Scope is a worksheet object or None for workbook scope names (the default)
    """
    __slots__ = ('name', 'destinations', 'scope')

    # templates for the string and debug representations
    str_format = unicode('%s!%s')
    repr_format = unicode('<%s "%s">')

    def __init__(self, name, destinations, scope=None):
        self.name = name
        self.destinations = destinations
        self.scope = scope

    def __str__(self):
        """Return the destinations as comma-separated ``sheet!name`` items."""
        rendered = []
        for sheet, name in self.destinations:
            rendered.append(self.str_format % (sheet, name))
        return ','.join(rendered)

    def __repr__(self):
        """Return the class name followed by the quoted string form."""
        class_name = self.__class__.__name__
        return self.repr_format % (class_name, str(self))
def to_str(my_string):
    """Return *my_string* as text.

    On Python 2 a byte ``str`` is decoded as UTF-8; ``unicode`` values are
    returned unchanged.  Any other object is converted with ``str()``.  If
    that conversion fails, a message is printed and the object itself is
    returned.
    """
    # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
    if type(my_string) is str and str is not unicode:
        return unicode(my_string, 'utf-8')
    if type(my_string) is unicode:
        return my_string
    try:
        return str(my_string)
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        print('Couldnt parse as string', type(my_string))
        return my_string
def __init__(self, address, sheet=None, value=None, formula=None, is_range=False, is_named_range=False, should_eval='normal'):
    """Create a cell for *address*.

    :param address: cell address, optionally prefixed with a sheet name,
        or the name itself when *is_named_range* is true
    :param sheet: sheet name; must agree with any sheet given in *address*
    :param value: initial value
    :param formula: formula text; empty values are stored as ``None``
    :param is_range: whether the cell stands for a range
    :param is_named_range: whether *address* is a name and not a location
    :param should_eval: evaluation mode stored on the cell
    :raises Exception: if no sheet name is available, or if *sheet*
        disagrees with the sheet contained in *address*
    """
    super(Cell,self).__init__()

    # kept as an explicit comparison so that non-bool falsy values keep
    # taking the named-range branch exactly as before
    if is_named_range == False:

        # remove $'s
        address = address.replace('$','')

        sh,c,r = split_address(address)

        # both are empty
        if not sheet and not sh:
            raise Exception("Sheet name may not be empty for cell address %s" % address)
        # both exist but disagree
        elif sh and sheet and sh != sheet:
            raise Exception("Sheet name mismatch for cell address %s: %s vs %s" % (address,sheet, sh))
        elif not sh and sheet:
            sh = sheet

        # we assume a cell's location can never change
        # (the former `sheet.encode('utf-8')` assignment was overwritten on
        # the very next line, so only the effective assignment is kept)
        self.__sheet = sh
        self.__col = c
        self.__row = int(r)
        self.__col_idx = col2num(c)

    else:
        self.__named_range = address
        self.__sheet = None
        self.__col = None
        self.__row = None
        self.__col_idx = None

    # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
    if type(formula) == str and str != unicode:
        self.__formula = unicode(formula, 'utf-8') if formula else None
    else:
        self.__formula = formula if formula else None

    self.__value = value
    self.python_expression = None
    self.need_update = False
    self.should_eval = should_eval
    self.__compiled_expression = None
    self.__is_range = is_range

    # every cell has a unique id
    self.__id = Cell.next_id()
def write_tables(self):
    """Register the worksheet's tables and send the table parts to the writer.

    Tables without tableColumns are initialised; when such a table has a
    header row, column names are taken from the first row of its range
    (with a warning for non-string headings).  A relationship is added for
    every table, and the collected parts are sent only if there are any.
    """
    parts = TablePartList()

    for tbl in self.ws._tables:
        if not tbl.tableColumns:
            tbl._initialise_columns()
            if tbl.headerRowCount:
                first_row = self.ws[tbl.ref][0]
                for heading, column in zip(first_row, tbl.tableColumns):
                    if heading.data_type != "s":
                        warn(
                            "File may not be readable: column headings must be strings."
                        )
                    column.name = unicode(heading.value)

        relation = Relationship(Type=tbl._rel_type, Target="")
        self._rels.append(relation)
        tbl._rel_id = relation.Id
        parts.append(Related(id=relation.Id))

    if parts:
        self.xf.send(parts.to_tree())
def _add_table_headers(ws):
    """
    Check if tables have tableColumns and create them and autoFilter if necessary.
    Column headers will be taken from the first row of the table.

    A relationship is appended to ``ws._rels`` for every table; the matching
    related parts are collected and returned.
    """
    related_parts = TablePartList()

    for current in ws._tables:
        if not current.tableColumns:
            current._initialise_columns()
            if current.headerRowCount:
                top_row = ws[current.ref][0]
                for top_cell, table_column in zip(top_row, current.tableColumns):
                    if top_cell.data_type != "s":
                        warn("File may not be readable: column headings must be strings.")
                    table_column.name = unicode(top_cell.value)

        relationship = Relationship(Type=current._rel_type, Target="")
        ws._rels.append(relationship)
        current._rel_id = relationship.Id
        related_parts.append(Related(id=relationship.Id))

    return related_parts
def test_get_xml_iter():
    """Exercise ``_get_xml_iter`` with each supported kind of source.

    #1 file object
    #2 stream (file-like)
    #3 string
    #4 zipfile
    """
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile

    get_iter = _get_xml_iter

    # byte string and its unicode conversion both end up wrapped in BytesIO
    raw = b""
    for source in (raw, unicode(raw)):
        wrapped = get_iter(source)
        assert isinstance(wrapped, BytesIO), type(wrapped)

    # a real file object is handed back as is
    handle = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    wrapped = get_iter(handle)
    assert wrapped == handle
    handle.close()

    # a member opened from a zip archive only needs to be readable
    backing = TemporaryFile()
    archive = ZipFile(backing, mode="w")
    archive.writestr("test", "whatever")
    wrapped = get_iter(archive.open("test"))
    assert hasattr(wrapped, "read")

    try:
        archive.close()
    except IOError:
        # you can't just close zipfiles in Windows
        if archive.fp is None:
            archive.close()  # python 2.7
        else:
            archive.fp.close()  # python 2.6
def __repr__(self):
    """Return ``<Cell TITLE.COORDINATE>`` built from the parent title and coordinate."""
    template = unicode("<Cell %s.%s>")
    location = (self.parent.title, self.coordinate)
    return template % location
def __repr__(self):
    """Use the unicode conversion of the object as its representation."""
    as_text = unicode(self)
    return as_text
def resolve_range(rng, should_flatten = False, sheet=''):
    """Expand a range reference into the addresses of its cells.

    :param rng: range reference containing ``:``, optionally sheet-prefixed
    :param should_flatten: for rectangular ranges, return one flat list
        instead of a list of rows
    :param sheet: sheet name; must agree with a sheet given in *rng*
    :returns: an ``ExcelError`` when *rng* contains no ``:``; otherwise a
        ``(cells, nrows, ncols)`` tuple whose addresses are prefixed with
        ``sheet!`` when a sheet is known
    :raises Exception: if *sheet* and the sheet in *rng* differ

    Results are memoised in ``resolve_range_cache``, keyed on the range,
    the flatten flag and the sheet.
    """
    # print 'RESOLVE RANGE splitting', rng
    if ':' not in rng:
        if '!' in rng:
            # only affects how the reference is shown in the error info
            rng = rng.split('!')
        return ExcelError('#REF!', info = '%s is not a regular range, nor a named_range' % rng)
    sh, start, end = split_range(rng)

    # reconcile the two possible sources of the sheet name; afterwards
    # `sheet` is either empty or ends with '!'
    if sh and sheet:
        if sh != sheet:
            raise Exception("Mismatched sheets %s and %s" % (sh,sheet))
        else:
            sheet += '!'
    elif sh and not sheet:
        sheet = sh + "!"
    elif sheet and not sh:
        sheet += "!"
    else:
        pass

    # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
    if type(sheet) == str and str != unicode:
        sheet = unicode(sheet, 'utf-8')
    if type(rng) == str and str != unicode:
        rng = unicode(rng, 'utf-8')

    key = rng+str(should_flatten)+sheet

    if key in resolve_range_cache:
        return resolve_range_cache[key]
    else:
        if not is_range(rng): return ([sheet + rng],1,1)
        # single cell, no range
        if start.isdigit() and end.isdigit():
            # This copes with 1:1 style ranges
            start_col = "A"
            start_row = start
            end_col = "XFD"
            end_row = end
        elif start.isalpha() and end.isalpha():
            # This copes with A:A style ranges
            start_col = start
            start_row = 1
            end_col = end
            end_row = 2**20
        else:
            sh, start_col, start_row = split_address(start)
            sh, end_col, end_row = split_address(end)

        start_col_idx = col2num(start_col)
        end_col_idx = col2num(end_col);

        start_row = int(start_row)
        end_row = int(end_row)

        # A NumPy-based construction of the address grid was attempted here
        # previously; it is not relevant for now and is not in use.

        # single column
        if start_col == end_col:
            nrows = end_row - start_row + 1
            data = [ "%s%s%s" % (s,c,r) for (s,c,r) in zip([sheet]*nrows,[start_col]*nrows,list(range(start_row,end_row+1)))]

            output = data,len(data),1

        # single row
        elif start_row == end_row:
            ncols = end_col_idx - start_col_idx + 1
            data = [ "%s%s%s" % (s,num2col(c),r) for (s,c,r) in zip([sheet]*ncols,list(range(start_col_idx,end_col_idx+1)),[start_row]*ncols)]
            output = data,1,len(data)

        # rectangular range
        else:
            cells = []
            for r in range(start_row,end_row+1):
                row = []
                for c in range(start_col_idx,end_col_idx+1):
                    row.append(sheet + num2col(c) + str(r))

                cells.append(row)

            if should_flatten:
                # flatten into one list
                # (the reported dimensions stay those of the unflattened grid)
                l = list(flatten(cells, only_lists = True))
                output = l,len(cells), len(cells[0])
            else:
                output = cells, len(cells), len(cells[0])

        resolve_range_cache[key] = output
        return output
def load_workbook(filename, read_only=False, use_iterators=False, keep_vba=KEEP_VBA, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raises InvalidFileException: if the file extension is not supported or
        a required part is missing from the archive
    :raises IOError: if a file-like object was not opened in binary mode

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    read_only = read_only or use_iterators

    is_file_like = hasattr(filename, 'read')

    if not is_file_like and os.path.isfile(filename):
        file_format = os.path.splitext(filename)[-1]
        if file_format not in SUPPORTED_FORMATS:
            if file_format == '.xls':
                msg = ('openpyxl does not support the old .xls file format, '
                       'please use xlrd to read this file, or convert it to '
                       'the more recent .xlsx file format.')
            elif file_format == '.xlsb':
                msg = ('openpyxl does not support binary format .xlsb, '
                       'please convert this file to .xlsx format if you want '
                       'to open it with openpyxl')
            else:
                msg = ('openpyxl does not support %s file format, '
                       'please check you can open '
                       'it with Excel first. '
                       'Supported formats are: %s') % (
                           file_format, ','.join(SUPPORTED_FORMATS))
            raise InvalidFileException(msg)

    if is_file_like:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if getattr(filename, 'encoding', None) is not None:
            raise IOError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # retry once on a copy with a repaired central directory
        f = repair_central_directory(filename, is_file_like)
        archive = ZipFile(f, 'r', ZIP_DEFLATED)

    # From here on the archive is open: close it on every exit path, not
    # only on success, so a failed load does not leak the file handle.
    try:
        wb = Workbook(guess_types=guess_types, data_only=data_only, read_only=read_only)

        if read_only and guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

        try:
            _load_workbook(wb, archive, filename, read_only, keep_vba)
        except KeyError:
            # a KeyError here is reported as an invalid file
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    finally:
        archive.close()

    return wb
class Worksheet(object):
    """Represents a worksheet.

    Do not create worksheets yourself,
    use :func:`openpyxl.workbook.Workbook.create_sheet` instead
    """
    repr_format = unicode('<Worksheet "%s">')
    # Characters that may not appear in a sheet title.
    bad_title_char_re = re.compile(r'[\\*?:/\[\]]')

    BREAK_NONE = 0
    BREAK_ROW = 1
    BREAK_COLUMN = 2

    SHEETSTATE_VISIBLE = 'visible'
    SHEETSTATE_HIDDEN = 'hidden'
    SHEETSTATE_VERYHIDDEN = 'veryHidden'

    # Paper size
    PAPERSIZE_LETTER = '1'
    PAPERSIZE_LETTER_SMALL = '2'
    PAPERSIZE_TABLOID = '3'
    PAPERSIZE_LEDGER = '4'
    PAPERSIZE_LEGAL = '5'
    PAPERSIZE_STATEMENT = '6'
    PAPERSIZE_EXECUTIVE = '7'
    PAPERSIZE_A3 = '8'
    PAPERSIZE_A4 = '9'
    PAPERSIZE_A4_SMALL = '10'
    PAPERSIZE_A5 = '11'

    # Page orientation
    ORIENTATION_PORTRAIT = 'portrait'
    ORIENTATION_LANDSCAPE = 'landscape'

    def __init__(self, parent_workbook, title='Sheet'):
        """Create an empty sheet attached to `parent_workbook`.

        :param parent_workbook: workbook that owns this sheet
        :param title: requested title; when empty, ``Sheet<N>`` is used,
            where N is one more than the workbook's current sheet count
        :type title: string
        """
        self._parent = parent_workbook
        self._title = ''
        # Assignment goes through the `title` setter, which validates the
        # name and makes it unique within the parent workbook.
        if not title:
            self.title = 'Sheet%d' % (1 + len(self._parent.worksheets))
        else:
            self.title = title
        self.row_dimensions = {}
        self.column_dimensions = OrderedDict([])
        self.page_breaks = []
        self._cells = {}
        self._styles = {}
        self._charts = []
        self._images = []
        self._comment_count = 0
        self._merged_cells = []
        self.relationships = []
        self._data_validations = []
        self.selected_cell = 'A1'
        self.active_cell = 'A1'
        self.sheet_state = self.SHEETSTATE_VISIBLE
        self.page_setup = PageSetup()
        self.page_margins = PageMargins()
        self.header_footer = HeaderFooter()
        self.sheet_view = SheetView()
        self.protection = SheetProtection()
        self.show_gridlines = True
        self.print_gridlines = False
        self.show_summary_below = True
        self.show_summary_right = True
        self.default_row_dimension = RowDimension(self)
        self.default_column_dimension = ColumnDimension(self)
        self._auto_filter = AutoFilter()
        self._freeze_panes = None
        self.paper_size = None
        self.formula_attributes = {}
        self.orientation = None
        self.xml_source = None
        self.conditional_formatting = ConditionalFormatting()

    def __repr__(self):
        return self.repr_format % self.title

    @property
    def parent(self):
        """Return the workbook this sheet belongs to."""
        return self._parent

    @property
    def encoding(self):
        """Return the encoding of the parent workbook."""
        return self._parent.encoding

    def garbage_collect(self):
        """Delete cells that are not storing a value."""
        # Collect first, delete afterwards: the dictionary must not change
        # size while it is being iterated.
        delete_list = [
            coordinate for coordinate, cell in iteritems(self._cells)
            if (not cell.merged
                and cell.value in ('', None)
                and cell.comment is None
                and (coordinate not in self._styles
                     or hash(cell.style) == _DEFAULTS_STYLE_HASH))
        ]
        for coordinate in delete_list:
            del self._cells[coordinate]

    def get_cell_collection(self):
        """Return an unordered list of the cells in this worksheet."""
        return self._cells.values()

    @property
    def title(self):
        """Return the title for this sheet."""
        return self._title

    @title.setter
    def title(self, value):
        """Set a sheet title, ensuring it is valid.

        Limited to 31 characters, no special characters.

        :raise: SheetTitleException when the title contains a forbidden
            character or is longer than 31 characters once made unique
        """
        if self.bad_title_char_re.search(value):
            msg = 'Invalid character found in sheet title'
            raise SheetTitleException(msg)
        # Uniqueness is resolved *before* the length check because the
        # numeric suffix counts towards the 31 character limit.
        value = self.unique_sheet_name(value)
        if len(value) > 31:
            msg = 'Maximum 31 characters allowed in sheet title'
            raise SheetTitleException(msg)
        self._title = value

    def unique_sheet_name(self, value):
        """Return `value`, made unique among the parent workbook's sheets.

        When `value` is not yet used it is returned unchanged. Otherwise
        the next free integer suffix is appended: one more than the
        highest suffix found on existing titles of the form
        ``<value><digits>``.

        :param value: requested sheet title
        :type value: string
        :rtype: string
        """
        sheets = self._parent.get_sheet_names()
        if value not in sheets:
            return value
        # The title is escaped because it may contain regex
        # metacharacters such as parentheses, and the suffix may have
        # any number of digits (Sheet10, Sheet11, ...).
        suffix_re = re.compile(r'%s(\d*)\Z' % re.escape(value))
        highest = 0
        for name in sheets:
            match = suffix_re.match(name)
            if match and match.group(1):
                highest = max(highest, int(match.group(1)))
        return "%s%d" % (value, highest + 1)

    @property
    def auto_filter(self):
        """Return :class:`~openpyxl.worksheet.AutoFilter` object.

        `auto_filter` attribute stores/returns string until 1.8.
        You should change your code like ``ws.auto_filter.ref = "A1:A3"``.

        .. versionchanged:: 1.9
        """
        return self._auto_filter

    @property
    def freeze_panes(self):
        """Return the stored freeze-pane coordinate, or None."""
        return self._freeze_panes

    @freeze_panes.setter
    def freeze_panes(self, topLeftCell):
        """Store the freeze-pane coordinate.

        Accepts a coordinate string or an object with a `coordinate`
        attribute. A false value or 'A1' is stored as None.
        """
        if not topLeftCell:
            topLeftCell = None
        elif isinstance(topLeftCell, basestring):
            # basestring (as used by `append`) so that both byte and
            # unicode strings are treated as coordinates.
            topLeftCell = topLeftCell.upper()
        else:
            # Assume a cell
            topLeftCell = topLeftCell.coordinate
        if topLeftCell == 'A1':
            topLeftCell = None
        self._freeze_panes = topLeftCell

    def add_print_title(self, n, rows_or_cols='rows'):
        """ Print Titles are rows or columns that are repeated on each printed sheet.
        This adds n rows or columns at the top or left of the sheet
        """
        if rows_or_cols == 'cols':
            r = '$A:$%s' % get_column_letter(n)
        else:
            r = '$1:$%d' % n
        self.parent.create_named_range('_xlnm.Print_Titles', self, r, self)

    def cell(self, coordinate=None, row=None, column=None):
        """Returns a cell object based on the given coordinates.

        Usage: cell(coordinate='A15') **or** cell(row=15, column=1)

        If `coordinates` are not given, then row *and* column must be given.
        `row` and `column` are zero-based: ``cell(row=0, column=0)`` is 'A1'.

        Cells are kept in a dictionary which is empty at the worksheet
        creation. Calling `cell` creates the cell in memory when they
        are first accessed, to reduce memory usage.

        :param coordinate: coordinates of the cell (e.g. 'B12')
        :type coordinate: string

        :param row: row index of the cell (e.g. 4)
        :type row: int

        :param column: column index of the cell (e.g. 3)
        :type column: int

        :raise: InsufficientCoordinatesException when coordinate or (row and column) are not given

        :rtype: :class:`openpyxl.cell.Cell`
        """
        if not coordinate:
            if row is None or column is None:
                msg = "You have to provide a value either for " \
                      "'coordinate' or for 'row' *and* 'column'"
                raise InsufficientCoordinatesException(msg)
            coordinate = '%s%s' % (get_column_letter(column + 1), row + 1)
        else:
            # Absolute markers are irrelevant for addressing a cell.
            coordinate = coordinate.replace('$', '')
        return self._get_cell(coordinate)

    def _get_cell(self, coordinate):
        """Return the cell at `coordinate`, creating it on first access.

        Creating a cell also registers a dimension entry for its column
        and its row when none exists yet.
        """
        if coordinate not in self._cells:
            column, row = coordinate_from_string(coordinate)
            new_cell = openpyxl.cell.Cell(self, column, row)
            self._cells[coordinate] = new_cell
            if column not in self.column_dimensions:
                self.column_dimensions[column] = ColumnDimension(column)
            if row not in self.row_dimensions:
                self.row_dimensions[row] = RowDimension(row)
        return self._cells[coordinate]

    def __getitem__(self, key):
        """Convenience access by Excel style address"""
        if isinstance(key, slice):
            return self.range("{0}:{1}".format(key.start, key.stop))
        return self._get_cell(key)

    def __setitem__(self, key, value):
        """Assign `value` to the cell addressed by `key`."""
        self[key].value = value

    def get_highest_row(self):
        """Returns the maximum row index containing data

        :rtype: int
        """
        if self.row_dimensions:
            return max(self.row_dimensions)
        else:
            return 1

    def get_highest_column(self):
        """Get the largest value for column currently stored.

        :rtype: int
        """
        if self.column_dimensions:
            return max([column_index_from_string(column_index)
                        for column_index in self.column_dimensions])
        else:
            return 1

    def calculate_dimension(self):
        """Return the minimum bounding range for all cells containing data."""
        return 'A1:%s%d' % (get_column_letter(self.get_highest_column()),
                            self.get_highest_row())

    def range(self, range_string, row=0, column=0):
        """Returns a 2D array of cells, with optional row and column offsets.

        :param range_string: cell range string or `named range` name
        :type range_string: string

        :param row: number of rows to offset
        :type row: int

        :param column: number of columns to offset
        :type column: int

        :raise: NamedRangeException when `range_string` is neither a
            coordinate nor a usable named range on this sheet

        :rtype: tuples of tuples of :class:`openpyxl.cell.Cell`
        """
        if ':' in range_string:
            # explicit cell range, e.g. 'A1:C3'
            result = []
            min_range, max_range = range_string.split(':')
            min_col, min_row = coordinate_from_string(min_range)
            max_col, max_row = coordinate_from_string(max_range)
            if column:
                min_col = get_column_letter(
                    column_index_from_string(min_col) + column)
                max_col = get_column_letter(
                    column_index_from_string(max_col) + column)
            min_col = column_index_from_string(min_col)
            max_col = column_index_from_string(max_col)
            # Resolve every column letter once instead of once per cell.
            cache_cols = {}
            for col in xrange(min_col, max_col + 1):
                cache_cols[col] = get_column_letter(col)
            rows = xrange(min_row + row, max_row + row + 1)
            cols = xrange(min_col, max_col + 1)
            # A distinct loop name keeps the `row` offset argument intact.
            for row_idx in rows:
                new_row = []
                for col in cols:
                    new_row.append(
                        self.cell('%s%s' % (cache_cols[col], row_idx)))
                result.append(tuple(new_row))
            return tuple(result)
        else:
            try:
                return self.cell(coordinate=range_string, row=row,
                                 column=column)
            except CellCoordinatesException:
                # Not a coordinate: fall through and try a named range.
                pass

            # named range
            named_range = self._parent.get_named_range(range_string)
            if named_range is None:
                msg = '%s is not a valid range name' % range_string
                raise NamedRangeException(msg)
            if isinstance(named_range, NamedRangeContainingValue):
                msg = '%s refers to a value, not a range' % range_string
                raise NamedRangeException(msg)

            result = []
            for destination in named_range.destinations:
                worksheet, cells_range = destination
                if worksheet is not self:
                    msg = 'Range %s is not defined on worksheet %s' % \
                        (cells_range, self.title)
                    raise NamedRangeException(msg)
                content = self.range(cells_range)
                if isinstance(content, tuple):
                    # 2D result: flatten the rows into one sequence
                    for cells in content:
                        result.extend(cells)
                else:
                    result.append(content)
            if len(result) == 1:
                return result[0]
            else:
                return tuple(result)

    def get_style(self, coordinate, read_only=False):
        """Return the style object for the specified cell."""
        if coordinate not in self._styles:
            self._styles[coordinate] = Style()
        elif self._styles[coordinate].static and not read_only:
            # A static style is replaced by a copy before being handed
            # out for modification.
            self._styles[coordinate] = self._styles[coordinate].copy()
        return self._styles[coordinate]

    def set_printer_settings(self, paper_size, orientation):
        """Set printer settings

        :raise: ValueError when `orientation` is not one of the
            ORIENTATION_* constants
        """
        self.page_setup.paperSize = paper_size
        if orientation not in (self.ORIENTATION_PORTRAIT,
                               self.ORIENTATION_LANDSCAPE):
            raise ValueError(
                "Values should be %s or %s" % (self.ORIENTATION_PORTRAIT,
                                               self.ORIENTATION_LANDSCAPE))
        self.page_setup.orientation = orientation

    def create_relationship(self, rel_type):
        """Add a relationship for this sheet."""
        rel = Relationship(rel_type)
        self.relationships.append(rel)
        rel_id = self.relationships.index(rel)
        # ids are 1-based: 'rId1', 'rId2', ...
        rel.id = 'rId' + str(rel_id + 1)
        return self.relationships[rel_id]

    def add_data_validation(self, data_validation):
        """ Add a data-validation object to the sheet. The data-validation
        object defines the type of data-validation to be applied and the
        cell or range of cells it should apply to.
        """
        data_validation._sheet = self
        self._data_validations.append(data_validation)

    def add_chart(self, chart):
        """ Add a chart to the sheet """
        chart._sheet = self
        self._charts.append(chart)
        self.add_drawing(chart)

    def add_image(self, img):
        """ Add an image to the sheet """
        img._sheet = self
        self._images.append(img)
        self.add_drawing(img)

    def add_drawing(self, obj):
        """Images and charts both create drawings"""
        self._parent.drawings.append(obj)

    def add_rel(self, obj):
        """Drawings and hyperlinks create relationships"""
        self._parent.relationships.append(obj)

    def _merge_range_string(self, range_string, start_row, start_column,
                            end_row, end_column):
        """Return the 'A1:E1' style range shared by merge/unmerge.

        Builds the range from the zero-based indices when no
        `range_string` is given, otherwise validates the string and
        strips '$' markers.
        """
        if not range_string:
            if (start_row is None or start_column is None
                    or end_row is None or end_column is None):
                msg = "You have to provide a value either for "\
                      "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'"
                raise InsufficientCoordinatesException(msg)
            return '%s%s:%s%s' % (
                get_column_letter(start_column + 1), start_row + 1,
                get_column_letter(end_column + 1), end_row + 1)
        if len(range_string.split(':')) != 2:
            msg = "Range must be a cell range (e.g. A1:E1)"
            raise InsufficientCoordinatesException(msg)
        return range_string.replace('$', '')

    def merge_cells(self, range_string=None, start_row=None,
                    start_column=None, end_row=None, end_column=None):
        """ Set merge on a cell range.  Range is a cell range (e.g. A1:E1)

        :raise: InsufficientCoordinatesException when neither a valid
            range string nor all four indices are given
        """
        range_string = self._merge_range_string(
            range_string, start_row, start_column, end_row, end_column)

        first, last = range_string.split(':')
        min_col, min_row = coordinate_from_string(first)
        max_col, max_row = coordinate_from_string(last)
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col)

        # Blank out the rest of the cells in the range; the top-left
        # cell keeps its value.
        for col in xrange(min_col, max_col + 1):
            letter = get_column_letter(col)
            for row in xrange(min_row, max_row + 1):
                if row == min_row and col == min_col:
                    continue
                # PHPExcel adds cell and specifically blanks it out if
                # it doesn't exist
                merged_cell = self._get_cell('%s%s' % (letter, row))
                merged_cell.value = None
                merged_cell.merged = True

        if range_string not in self._merged_cells:
            self._merged_cells.append(range_string)

    def unmerge_cells(self, range_string=None, start_row=None,
                      start_column=None, end_row=None, end_column=None):
        """ Remove merge on a cell range.  Range is a cell range (e.g. A1:E1)

        :raise: InsufficientCoordinatesException when the arguments are
            incomplete or the range is not currently merged
        """
        range_string = self._merge_range_string(
            range_string, start_row, start_column, end_row, end_column)

        if range_string not in self._merged_cells:
            msg = 'Cell range %s not known as merged.' % range_string
            raise InsufficientCoordinatesException(msg)

        self._merged_cells.remove(range_string)
        first, last = range_string.split(':')
        min_col, min_row = coordinate_from_string(first)
        max_col, max_row = coordinate_from_string(last)
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col)

        # Mark cell as unmerged
        for col in xrange(min_col, max_col + 1):
            letter = get_column_letter(col)
            for row in xrange(min_row, max_row + 1):
                if not (row == min_row and col == min_col):
                    self._get_cell('%s%s' % (letter, row)).merged = False

    def append(self, list_or_dict):
        """Appends a group of values at the bottom of the current sheet.

        * If it's a list: all values are added in order, starting from the first column
        * If it's a dict: values are assigned to the columns indicated by the keys (numbers or letters)

        :param list_or_dict: list or dict containing values to append
        :type list_or_dict: list/tuple or dict

        Usage:

        * append(['This is A1', 'This is B1', 'This is C1'])
        * **or** append({'A' : 'This is A1', 'C' : 'This is C1'})
        * **or** append({0 : 'This is A1', 2 : 'This is C1'})

        :raise: TypeError when list_or_dict is neither a list/tuple nor a dict
        """
        # `cell` takes zero-based indices, so the highest existing
        # (1-based) row number addresses the first free row. The highest
        # row is used rather than the number of rows so that sparse
        # sheets (e.g. only rows 1 and 3 populated) are never overwritten.
        row_idx = max(self.row_dimensions) if self.row_dimensions else 0
        if isinstance(list_or_dict, (list, tuple)):
            for col_idx, content in enumerate(list_or_dict):
                self.cell(row=row_idx, column=col_idx).value = content
        elif isinstance(list_or_dict, dict):
            for col_idx, content in iteritems(list_or_dict):
                if isinstance(col_idx, basestring):
                    # column letter -> zero-based index
                    col_idx = column_index_from_string(col_idx) - 1
                self.cell(row=row_idx, column=col_idx).value = content
        else:
            raise TypeError('list_or_dict must be a list or a dict')

    @property
    def rows(self):
        """Return the cells of the sheet's bounding range, row by row."""
        return self.range(self.calculate_dimension())

    @property
    def columns(self):
        """Return the cells of the sheet's bounding range, column by column."""
        max_row = self.get_highest_row()
        cols = []
        for col_idx in range(self.get_highest_column()):
            col = get_column_letter(col_idx + 1)
            res = self.range('%s1:%s%d' % (col, col, max_row))
            # each row of a single-column range holds exactly one cell
            cols.append(tuple([x[0] for x in res]))
        return tuple(cols)

    def point_pos(self, left=0, top=0):
        """ tells which cell is under the given coordinates (in pixels)
        counting from the top-left corner of the sheet.
        Can be used to locate images and charts on the worksheet

        :rtype: tuple of (column letter, row number)
        """
        current_col = 1
        current_row = 1
        column_dimensions = self.column_dimensions
        row_dimensions = self.row_dimensions
        default_width = points_to_pixels(DEFAULT_COLUMN_WIDTH)
        default_height = points_to_pixels(DEFAULT_ROW_HEIGHT)
        left_pos = 0
        top_pos = 0

        # Walk columns left to right until the accumulated width passes
        # `left`; columns without a positive explicit width count with
        # the default width.
        while left_pos <= left:
            letter = get_column_letter(current_col)
            current_col += 1
            if letter in column_dimensions:
                cdw = column_dimensions[letter].width
                if cdw > 0:
                    left_pos += points_to_pixels(cdw)
                    continue
            left_pos += default_width

        # Same walk for rows, top to bottom.
        while top_pos <= top:
            row = current_row
            current_row += 1
            if row in row_dimensions:
                rdh = row_dimensions[row].height
                if rdh > 0:
                    top_pos += points_to_pixels(rdh)
                    continue
            top_pos += default_height

        return (letter, row)
def check_error(self, value):
    """Return `value` converted to unicode, or '#N/A' when that fails.

    Conversion stays best-effort for ordinary errors, but
    KeyboardInterrupt and SystemExit are no longer swallowed.

    :param value: object to convert
    :rtype: unicode
    """
    try:
        return unicode(value)
    except Exception:
        # Any conversion failure is reported as the Excel "not
        # available" error value.
        return unicode('#N/A')
def test_unicode(self, HeaderFooterItem):
    """A non-ASCII left section text survives conversion to unicode."""
    from openpyxl.compat import unicode

    item = HeaderFooterItem()
    item.left.text = u'D\xfcsseldorf'
    rendered = unicode(item)
    assert rendered == u'&LD\xfcsseldorf'
def test_unicode(self, _HeaderFooterPart):
    """A part holding non-ASCII text converts to that same unicode text."""
    from openpyxl.compat import unicode

    part = _HeaderFooterPart()
    part.text = u"D\xfcsseldorf"
    rendered = unicode(part)
    assert rendered == u"D\xfcsseldorf"
def load_workbook(filename, use_iterators=False, keep_vba=KEEP_VBA, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :raise: OpenModeError when a file object is not opened in binary mode
    :raise: InvalidFileException when the archive cannot be opened or a
        required part is missing from it

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    is_file_instance = isinstance(filename, file)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # Give a damaged archive one chance: repair the central
        # directory and reopen.
        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    # From here on the archive is open: close it on every exit path,
    # including failures while building the workbook.
    try:
        wb = Workbook(guess_types=guess_types, data_only=data_only)

        if use_iterators:
            wb._set_optimized_read()
            if guess_types:
                warnings.warn('Data types are not guessed when using iterator reader')

        try:
            _load_workbook(wb, archive, filename, use_iterators, keep_vba)
        except KeyError:
            # a KeyError from the loader is reported as an invalid file
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    finally:
        archive.close()

    return wb
def load_workbook(filename, use_iterators=False, keep_vba=KEEP_VBA, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :raise: OpenModeError when a file object is not opened in binary mode
    :raise: InvalidFileException when the file has an unsupported
        extension, the archive cannot be opened, or a required part is
        missing from it

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    is_file_instance = isinstance(filename, file)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # The extension can only be inspected for a path; for a file
        # object os.path.splitext would itself raise and hide the
        # real problem, so go straight to the repair attempt.
        if not is_file_instance:
            file_format = os.path.splitext(filename)[-1]
            if file_format not in SUPPORTED_FORMATS:
                if file_format == '.xls':
                    msg = ('openpyxl does not support the old .xls file format, '
                           'please use xlrd to read this file, or convert it to '
                           'the more recent .xlsx file format.')
                elif file_format == '.xlsb':
                    msg = ('openpyxl does not support binary format .xlsb, '
                           'please convert this file to .xlsx format if you want '
                           'to open it with openpyxl')
                else:
                    msg = ('openpyxl does not support %s file format, '
                           'please check you can open '
                           'it with Excel first. '
                           'Supported formats are: %s') % (file_format,
                                                           ','.join(SUPPORTED_FORMATS))
                raise InvalidFileException(msg)

        # Supported extension but unreadable archive: repair the
        # central directory and reopen.
        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    # From here on the archive is open: close it on every exit path,
    # including failures while building the workbook.
    try:
        wb = Workbook(guess_types=guess_types, data_only=data_only)

        if use_iterators:
            wb._set_optimized_read()
            if guess_types:
                warnings.warn('Data types are not guessed when using iterator reader')

        try:
            _load_workbook(wb, archive, filename, use_iterators, keep_vba)
        except KeyError:
            # a KeyError from the loader is reported as an invalid file
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    finally:
        archive.close()

    return wb
def __init__(self, val, alpha=None):
    """Store `val` as unicode text together with an optional `alpha`."""
    text = unicode(val)
    self.val = text
    self.alpha = alpha
def append(self, row):
    """Write one row of values to the sheet's content stream.

    Each item is either a plain value or a dict with a 'value' key and
    optional 'style' and 'comment' keys. Items that are None (or dicts
    whose value is None) are skipped but still occupy their column.

    :param row: iterable containing values to append
    :type row: iterable

    :raise: TypeError when a dict item carries a 'style' or 'comment'
        of the wrong type
    """
    doc = self._get_content_generator()
    self._max_row += 1
    span = len(row)
    self._max_col = max(self._max_col, span)
    row_idx = self._max_row
    attrs = {'r': '%d' % row_idx,
             'spans': '1:%d' % span}
    start_tag(doc, 'row', attrs)

    for col_idx, cell in enumerate(row):
        style = None
        comment = None
        if cell is None:
            continue
        elif isinstance(cell, dict):
            dct = cell
            cell = dct.get('value')
            if cell is None:
                continue
            style = dct.get('style')
            comment = dct.get('comment')
            for ob, attr, cls in ((style, 'style', Style),
                                  (comment, 'comment', Comment)):
                if ob is not None and not isinstance(ob, cls):
                    # cls is itself a class: its own __name__ is the
                    # expected type (cls.__class__ would name the
                    # metaclass instead).
                    raise TypeError('%s should be a %s not a %s' %
                                    (attr,
                                     cls.__name__,
                                     ob.__class__.__name__))

        column = get_column_letter(col_idx + 1)
        coordinate = '%s%d' % (column, row_idx)
        attributes = {'r': coordinate}
        if comment is not None:
            comment._parent = CommentParentCell(coordinate, row_idx, column)
            self._comments.append(comment)
            self._comment_count += 1

        # bool is tested before the numeric types so that True/False
        # are written as booleans.
        if isinstance(cell, bool):
            dtype = 'boolean'
        elif isinstance(cell, NUMERIC_TYPES):
            dtype = 'numeric'
        elif isinstance(cell, TIME_TYPES):
            dtype = 'datetime'
            if isinstance(cell, datetime.date):
                cell = to_excel(cell)
            elif isinstance(cell, datetime.time):
                cell = time_to_days(cell)
            elif isinstance(cell, datetime.timedelta):
                cell = timedelta_to_days(cell)
            if style is None:
                # allow user-defined style if needed
                style = STYLES[dtype]['style']
        elif cell and cell[0] == '=':
            dtype = 'formula'
        else:
            dtype = 'string'
            # the cell body becomes whatever the strings table returns
            cell = self._strings.add(unicode(cell))

        if style is not None:
            attributes['s'] = '%d' % self._styles.add(style)

        if dtype != 'formula':
            attributes['t'] = STYLES[dtype]['type']
        start_tag(doc, 'c', attributes)

        if dtype == 'formula':
            # drop the leading '=' and emit an empty value element
            tag(doc, 'f', body='%s' % cell[1:])
            tag(doc, 'v')
        elif dtype == 'boolean':
            tag(doc, 'v', body='%d' % cell)
        else:
            tag(doc, 'v', body='%s' % cell)
        end_tag(doc, 'c')

    end_tag(doc, 'row')
def test_repr(self, Reference):
    """A reference with a non-ASCII sheet title converts to its absolute form."""
    source = b'D\xc3\xbcsseldorf!A1:A10'.decode("utf8")
    expected = b'D\xc3\xbcsseldorf!$A$1:$A$10'.decode("utf8")
    ref = Reference(range_string=source)
    assert unicode(ref) == expected
def check_error(self, value):
    """Return `value` as unicode, or "#N/A" when it cannot be decoded."""
    try:
        converted = unicode(value)
    except UnicodeDecodeError:
        converted = u"#N/A"
    return converted
def load_workbook(filename, use_iterators=False, keep_vba=KEEP_VBA, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :raise: OpenModeError when a file object is not opened in binary mode
    :raise: InvalidFileException when the archive cannot be opened or a
        required part is missing from it

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    is_file_instance = isinstance(filename, file)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # Give a damaged archive one chance: repair the central
        # directory and reopen.
        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    # From here on the archive is open: close it on every exit path,
    # including failures while building the workbook.
    try:
        wb = Workbook(guess_types=guess_types, data_only=data_only)

        if use_iterators:
            wb._set_optimized_read()
            if guess_types:
                warnings.warn(
                    'Data types are not guessed when using iterator reader')

        try:
            _load_workbook(wb, archive, filename, use_iterators, keep_vba)
        except KeyError:
            # a KeyError from the loader is reported as an invalid file
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    finally:
        archive.close()

    return wb
class Worksheet(object):
    """Represents a worksheet.

    Do not create worksheets yourself,
    use :func:`openpyxl.workbook.Workbook.create_sheet` instead

    """
    repr_format = unicode('<Worksheet "%s">')
    # Characters that are rejected in a sheet title.
    bad_title_char_re = re.compile(r'[\\*?:/\[\]]')

    BREAK_NONE = 0
    BREAK_ROW = 1
    BREAK_COLUMN = 2

    SHEETSTATE_VISIBLE = 'visible'
    SHEETSTATE_HIDDEN = 'hidden'
    SHEETSTATE_VERYHIDDEN = 'veryHidden'

    # Paper size
    PAPERSIZE_LETTER = '1'
    PAPERSIZE_LETTER_SMALL = '2'
    PAPERSIZE_TABLOID = '3'
    PAPERSIZE_LEDGER = '4'
    PAPERSIZE_LEGAL = '5'
    PAPERSIZE_STATEMENT = '6'
    PAPERSIZE_EXECUTIVE = '7'
    PAPERSIZE_A3 = '8'
    PAPERSIZE_A4 = '9'
    PAPERSIZE_A4_SMALL = '10'
    PAPERSIZE_A5 = '11'

    # Page orientation
    ORIENTATION_PORTRAIT = 'portrait'
    ORIENTATION_LANDSCAPE = 'landscape'

    def __init__(self, parent_workbook, title='Sheet'):
        """Create an empty worksheet attached to *parent_workbook*.

        :param parent_workbook: the workbook owning this sheet
        :param title: sheet title; a falsy value yields ``SheetN`` where N
            is one more than the number of sheets in the parent workbook
        :raise: SheetTitleException when the title is invalid
        """
        self._parent = parent_workbook
        self._title = ''
        # Goes through the ``title`` setter, which validates the value.
        if not title:
            self.title = 'Sheet%d' % (1 + len(self._parent.worksheets))
        else:
            self.title = title
        self.row_dimensions = {}
        self.column_dimensions = DimensionHolder(worksheet=self, direction=[])
        self.page_breaks = []
        self._cells = {}
        self._styles = {}
        self._charts = []
        self._images = []
        self._comment_count = 0
        self._merged_cells = []
        self.relationships = []
        self._data_validations = []
        self.sheet_state = self.SHEETSTATE_VISIBLE
        self.page_setup = PageSetup()
        self.print_options = PrintOptions()
        self.page_margins = PageMargins()
        self.header_footer = HeaderFooter()
        self.sheet_view = SheetView()
        self.protection = SheetProtection()
        self.default_row_dimension = RowDimension(worksheet=self)
        self.default_column_dimension = ColumnDimension(worksheet=self)
        self._auto_filter = AutoFilter()
        self._freeze_panes = None
        self.paper_size = None
        self.formula_attributes = {}
        self.orientation = None
        self.conditional_formatting = ConditionalFormatting()
        self.vba_controls = None
        self.sheet_properties = WorksheetProperties()
        self.sheet_properties.outlinePr = Outline(summaryBelow=True,
                                                  summaryRight=True)

    # NOTE(review): the freeze_panes setter below indexes
    # ``sheet_view.selection`` as a sequence (``selection[0]``) while the two
    # properties here read attributes straight off it -- confirm which access
    # pattern SheetView actually supports.
    @property
    def selected_cell(self):
        """``sqref`` of the sheet view's selection."""
        return self.sheet_view.selection.sqref

    @property
    def active_cell(self):
        """``activeCell`` of the sheet view's selection."""
        return self.sheet_view.selection.activeCell

    @property
    def show_gridlines(self):
        """``showGridLines`` flag of the sheet view."""
        return self.sheet_view.showGridLines

    def __repr__(self):
        return self.repr_format % self.title

    # --- To keep compatibility with previous versions ---

    @property
    def show_summary_below(self):
        """``summaryBelow`` of the sheet's outline properties."""
        return self.sheet_properties.outlinePr.summaryBelow

    @property
    def show_summary_right(self):
        """``summaryRight`` of the sheet's outline properties."""
        return self.sheet_properties.outlinePr.summaryRight

    @property
    def vba_code(self):
        """Yield ``(name, value)`` pairs for the sheet properties that are set.

        Only the attributes listed below are considered; values that are
        ``None`` are skipped and the others are passed through
        ``safe_string``.
        """
        for attr in ("codeName", "enableFormatConditionsCalculation",
                     "filterMode", "published", "syncHorizontal", "syncRef",
                     "syncVertical", "transitionEvaluation",
                     "transitionEntry"):
            value = getattr(self.sheet_properties, attr)
            if value is not None:
                yield attr, safe_string(value)

    @vba_code.setter
    def vba_code(self, value):
        """Copy the recognised keys of the mapping *value* onto
        ``sheet_properties``; other keys are ignored."""
        for k, v in value.items():
            if k in ("codeName", "enableFormatConditionsCalculation",
                     "filterMode", "published", "syncHorizontal", "syncRef",
                     "syncVertical", "transitionEvaluation",
                     "transitionEntry"):
                setattr(self.sheet_properties, k, v)

    # --- End: to keep compatibility with previous versions ---

    @property
    def parent(self):
        """The workbook this sheet belongs to."""
        return self._parent

    @property
    def encoding(self):
        """Encoding of the parent workbook."""
        return self._parent.encoding

    @deprecated('this method is private and should not be called directly')
    def garbage_collect(self):
        self._garbage_collect()

    def _garbage_collect(self):
        """Delete cells that are not storing a value.

        A cell is dropped when its value is ``''`` or ``None``, it has no
        comment, and it either has no entry in ``_styles`` or its style
        equals ``DEFAULTS_STYLE``.
        """
        # Collect first: the dict must not change size while iterating it.
        stale = [
            coordinate for coordinate, cell in iteritems(self._cells)
            if (cell.value in ('', None)
                and cell.comment is None
                and (coordinate not in self._styles
                     or cell.style == DEFAULTS_STYLE))
        ]
        for coordinate in stale:
            del self._cells[coordinate]

    def get_cell_collection(self):
        """Return an unordered list of the cells in this worksheet."""
        return self._cells.values()

    @property
    def title(self):
        """Return the title for this sheet."""
        return self._title

    @title.setter
    def title(self, value):
        """Set a sheet title, ensuring it is valid.

        Limited to 31 characters, no special characters. If the title is
        already used in the parent workbook a numeric suffix is appended
        before the length is checked.

        :raise: SheetTitleException on an invalid character or a title
            longer than 31 characters
        """
        if self.bad_title_char_re.search(value):
            msg = 'Invalid character found in sheet title'
            raise SheetTitleException(msg)
        value = self._unique_sheet_name(value)
        if len(value) > 31:
            msg = 'Maximum 31 characters allowed in sheet title'
            raise SheetTitleException(msg)
        self._title = value

    @deprecated('this method is private and should not be called directly')
    def unique_sheet_name(self, value):
        return self._unique_sheet_name(value)

    def _unique_sheet_name(self, value):
        """Return *value*, suffixed with a number if the title is taken.

        When *value* already appears in the parent's sheet names, the
        highest numeric suffix found after *value* in the existing names is
        incremented and appended.
        """
        # check if sheet_name already exists
        # do this *before* length check
        sheets = self._parent.get_sheet_names()
        if value in sheets:
            sheets = ",".join(sheets)
            # ``\d*`` (not ``\d?``) so that multi-digit suffixes such as
            # "Sheet10" are read in full; otherwise the next name generated
            # after "Sheet10" would collide with it.
            sheet_title_regex = re.compile(
                r"(?P<title>%s)(?P<count>\d*),?" % re.escape(value))
            matches = sheet_title_regex.findall(sheets)
            if matches:
                # use name, but append with the next highest integer
                counts = [int(idx) for (t, idx) in matches if idx.isdigit()]
                highest = max(counts) if counts else 0
                value = "%s%d" % (value, highest + 1)
        return value

    @property
    def auto_filter(self):
        """Return :class:`~openpyxl.worksheet.AutoFilter` object.

        `auto_filter` attribute stores/returns string until 1.8.
        You should change your code like ``ws.auto_filter.ref = "A1:A3"``.

        .. versionchanged:: 1.9
        """
        return self._auto_filter

    @property
    def freeze_panes(self):
        """Top-left cell of the frozen pane, or ``None`` when no pane is set."""
        if self.sheet_view.pane is not None:
            return self.sheet_view.pane.topLeftCell

    @freeze_panes.setter
    def freeze_panes(self, topLeftCell):
        """Freeze the panes above and to the left of *topLeftCell*.

        :param topLeftCell: a coordinate string, an object exposing
            ``.coordinate``, or a falsy value / ``'A1'`` to remove the pane
        """
        if not topLeftCell:
            topLeftCell = None
        elif isinstance(topLeftCell, basestring):
            # basestring (as in ``append``) so that every string type the
            # file supports is treated as a coordinate.
            topLeftCell = topLeftCell.upper()
        else:  # Assume a cell
            topLeftCell = topLeftCell.coordinate
        if topLeftCell == 'A1':
            topLeftCell = None

        if not topLeftCell:
            self.sheet_view.pane = None
            return

        colName, row = coordinate_from_string(topLeftCell)
        column = column_index_from_string(colName)

        view = self.sheet_view
        view.pane = Pane(topLeftCell=topLeftCell,
                         activePane="topRight",
                         state="frozen")
        view.selection[0].pane = "topRight"

        if column > 1:
            view.pane.xSplit = column - 1
        if row > 1:
            view.pane.ySplit = row - 1
            view.pane.activePane = 'bottomLeft'
            view.selection[0].pane = "bottomLeft"
            if column > 1:
                view.selection[0].pane = "bottomRight"
                view.pane.activePane = 'bottomRight'

        if row > 1 and column > 1:
            # Split in both directions: add selections for the two other
            # panes in front of the existing one.
            sel = list(view.selection)
            sel.insert(0, Selection(pane="topRight", activeCell=None,
                                    sqref=None))
            sel.insert(1, Selection(pane="bottomLeft", activeCell=None,
                                    sqref=None))
            view.selection = sel

    def add_print_title(self, n, rows_or_cols='rows'):
        """ Print Titles are rows or columns that are repeated on each printed sheet.
        This adds n rows or columns at the top or left of the sheet

        :param n: number of rows or columns
        :param rows_or_cols: ``'cols'`` for columns, anything else for rows
        """
        scope = self.parent.get_index(self)

        if rows_or_cols == 'cols':
            r = '$A:$%s' % get_column_letter(n)
        else:
            r = '$1:$%d' % n

        self.parent.create_named_range('_xlnm.Print_Titles', self, r, scope)

    def cell(self, coordinate=None, row=None, column=None, value=None):
        """Returns a cell object based on the given coordinates.

        Usage: cell(coodinate='A15') **or** cell(row=15, column=1)

        If `coordinates` are not given, then row *and* column must be given.

        Cells are kept in a dictionary which is empty at the worksheet
        creation.  Calling `cell` creates the cell in memory when they
        are first accessed, to reduce memory usage.

        :param coordinate: coordinates of the cell (e.g. 'B12')
        :type coordinate: string

        :param row: row index of the cell (e.g. 4)
        :type row: int

        :param column: column index of the cell (e.g. 3)
        :type column: int

        :param value: value handed to the cell when it is created by this
            call; not applied to a cell that already exists

        :raise: InsufficientCoordinatesException when coordinate or (row and column) are not given

        :rtype: :class:openpyxl.cell.Cell

        """
        if coordinate is None:
            if row is None or column is None:
                msg = "You have to provide a value either for " \
                      "'coordinate' or for 'row' *and* 'column'"
                raise InsufficientCoordinatesException(msg)
            column = get_column_letter(column)
            coordinate = '%s%s' % (column, row)
        else:
            coordinate = coordinate.upper().replace('$', '')

        if coordinate not in self._cells:
            if row is None or column is None:
                column, row = coordinate_from_string(coordinate)
            self._new_cell(column, row, value)

        return self._cells[coordinate]

    def _get_cell(self, coordinate):
        """
        Internal method for getting a cell from a worksheet.
        Will create a new cell if one doesn't already exist.
        """
        coordinate = coordinate.upper()
        if coordinate not in self._cells:
            column, row = coordinate_from_string(coordinate)
            self._new_cell(column, row)
        return self._cells[coordinate]

    def _new_cell(self, column, row, value=None):
        """Create a cell at (*column*, *row*) and register it on the sheet."""
        cell = Cell(self, column, row, value)
        self._add_cell(cell)

    def _add_cell(self, cell):
        """
        Internal method for adding cell objects.

        Stores the cell under its coordinate and creates the column / row
        dimension for it when none exists yet.
        """
        column = cell.column
        row = cell.row
        self._cells[cell.coordinate] = cell
        if column not in self.column_dimensions:
            self.column_dimensions[column] = ColumnDimension(index=column,
                                                             worksheet=self)
        if row not in self.row_dimensions:
            self.row_dimensions[row] = RowDimension(index=row, worksheet=self)

    def __getitem__(self, key):
        """Convenience access by Excel style address

        A slice or a string containing ``":"`` yields the rows of that
        range; any other key returns the single cell.
        """
        if isinstance(key, slice):
            return self.iter_rows("{0}:{1}".format(key.start, key.stop))
        if ":" in key:
            return self.iter_rows(key)
        return self._get_cell(key)

    def __setitem__(self, key, value):
        self[key].value = value

    def get_highest_row(self):
        """Returns the maximum row index containing data

        :rtype: int
        """
        if self.row_dimensions:
            return max(self.row_dimensions)
        return 0

    @property
    def min_row(self):
        """Smallest row index in ``row_dimensions``, or 1 when empty."""
        if self.row_dimensions:
            return min(self.row_dimensions)
        return 1

    @property
    def max_row(self):
        """Same as :meth:`get_highest_row`."""
        return self.get_highest_row()

    def get_highest_column(self):
        """Get the largest value for column currently stored.

        :rtype: int
        """
        if self.column_dimensions:
            return max(column_index_from_string(column_index)
                       for column_index in self.column_dimensions)
        return 1

    @property
    def min_col(self):
        """Smallest column index in ``column_dimensions``, or 1 when empty."""
        if self.column_dimensions:
            return min(column_index_from_string(column_index)
                       for column_index in self.column_dimensions)
        return 1

    @property
    def max_column(self):
        """Same as :meth:`get_highest_column`."""
        return self.get_highest_column()

    def calculate_dimension(self):
        """Return the minimum bounding range for all cells containing data.

        The range always starts in the first column.
        """
        return '%s%d:%s%d' % (
            get_column_letter(1), self.min_row,
            get_column_letter(self.max_column or 1), self.max_row or 1)

    @property
    def dimensions(self):
        """Same as :meth:`calculate_dimension`."""
        return self.calculate_dimension()

    def iter_rows(self, range_string=None, row_offset=0, column_offset=0):
        """
        Returns a squared range based on the `range_string` parameter,
        using generators.
        If no range is passed, will iterate over all cells in the worksheet

        :param range_string: range of cells (e.g. 'A1:C4')
        :type range_string: string

        :param row_offset: additional rows (e.g. 4)
        :type row_offset: int

        :param column_offset: additonal columns (e.g. 3)
        :type column_offset: int

        :rtype: generator
        """
        if range_string is not None:
            min_col, min_row, max_col, max_row = range_boundaries(
                range_string.upper())
        else:
            min_col, min_row, max_col, max_row = (1, 1, self.max_column,
                                                  self.max_row)
        if max_col is not None:
            max_col += column_offset
        if max_row is not None:
            max_row += row_offset
        return self.get_squared_range(min_col + column_offset,
                                      min_row + row_offset,
                                      max_col, max_row)

    def get_squared_range(self, min_col, min_row, max_col, max_row):
        """Returns a 2D array of cells

        :param min_col: smallest column index (1-based index)
        :type min_col: int

        :param min_row: smallest row index (1-based index)
        :type min_row: int

        :param max_col: largest column index (1-based index)
        :type max_col: int

        :param max_row: largest row index (1-based index)
        :type max_row: int

        :rtype: generator
        """
        # Column name cache is very important in large files.
        cache = dict((col, get_column_letter(col))
                     for col in range(min_col, max_col + 1))
        for row in range(min_row, max_row + 1):
            yield tuple(self._get_cell('%s%d' % (cache[col], row))
                        for col in range(min_col, max_col + 1))

    def get_named_range(self, range_string):
        """
        Returns the cells of a named range as a flat tuple.

        :param range_string: `named range` name
        :type range_string: string

        :raise: NamedRangeException when the name is unknown, refers to a
            value rather than a range, or has a destination on another
            worksheet

        :rtype: tuple of :class:`openpyxl.cell.Cell`
        """
        named_range = self._parent.get_named_range(range_string)
        if named_range is None:
            msg = '%s is not a valid range name' % range_string
            raise NamedRangeException(msg)
        if not isinstance(named_range, NamedRange):
            msg = '%s refers to a value, not a range' % range_string
            raise NamedRangeException(msg)

        result = []
        for destination in named_range.destinations:
            worksheet, cells_range = destination

            if worksheet is not self:
                msg = 'Range %s is not defined on worksheet %s' % \
                    (cells_range, self.title)
                raise NamedRangeException(msg)

            for row in self.iter_rows(cells_range):
                result.extend(row)

        return tuple(result)

    @deprecated(
        " Use .iter_rows() working with coordinates 'A1:D4', "
        "and .get_squared_range() when working with indices (1, 1, 4, 4) "
        "and .get_named_range() for named ranges")
    def range(self, range_string, row=0, column=0):
        """Returns a 2D array of cells, with optional row and column offsets.

        :param range_string: cell range string or `named range` name
        :type range_string: string

        :param row: number of rows to offset
        :type row: int

        :param column: number of columns to offset
        :type column: int

        :rtype: tuples of tuples of :class:`openpyxl.cell.Cell`

        """
        _rs = range_string.upper()
        if ABSOLUTE_RE.match(_rs) is not None:
            # Matched by ABSOLUTE_RE: treat as a coordinate range.
            rows = self.iter_rows(_rs, row_offset=row, column_offset=column)
            return tuple(rows)
        # Anything else is looked up as a named range.
        return self.get_named_range(range_string)

    def _get_styleable(self, coordinate):
        """Return the cell, row dimension or column dimension for *coordinate*.

        Integers address ``row_dimensions``. Other values are first tried as
        a cell address; if that raises ``ValueError`` they are looked up in
        ``column_dimensions``.
        """
        if isinstance(coordinate, int):
            # An integer can never be a cell address: the ``":" in key``
            # test in __getitem__ raises TypeError for it.
            return self.row_dimensions[coordinate]
        try:
            return self[coordinate]
        except ValueError:
            # NOTE(review): relies on the coordinate parser raising
            # ValueError for a bare column letter -- confirm.
            return self.column_dimensions[coordinate]

    @deprecated("Access styles directly from cells, columns or rows")
    def get_style(self, coordinate):
        """Return the style of the cell, row or column at *coordinate*."""
        return self._get_styleable(coordinate).style

    @deprecated("Set styles directly on cells, columns or rows")
    def set_style(self, coordinate, style):
        """Assign *style* to the cell, row or column at *coordinate*."""
        self._get_styleable(coordinate).style = style

    def set_printer_settings(self, paper_size, orientation):
        """Set printer settings

        :param paper_size: stored as ``page_setup.paperSize``
        :param orientation: ``ORIENTATION_PORTRAIT`` or
            ``ORIENTATION_LANDSCAPE``
        :raise: ValueError for any other orientation (the paper size has
            already been stored at that point)
        """
        self.page_setup.paperSize = paper_size
        if orientation not in (self.ORIENTATION_PORTRAIT,
                               self.ORIENTATION_LANDSCAPE):
            raise ValueError(
                "Values should be %s or %s" % (self.ORIENTATION_PORTRAIT,
                                               self.ORIENTATION_LANDSCAPE))
        self.page_setup.orientation = orientation

    @deprecated('this method is private and should not be called directly')
    def create_relationship(self, rel_type):
        return self._create_relationship(rel_type)

    def _create_relationship(self, rel_type):
        """Add a relationship for this sheet.

        The relationship id is ``rId`` followed by its 1-based position in
        ``self.relationships``.
        """
        rel = Relationship(rel_type)
        self.relationships.append(rel)
        rel_id = self.relationships.index(rel)
        rel.id = 'rId' + str(rel_id + 1)
        return self.relationships[rel_id]

    def add_data_validation(self, data_validation):
        """ Add a data-validation object to the sheet.  The data-validation
            object defines the type of data-validation to be applied and the
            cell or range of cells it should apply to.
        """
        data_validation._sheet = self
        self._data_validations.append(data_validation)

    def add_chart(self, chart):
        """ Add a chart to the sheet """
        chart._sheet = self
        self._charts.append(chart)
        self.add_drawing(chart)

    def add_image(self, img):
        """ Add an image to the sheet """
        img._sheet = self
        self._images.append(img)
        self.add_drawing(img)

    def add_drawing(self, obj):
        """Images and charts both create drawings"""
        self._parent.drawings.append(obj)

    def add_rel(self, obj):
        """Drawings and hyperlinks create relationships"""
        self._parent.relationships.append(obj)

    def merge_cells(self, range_string=None, start_row=None,
                    start_column=None, end_row=None, end_column=None):
        """ Set merge on a cell range.  Range is a cell range (e.g. A1:E1)

        Either *range_string* or all four indices must be given. A single
        cell coordinate is accepted and ignored. Every stored cell of the
        range except the top-left one is removed from the sheet.

        :raise: InsufficientCoordinatesException when the arguments do not
            describe a range
        """
        if not range_string:
            if (start_row is None
                    or start_column is None
                    or end_row is None
                    or end_column is None):
                msg = "You have to provide a value either for "\
                      "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'"
                raise InsufficientCoordinatesException(msg)
            range_string = '%s%s:%s%s' % (
                get_column_letter(start_column), start_row,
                get_column_letter(end_column), end_row)
        elif ":" not in range_string:
            if COORD_RE.match(range_string):
                return  # Single cell
            msg = "Range must be a cell range (e.g. A1:E1)"
            raise InsufficientCoordinatesException(msg)
        else:
            range_string = range_string.replace('$', '')

        if range_string not in self._merged_cells:
            self._merged_cells.append(range_string)

        cells = cells_from_range(range_string)
        # only the top-left cell is preserved
        for c in islice(chain.from_iterable(cells), 1, None):
            if c in self._cells:
                del self._cells[c]

    @property
    def merged_cells(self):
        """Utility for checking whether a cell has been merged or not

        Returns the set of all coordinates covered by a merged range.
        """
        cells = set()
        for _range in self._merged_cells:
            for row in cells_from_range(_range):
                cells.update(row)
        return cells

    @property
    def merged_cell_ranges(self):
        """Public attribute for which cells have been merged"""
        return self._merged_cells

    def unmerge_cells(self, range_string=None, start_row=None,
                      start_column=None, end_row=None, end_column=None):
        """ Remove merge on a cell range.  Range is a cell range (e.g. A1:E1)

        :raise: InsufficientCoordinatesException when the arguments do not
            describe a range or the range is not currently merged
        """
        if not range_string:
            if (start_row is None
                    or start_column is None
                    or end_row is None
                    or end_column is None):
                msg = "You have to provide a value either for "\
                      "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'"
                raise InsufficientCoordinatesException(msg)
            range_string = '%s%s:%s%s' % (
                get_column_letter(start_column), start_row,
                get_column_letter(end_column), end_row)
        elif len(range_string.split(':')) != 2:
            msg = "Range must be a cell range (e.g. A1:E1)"
            raise InsufficientCoordinatesException(msg)
        else:
            range_string = range_string.replace('$', '')

        if range_string in self._merged_cells:
            self._merged_cells.remove(range_string)
        else:
            msg = 'Cell range %s not known as merged.' % range_string
            raise InsufficientCoordinatesException(msg)

    def append(self, iterable):
        """Appends a group of values at the bottom of the current sheet.

        * If it's a list: all values are added in order, starting from the first column
        * If it's a dict: values are assigned to the columns indicated by the keys (numbers or letters)

        :param iterable: list, range or generator, or dict containing values to append
        :type iterable: list/tuple/range/generator or dict

        Usage:

        * append(['This is A1', 'This is B1', 'This is C1'])
        * **or** append({'A' : 'This is A1', 'C' : 'This is C1'})
        * **or** append({1 : 'This is A1', 3 : 'This is C1'})

        :raise: TypeError when iterable is neither a list/tuple nor a dict

        """
        row_idx = self.max_row + 1

        if (isinstance(iterable, (list, tuple, range))
                or isgenerator(iterable)):
            for col_idx, content in enumerate(iterable, 1):
                col = get_column_letter(col_idx)
                if isinstance(content, Cell):
                    # compatible with write-only mode
                    cell = content
                    cell.parent = self
                    cell.column = col
                    cell.row = row_idx
                    cell.coordinate = "%s%s" % (col, row_idx)
                    self._add_cell(cell)
                else:
                    self._new_cell(col, row_idx, content)

        elif isinstance(iterable, dict):
            for col_idx, content in iteritems(iterable):
                if isinstance(col_idx, basestring):
                    col_idx = column_index_from_string(col_idx)
                self.cell(row=row_idx, column=col_idx, value=content)

        else:
            self._invalid_row(iterable)

        # Always (re)register the row so that it counts as used even when
        # nothing was written into it.
        self.row_dimensions[row_idx] = RowDimension(worksheet=self,
                                                    index=row_idx)

    def _invalid_row(self, iterable):
        """Raise the TypeError reported for unsupported ``append`` input."""
        raise TypeError(
            'Value must be a list, tuple, range or generator, or a dict. Supplied value is {0}'
            .format(type(iterable)))

    @property
    def rows(self):
        """Iterate over all rows in the worksheet"""
        return tuple(self.iter_rows())

    @property
    def columns(self):
        """Iterate over all columns in the worksheet"""
        max_row = self.max_row
        min_row = 1
        cols = []
        for col_idx in range(self.max_column):
            cells = self.get_squared_range(col_idx + 1, min_row,
                                           col_idx + 1, max_row)
            cols.append(tuple(chain.from_iterable(cells)))
        return tuple(cols)

    def point_pos(self, left=0, top=0):
        """ tells which cell is under the given coordinates (in pixels)
        counting from the top-left corner of the sheet.
        Can be used to locate images and charts on the worksheet

        :param left: horizontal offset
        :param top: vertical offset
        :returns: ``(column_letter, row_index)``
        """
        current_col = 1
        current_row = 1
        column_dimensions = self.column_dimensions
        row_dimensions = self.row_dimensions
        default_width = points_to_pixels(DEFAULT_COLUMN_WIDTH)
        default_height = points_to_pixels(DEFAULT_ROW_HEIGHT)
        left_pos = 0
        top_pos = 0

        # Walk the columns, accumulating widths, until the offset is passed.
        while left_pos <= left:
            letter = get_column_letter(current_col)
            current_col += 1
            if letter in column_dimensions:
                cdw = column_dimensions[letter].width
                if cdw is not None:
                    left_pos += points_to_pixels(cdw)
                    continue
            # No explicit width known for this column.
            left_pos += default_width

        # Same walk for the rows.
        while top_pos <= top:
            row = current_row
            current_row += 1
            if row in row_dimensions:
                rdh = row_dimensions[row].height
                if rdh is not None:
                    top_pos += points_to_pixels(rdh)
                    continue
            # No explicit height known for this row.
            top_pos += default_height

        return (letter, row)