Example #1
0
def get_text(rich_node):
    """Read rich text, discarding formatting if not disallowed"""
    text_node = rich_node.find('{%s}t' % SHEET_MAIN_NS)
    partial_text = text_node.text or unicode('')

    if text_node.get('{%s}space' % XML_NS) != 'preserve':
        partial_text = partial_text.strip()
    return unicode(partial_text)
Example #2
0
def get_text(rich_node):
    """Read rich text, discarding formatting if not disallowed"""
    text_node = rich_node.find('{%s}t' % SHEET_MAIN_NS)
    text = text_node.text or unicode('')

    if text_node.get('{%s}space' % XML_NS) != 'preserve':
        text = text.strip()

    # fix XML escaping sequence for '_x'
    text = text.replace('x005F_', '')
    return unicode(text)
Example #3
0
 def value(self):
     if self._value is None:
         return
     if self.data_type == Cell.TYPE_BOOL:
         return self._value == '1'
     elif self.is_date:
         return from_excel(self._value, self.base_date)
     elif self.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
         return unicode(self._value)
     elif self.data_type == Cell.TYPE_STRING:
         return unicode(self.shared_strings[int(self._value)])
     return self._value
Example #4
0
 def check_string(self, value):
     """Check string coding, length, and line break character"""
     if value is None:
         return
     # convert to unicode string
     if not isinstance(value, unicode):
         value = unicode(value, self.encoding)
     value = unicode(value)
     # string must never be longer than 32,767 characters
     # truncate if necessary
     value = value[:32767]
     if next(ILLEGAL_CHARACTERS_RE.finditer(value), None):
         raise IllegalCharacterError
     return value
Example #5
0
 def value(self):
     if self._value is None:
         return
     if self.data_type == 'n':
         if is_date_format(self.number_format):
             return from_excel(self._value, self.base_date)
         return self._value
     if self.data_type == 'b':
         return self._value == '1'
     elif self.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
         return unicode(self._value)
     elif self.data_type == 's':
         return unicode(self.shared_strings[int(self._value)])
     return self._value
Example #6
0
 def check_string(self, value):
     """Check string coding, length, and line break character"""
     if value is None:
         return
     # convert to unicode string
     if not isinstance(value, unicode):
         value = unicode(value, self.encoding)
     value = unicode(value)
     # string must never be longer than 32,767 characters
     # truncate if necessary
     value = value[:32767]
     if next(ILLEGAL_CHARACTERS_RE.finditer(value), None):
         raise IllegalCharacterError
     return value
Example #7
0
 def check_string(self, value):
     """Check string coding, length, and line break character"""
     # convert to unicode string
     if not isinstance(value, unicode):
         value = unicode(value, self.encoding)
     value = unicode(value)
     # string must never be longer than 32,767 characters
     # truncate if necessary
     value = value[:32767]
     if ILLEGAL_CHARACTERS_RE.match(value):
         raise IllegalCharacterError
     # we require that newline is represented as "\n" in core,
     # not as "\r\n" or "\r"
     value = value.replace('\r\n', '\n')
     return value
Example #8
0
 def value(self):
     if self._value is None:
         return
     if self.data_type == 'n':
         if is_date_format(self.number_format):
             return from_excel(self._value, self.base_date)
         return self._value
     if self.data_type == 'b':
         return self._value == '1'
     elif self.data_type in (Cell.TYPE_INLINE,
                             Cell.TYPE_FORMULA_CACHE_STRING):
         return unicode(self._value)
     elif self.data_type == 's':
         return unicode(self.shared_strings[int(self._value)])
     return self._value
Example #9
0
 def to_tree(self, tagname):
     """
     Return as XML node
     """
     el = Element(tagname)
     el.text = unicode(self)
     return el
Example #10
0
    def set_formula(self, addr, formula):
        if addr in self.cellmap:
            cell = self.cellmap[addr]
        else:
            raise Exception('Cell %s not in cellmap' % addr)

        seeds = [cell]

        if cell.is_range:
            for index, c in enumerate(cell.range.cells): # for each cell of the range, translate the formula
                if index == 0:
                    c.formula = formula
                    translator = Translator(unicode('=' +    formula), c.address().split('!')[1]) # the Translator needs a reference without sheet
                else:
                    translated = translator.translate_formula(c.address().split('!')[1]) # the Translator needs a reference without sheet
                    c.formula = translated[1:] # to get rid of the '='

                seeds.append(c)
        else:
            cell.formula = formula

        cellmap, G = graph_from_seeds(seeds, self)

        self.cellmap = cellmap
        self.G = G

        should_eval = self.cellmap[addr].should_eval
        self.cellmap[addr].should_eval = 'always'
        self.evaluate(addr)
        self.cellmap[addr].should_eval = should_eval

        print("Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap)))
Example #11
0
def test_get_xml_iter():
    #1 file object
    #2 stream (file-like)
    #3 string
    #4 zipfile
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile
    FUT = _get_xml_iter
    s = ""
    stream = FUT(s)
    assert isinstance(stream, BytesIO), type(stream)

    u = unicode(s)
    stream = FUT(u)
    assert isinstance(stream, BytesIO), type(stream)

    f = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    stream = FUT(f)
    assert isinstance(stream, tempfile), type(stream)
    f.close()

    from zipfile import ZipFile
    t = TemporaryFile()
    z = ZipFile(t, mode="w")
    z.writestr("test", "whatever")
    stream = FUT(z.open("test"))
    assert hasattr(stream, "read")
    z.close()
Example #12
0
def _add_table_headers(ws):
    """
    Check if tables have tableColumns and create them and autoFilter if necessary.
    Column headers will be taken from the first row of the table.
    """

    tables = TablePartList()

    for table in ws._tables:
        if not table.tableColumns:
            table._initialise_columns()
            if table.headerRowCount:
                row = ws[table.ref][0]
                for cell, col in zip(row, table.tableColumns):
                    if cell.data_type != "s":
                        warn(
                            "File may not be readable: column headings must be strings."
                        )
                    col.name = unicode(cell.value)
        rel = Relationship(Type=table._rel_type, Target="")
        ws._rels.append(rel)
        table._rel_id = rel.Id
        tables.append(Related(id=rel.Id))

    return tables
    def test_xfs_fonts(self):
        st = Style(font=Font(size=12, bold=True))
        self.worksheet.cell('A1').style = st
        w = StyleWriter(self.workbook)

        nft = borders = fills = DummyElement()
        fonts = Element("fonts")
        w._write_cell_xfs(nft, fonts, fills, borders)
        xml = unicode(tostring(w._root))
        assert """applyFont="1" """ in xml
        assert """fontId="1" """ in xml

        expected = """
        <fonts count="2">
        <font>
            <sz val="12.0" />
            <color rgb="00000000"></color>
            <name val="Calibri" />
            <family val="2" />
            <b></b>
        </font>
        </fonts>
        """
        xml = tostring(fonts)
        diff = compare_xml(xml, expected)
        assert diff is None, diff
Example #14
0
def test_get_xml_iter():
    #1 file object
    #2 stream (file-like)
    #3 string
    #4 zipfile
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile

    FUT = _get_xml_iter
    s = b""
    stream = FUT(s)
    assert isinstance(stream, BytesIO), type(stream)

    u = unicode(s)
    stream = FUT(u)
    assert isinstance(stream, BytesIO), type(stream)

    f = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    stream = FUT(f)
    assert stream == f
    f.close()

    t = TemporaryFile()
    z = ZipFile(t, mode="w")
    z.writestr("test", "whatever")
    stream = FUT(z.open("test"))
    assert hasattr(stream, "read")

    try:
        z.close()
    except IOError:
        # you can't just close zipfiles in Windows
        z.close() # python 2.7
Example #15
0
    def __str__(self):
        """
        Pack parts into a single string
        """
        TRANSFORM = {
            '&[Tab]': '&A',
            '&[Pages]': '&N',
            '&[Date]': '&D',
            '&[Path]': '&Z',
            '&[Page]': '&P',
            '&[Time]': '&T',
            '&[File]': '&F',
            '&[Picture]': '&G'
        }

        # escape keys and create regex
        SUBS_REGEX = re.compile("|".join(
            ["({0})".format(re.escape(k)) for k in TRANSFORM]))

        def replace(match):
            """
            Callback for re.sub
            Replace expanded control with mini-format equivalent
            """
            sub = match.group(0)
            return TRANSFORM[sub]

        txt = []
        for key, part in zip(self.__keys,
                             [self.left, self.center, self.right]):
            if part.text is not None:
                txt.append(u"&{0}{1}".format(key, unicode(part)))
        txt = "".join(txt)
        txt = SUBS_REGEX.sub(replace, txt)
        return escape(txt)
Example #16
0
    def set_formula(self, addr, formula):
        if addr in self.cellmap:
            cell = self.cellmap[addr]
        else:
            raise Exception('Cell %s not in cellmap' % addr)

        seeds = [cell]

        if cell.is_range:
            for index, c in enumerate(cell.range.cells): # for each cell of the range, translate the formula
                if index == 0:
                    c.formula = formula
                    translator = Translator(unicode('=' +    formula), c.address().split('!')[1]) # the Translator needs a reference without sheet
                else:
                    translated = translator.translate_formula(c.address().split('!')[1]) # the Translator needs a reference without sheet
                    c.formula = translated[1:] # to get rid of the '='

                seeds.append(c)
        else:
            cell.formula = formula

        cellmap, G = graph_from_seeds(seeds, self)

        self.cellmap = cellmap
        self.G = G

        should_eval = self.cellmap[addr].should_eval
        self.cellmap[addr].should_eval = 'always'
        self.evaluate(addr)
        self.cellmap[addr].should_eval = should_eval

        print("Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap)))
Example #17
0
 def to_tree(self, tagname):
     """
     Return as XML node
     """
     el = Element(tagname)
     el.text = unicode(self)
     return el
Example #18
0
    def __str__(self):
        """
        Pack parts into a single string
        """
        TRANSFORM = {'&[Tab]': '&A', '&[Pages]': '&N', '&[Date]': '&D',
                     '&[Path]': '&Z', '&[Page]': '&P', '&[Time]': '&T', '&[File]': '&F',
                     '&[Picture]': '&G'}

        # escape keys and create regex
        SUBS_REGEX = re.compile("|".join(["({0})".format(re.escape(k))
                                          for k in TRANSFORM]))

        def replace(match):
            """
            Callback for re.sub
            Replace expanded control with mini-format equivalent
            """
            sub = match.group(0)
            return TRANSFORM[sub]

        txt = []
        for key, part in zip(
            self.__keys, [self.left, self.center, self.right]):
            if part.text is not None:
                txt.append(u"&{0}{1}".format(key, unicode(part)))
        txt = "".join(txt)
        txt = SUBS_REGEX.sub(replace, txt)
        return escape(txt)
Example #19
0
def test_get_xml_iter():
    #1 file object
    #2 stream (file-like)
    #3 string
    #4 zipfile
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile
    FUT = _get_xml_iter
    s = b""
    stream = FUT(s)
    assert isinstance(stream, BytesIO), type(stream)

    u = unicode(s)
    stream = FUT(u)
    assert isinstance(stream, BytesIO), type(stream)

    f = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    stream = FUT(f)
    assert isinstance(stream, tempfile), type(stream)
    f.close()

    from zipfile import ZipFile
    t = TemporaryFile()
    z = ZipFile(t, mode="w")
    z.writestr("test", "whatever")
    stream = FUT(z.open("test"))
    assert hasattr(stream, "read")
    z.close()
Example #20
0
 def check_string(self, value):
     """Check string coding, length, and line break character"""
     if value is None:
         return
     # convert to unicode string
     if not isinstance(value, unicode):
         value = unicode(value, self.encoding)
     value = unicode(value)
     # string must never be longer than 32,767 characters
     # truncate if necessary
     value = value[:32767]
     if next(ILLEGAL_CHARACTERS_RE.finditer(value), None):
         raise IllegalCharacterError
     # we require that newline is represented as "\n" in core,
     # not as "\r\n" or "\r"
     value = value.replace('\r\n', '\n')
     return value
Example #21
0
def test_read_complex_formulae():
    null_file = os.path.join(DATADIR, 'reader', 'formulae.xlsx')
    wb = load_workbook(null_file)
    ws = wb.get_active_sheet()

    # Test normal forumlae
    assert ws.cell('A1').data_type != 'f'
    assert ws.cell('A2').data_type != 'f'
    assert ws.cell('A3').data_type == 'f'
    assert 'A3' not in ws.formula_attributes
    assert ws.cell('A3').value == '=12345'
    assert ws.cell('A4').data_type == 'f'
    assert 'A4' not in ws.formula_attributes
    assert ws.cell('A4').value == '=A2+A3'
    assert ws.cell('A5').data_type == 'f'
    assert 'A5' not in ws.formula_attributes
    assert ws.cell('A5').value == '=SUM(A2:A4)'

    # Test unicode
    expected = '=IF(ISBLANK(B16), "Düsseldorf", B16)'
    # Hack to prevent pytest doing it's own unicode conversion
    try:
        expected = unicode(expected, "UTF8")
    except TypeError:
        pass
    assert ws['A16'].value == expected

    # Test shared forumlae
    assert ws.cell('B7').data_type == 'f'
    assert ws.formula_attributes['B7']['t'] == 'shared'
    assert ws.formula_attributes['B7']['si'] == '0'
    assert ws.formula_attributes['B7']['ref'] == 'B7:E7'
    assert ws.cell('B7').value == '=B4*2'
    assert ws.cell('C7').data_type == 'f'
    assert ws.formula_attributes['C7']['t'] == 'shared'
    assert ws.formula_attributes['C7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['C7']
    assert ws.cell('C7').value == '='
    assert ws.cell('D7').data_type == 'f'
    assert ws.formula_attributes['D7']['t'] == 'shared'
    assert ws.formula_attributes['D7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['D7']
    assert ws.cell('D7').value == '='
    assert ws.cell('E7').data_type == 'f'
    assert ws.formula_attributes['E7']['t'] == 'shared'
    assert ws.formula_attributes['E7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['E7']
    assert ws.cell('E7').value == '='

    # Test array forumlae
    assert ws.cell('C10').data_type == 'f'
    assert 'ref' not in ws.formula_attributes['C10']['ref']
    assert ws.formula_attributes['C10']['t'] == 'array'
    assert 'si' not in ws.formula_attributes['C10']
    assert ws.formula_attributes['C10']['ref'] == 'C10:C14'
    assert ws.cell('C10').value == '=SUM(A10:A14*B10:B14)'
    assert ws.cell('C11').data_type != 'f'
def test_read_complex_formulae(datadir):
    datadir.join("reader").chdir()
    wb = load_workbook('formulae.xlsx')
    ws = wb.get_active_sheet()

    # Test normal forumlae
    assert ws.cell('A1').data_type != 'f'
    assert ws.cell('A2').data_type != 'f'
    assert ws.cell('A3').data_type == 'f'
    assert 'A3' not in ws.formula_attributes
    assert ws.cell('A3').value == '=12345'
    assert ws.cell('A4').data_type == 'f'
    assert 'A4' not in ws.formula_attributes
    assert ws.cell('A4').value == '=A2+A3'
    assert ws.cell('A5').data_type == 'f'
    assert 'A5' not in ws.formula_attributes
    assert ws.cell('A5').value == '=SUM(A2:A4)'

    # Test unicode
    expected = '=IF(ISBLANK(B16), "Düsseldorf", B16)'
    # Hack to prevent pytest doing it's own unicode conversion
    try:
        expected = unicode(expected, "UTF8")
    except TypeError:
        pass
    assert ws['A16'].value == expected

    # Test shared forumlae
    assert ws.cell('B7').data_type == 'f'
    assert ws.formula_attributes['B7']['t'] == 'shared'
    assert ws.formula_attributes['B7']['si'] == '0'
    assert ws.formula_attributes['B7']['ref'] == 'B7:E7'
    assert ws.cell('B7').value == '=B4*2'
    assert ws.cell('C7').data_type == 'f'
    assert ws.formula_attributes['C7']['t'] == 'shared'
    assert ws.formula_attributes['C7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['C7']
    assert ws.cell('C7').value == '='
    assert ws.cell('D7').data_type == 'f'
    assert ws.formula_attributes['D7']['t'] == 'shared'
    assert ws.formula_attributes['D7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['D7']
    assert ws.cell('D7').value == '='
    assert ws.cell('E7').data_type == 'f'
    assert ws.formula_attributes['E7']['t'] == 'shared'
    assert ws.formula_attributes['E7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['E7']
    assert ws.cell('E7').value == '='

    # Test array forumlae
    assert ws.cell('C10').data_type == 'f'
    assert 'ref' not in ws.formula_attributes['C10']['ref']
    assert ws.formula_attributes['C10']['t'] == 'array'
    assert 'si' not in ws.formula_attributes['C10']
    assert ws.formula_attributes['C10']['ref'] == 'C10:C14'
    assert ws.cell('C10').value == '=SUM(A10:A14*B10:B14)'
    assert ws.cell('C11').data_type != 'f'
Example #23
0
def test_read_complex_formulae(datadir):
    datadir.join("reader").chdir()
    wb = load_workbook("formulae.xlsx")
    ws = wb.get_active_sheet()

    # Test normal forumlae
    assert ws.cell("A1").data_type != "f"
    assert ws.cell("A2").data_type != "f"
    assert ws.cell("A3").data_type == "f"
    assert "A3" not in ws.formula_attributes
    assert ws.cell("A3").value == "=12345"
    assert ws.cell("A4").data_type == "f"
    assert "A4" not in ws.formula_attributes
    assert ws.cell("A4").value == "=A2+A3"
    assert ws.cell("A5").data_type == "f"
    assert "A5" not in ws.formula_attributes
    assert ws.cell("A5").value == "=SUM(A2:A4)"

    # Test unicode
    expected = '=IF(ISBLANK(B16), "Düsseldorf", B16)'
    # Hack to prevent pytest doing it's own unicode conversion
    try:
        expected = unicode(expected, "UTF8")
    except TypeError:
        pass
    assert ws["A16"].value == expected

    # Test shared forumlae
    assert ws.cell("B7").data_type == "f"
    assert ws.formula_attributes["B7"]["t"] == "shared"
    assert ws.formula_attributes["B7"]["si"] == "0"
    assert ws.formula_attributes["B7"]["ref"] == "B7:E7"
    assert ws.cell("B7").value == "=B4*2"
    assert ws.cell("C7").data_type == "f"
    assert ws.formula_attributes["C7"]["t"] == "shared"
    assert ws.formula_attributes["C7"]["si"] == "0"
    assert "ref" not in ws.formula_attributes["C7"]
    assert ws.cell("C7").value == "="
    assert ws.cell("D7").data_type == "f"
    assert ws.formula_attributes["D7"]["t"] == "shared"
    assert ws.formula_attributes["D7"]["si"] == "0"
    assert "ref" not in ws.formula_attributes["D7"]
    assert ws.cell("D7").value == "="
    assert ws.cell("E7").data_type == "f"
    assert ws.formula_attributes["E7"]["t"] == "shared"
    assert ws.formula_attributes["E7"]["si"] == "0"
    assert "ref" not in ws.formula_attributes["E7"]
    assert ws.cell("E7").value == "="

    # Test array forumlae
    assert ws.cell("C10").data_type == "f"
    assert "ref" not in ws.formula_attributes["C10"]["ref"]
    assert ws.formula_attributes["C10"]["t"] == "array"
    assert "si" not in ws.formula_attributes["C10"]
    assert ws.formula_attributes["C10"]["ref"] == "C10:C14"
    assert ws.cell("C10").value == "=SUM(A10:A14*B10:B14)"
    assert ws.cell("C11").data_type != "f"
Example #24
0
def get_string(string_index_node):
    """Read the contents of a specific string index"""
    rich_nodes = string_index_node.findall('{%s}r' % SHEET_MAIN_NS)
    if rich_nodes:
        reconstructed_text = []
        for rich_node in rich_nodes:
            partial_text = get_text(rich_node)
            reconstructed_text.append(partial_text)
        return unicode(''.join(reconstructed_text))
    return get_text(string_index_node)
Example #25
0
    def get_squared_range(self, min_col, min_row, max_col, max_row):
        expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)]
        current_row = min_row

        style_table = self._style_table
        for row, cells in groupby(self.get_cells(min_row, min_col,
                                                 max_row, max_col),
                                  operator.attrgetter('row')):
            full_row = []
            if current_row < row:

                for gap_row in xrange(current_row, row):
                    dummy_cells = get_missing_cells(gap_row, expected_columns)
                    yield tuple([dummy_cells[column] for column in expected_columns])
                    current_row = row

            temp_cells = list(cells)
            retrieved_columns = dict([(c.column, c) for c in temp_cells])
            missing_columns = list(set(expected_columns) - set(retrieved_columns.keys()))
            replacement_columns = get_missing_cells(row, missing_columns)

            for column in expected_columns:
                if column in retrieved_columns:
                    cell = retrieved_columns[column]
                    if cell.style_id is not None:
                        style = style_table[int(cell.style_id)]
                        cell = cell._replace(number_format=style.number_format.format_code) #pylint: disable-msg=W0212
                    if cell.internal_value is not None:
                        if cell.data_type in Cell.TYPE_STRING:
                            cell = cell._replace(internal_value=unicode(self._string_table[int(cell.internal_value)])) #pylint: disable-msg=W0212
                        elif cell.data_type == Cell.TYPE_BOOL:
                            cell = cell._replace(internal_value=cell.internal_value == '1')
                        elif cell.is_date:
                            cell = cell._replace(internal_value=self._shared_date.from_julian(float(cell.internal_value)))
                        elif cell.data_type == Cell.TYPE_NUMERIC:
                            cell = cell._replace(internal_value=float(cell.internal_value))
                        elif cell.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
                            cell = cell._replace(internal_value=unicode(cell.internal_value))
                    full_row.append(cell)
                else:
                    full_row.append(replacement_columns[column])
            current_row = row + 1
            yield tuple(full_row)
Example #26
0
def get_string(string_index_node):
    """Read the contents of a specific string index"""
    rich_nodes = string_index_node.findall('{%s}r' % SHEET_MAIN_NS)
    if rich_nodes:
        reconstructed_text = []
        for rich_node in rich_nodes:
            partial_text = get_text(rich_node)
            reconstructed_text.append(partial_text)
        return unicode(''.join(reconstructed_text))
    return get_text(string_index_node)
Example #27
0
def doTransfer(file_name,db_name):
    # Replace with a database name
    con = sqlite3.connect(db_name)
    # replace with the complete path to youe excel workbook
    wb = load_workbook(filename=file_name)

    sheets = wb.sheetnames

    for sheet in sheets:
        ws = wb[sheet]

        columns = []
        query = 'CREATE TABLE ' + str(slugify(sheet)) + '(ID INTEGER PRIMARY KEY AUTOINCREMENT'
        for row in next(ws.rows):
            query += ', ' + slugify(row.value) + ' TEXT'
            columns.append(slugify(row.value))
        query += ');'

        con.execute(query)

        tup = []
        for i, rows in enumerate(ws):
            tuprow = []
            if i == 0:
                continue
            for row in rows:
                tuprow.append(unicode(row.value).strip()) if unicode(row.value).strip() != 'None' else tuprow.append('')
            tup.append(tuple(tuprow))

        insQuery1 = 'INSERT INTO ' + str(slugify(sheet)) + '('
        insQuery2 = ''
        for col in columns:
            insQuery1 += col + ', '
            insQuery2 += '?, '
        insQuery1 = insQuery1[:-2] + ') VALUES('
        insQuery2 = insQuery2[:-2] + ')'
        insQuery = insQuery1 + insQuery2

        con.executemany(insQuery, tup)
        con.commit()

    con.close()
Example #28
0
class NamedRange(object):
    """A named group of cells

    Scope is a worksheet object or None for workbook scope names (the default)
    """
    __slots__ = ('name', 'destinations', 'scope')

    str_format = unicode('%s!%s')
    repr_format = unicode('<%s "%s">')

    def __init__(self, name, destinations, scope=None):
        self.name = name
        self.destinations = destinations
        self.scope = scope

    def __str__(self):
        return  ','.join([self.str_format % (sheet, name) for sheet, name in self.destinations])

    def __repr__(self):
        return  self.repr_format % (self.__class__.__name__, str(self))
Example #29
0
def to_str(my_string):
    # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
    if type(my_string) == str and str != unicode:
        return unicode(my_string, 'utf-8')
    elif type(my_string) == unicode:
        return my_string
    else:
        try:
            return str(my_string)
        except:
            print('Couldnt parse as string', type(my_string))
            return my_string
Example #30
0
def to_str(my_string):
    # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
    if type(my_string) == str and str != unicode:
        return unicode(my_string, 'utf-8')
    elif type(my_string) == unicode:
        return my_string
    else:
        try:
            return str(my_string)
        except:
            print('Couldnt parse as string', type(my_string))
            return my_string
Example #31
0
    def __init__(self, address, sheet = None, value=None, formula=None, is_range = False, is_named_range=False, should_eval='normal'):
        super(Cell,self).__init__()

        if is_named_range == False:

            # remove $'s
            address = address.replace('$','')

            sh,c,r = split_address(address)

            # both are empty
            if not sheet and not sh:
                raise Exception("Sheet name may not be empty for cell address %s" % address)
            # both exist but disagree
            elif sh and sheet and sh != sheet:
                raise Exception("Sheet name mismatch for cell address %s: %s vs %s" % (address,sheet, sh))
            elif not sh and sheet:
                sh = sheet
            else:
                pass

            # we assume a cell's location can never change
            self.__sheet = sheet.encode('utf-8') if sheet is not None else sheet

            self.__sheet = sh
            self.__col = c
            self.__row = int(r)
            self.__col_idx = col2num(c)

        else:
            self.__named_range = address
            self.__sheet = None
            self.__col = None
            self.__row = None
            self.__col_idx = None

        # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
        if type(formula) == str and str != unicode:
            self.__formula = unicode(formula, 'utf-8') if formula else None
        else:
            self.__formula = formula if formula else None

        self.__value = value
        self.python_expression = None
        self.need_update = False
        self.should_eval = should_eval
        self.__compiled_expression = None
        self.__is_range = is_range

        # every cell has a unique id
        self.__id = Cell.next_id()
Example #32
0
    def __init__(self, address, sheet = None, value=None, formula=None, is_range = False, is_named_range=False, should_eval='normal'):
        super(Cell,self).__init__()

        if is_named_range == False:

            # remove $'s
            address = address.replace('$','')

            sh,c,r = split_address(address)

            # both are empty
            if not sheet and not sh:
                raise Exception("Sheet name may not be empty for cell address %s" % address)
            # both exist but disagree
            elif sh and sheet and sh != sheet:
                raise Exception("Sheet name mismatch for cell address %s: %s vs %s" % (address,sheet, sh))
            elif not sh and sheet:
                sh = sheet
            else:
                pass

            # we assume a cell's location can never change
            self.__sheet = sheet.encode('utf-8') if sheet is not None else sheet

            self.__sheet = sh
            self.__col = c
            self.__row = int(r)
            self.__col_idx = col2num(c)

        else:
            self.__named_range = address
            self.__sheet = None
            self.__col = None
            self.__row = None
            self.__col_idx = None

        # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
        if type(formula) == str and str != unicode:
            self.__formula = unicode(formula, 'utf-8') if formula else None
        else:
            self.__formula = formula if formula else None

        self.__value = value
        self.python_expression = None
        self.need_update = False
        self.should_eval = should_eval
        self.__compiled_expression = None
        self.__is_range = is_range

        # every cell has a unique id
        self.__id = Cell.next_id()
Example #33
0
    def write_tables(self):
        tables = TablePartList()

        for table in self.ws._tables:
            if not table.tableColumns:
                table._initialise_columns()
                if table.headerRowCount:
                    row = self.ws[table.ref][0]
                    for cell, col in zip(row, table.tableColumns):
                        if cell.data_type != "s":
                            warn(
                                "File may not be readable: column headings must be strings."
                            )
                        col.name = unicode(cell.value)
            rel = Relationship(Type=table._rel_type, Target="")
            self._rels.append(rel)
            table._rel_id = rel.Id
            tables.append(Related(id=rel.Id))

        if tables:
            self.xf.send(tables.to_tree())
Example #34
0
def _add_table_headers(ws):
    """
    Check if tables have tableColumns and create them and autoFilter if necessary.
    Column headers will be taken from the first row of the table.
    """

    tables = TablePartList()

    for table in ws._tables:
        if not table.tableColumns:
            table._initialise_columns()
            if table.headerRowCount:
                row = ws[table.ref][0]
                for cell, col in zip(row, table.tableColumns):
                    if cell.data_type != "s":
                        warn("File may not be readable: column headings must be strings.")
                    col.name = unicode(cell.value)
        rel = Relationship(Type=table._rel_type, Target="")
        ws._rels.append(rel)
        table._rel_id = rel.Id
        tables.append(Related(id=rel.Id))

    return tables
def test_get_xml_iter():
    #1 file object
    #2 stream (file-like)
    #3 string
    #4 zipfile
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile

    FUT = _get_xml_iter
    s = b""
    stream = FUT(s)
    assert isinstance(stream, BytesIO), type(stream)

    u = unicode(s)
    stream = FUT(u)
    assert isinstance(stream, BytesIO), type(stream)

    f = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    stream = FUT(f)
    assert stream == f
    f.close()

    t = TemporaryFile()
    z = ZipFile(t, mode="w")
    z.writestr("test", "whatever")
    stream = FUT(z.open("test"))
    assert hasattr(stream, "read")
    # z.close()
    try:
        z.close()
    except IOError:
        # you can't just close zipfiles in Windows
        if z.fp is not None:
            z.fp.close() # python 2.6
        else:
            z.close() # python 2.7
Example #36
0
 def __repr__(self):
     return unicode("<Cell %s.%s>") % (self.parent.title, self.coordinate)
Example #37
0
 def __repr__(self):
     return unicode(self)
Example #38
0
def resolve_range(rng, should_flatten = False, sheet=''):
    # print 'RESOLVE RANGE splitting', rng
    if ':' not in rng:
        if '!' in rng:
            rng = rng.split('!')
        return ExcelError('#REF!', info = '%s is not a regular range, nor a named_range' % rng)
    sh, start, end = split_range(rng)

    if sh and sheet:
        if sh != sheet:
            raise Exception("Mismatched sheets %s and %s" % (sh,sheet))
        else:
            sheet += '!'
    elif sh and not sheet:
        sheet = sh + "!"
    elif sheet and not sh:
        sheet += "!"
    else:
        pass

    # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
    if type(sheet) == str and str != unicode:
        sheet = unicode(sheet, 'utf-8')
    if type(rng) == str and str != unicode:
        rng = unicode(rng, 'utf-8')

    key = rng+str(should_flatten)+sheet

    if key in resolve_range_cache:
        return resolve_range_cache[key]
    else:
        if not is_range(rng):  return ([sheet + rng],1,1)
        # single cell, no range
        if start.isdigit() and end.isdigit():
            # This copes with 1:1 style ranges
            start_col = "A"
            start_row = start
            end_col = "XFD"
            end_row = end
        elif start.isalpha() and end.isalpha():
            # This copes with A:A style ranges
            start_col = start
            start_row = 1
            end_col = end
            end_row = 2**20
        else:
            sh, start_col, start_row = split_address(start)
            sh, end_col, end_row = split_address(end)

        start_col_idx = col2num(start_col)
        end_col_idx = col2num(end_col);

        start_row = int(start_row)
        end_row = int(end_row)

        # Attempt to use Numpy, not relevant for now

        # num2col_vec = np.vectorize(num2col)
        # r = np.array([range(start_row, end_row + 1),]*nb_col, dtype='a5').T
        # c = num2col_vec(np.array([range(start_col_idx, end_col_idx + 1),]*nb_row))
        # if len(sheet)>0:
        #     s = np.chararray((nb_row, nb_col), itemsize=len(sheet))
        #     s[:] = sheet
        #     c = np.core.defchararray.add(s, c)
        # B = np.core.defchararray.add(c, r)


        # if start_col == end_col:
        #     data = B.T.tolist()[0]
        #     return data, len(data), 1
        # elif start_row == end_row:
        #     data = B.tolist()[0]
        #     return data, 1, len(data)
        # else:
        #     if should_flatten:
        #         return B.flatten().tolist(), 1, nb_col*nb_row
        #     else:
        #         return B.tolist(), nb_row, nb_col

        # single column
        if  start_col == end_col:
            nrows = end_row - start_row + 1
            data = [ "%s%s%s" % (s,c,r) for (s,c,r) in zip([sheet]*nrows,[start_col]*nrows,list(range(start_row,end_row+1)))]

            output = data,len(data),1

        # single row
        elif start_row == end_row:
            ncols = end_col_idx - start_col_idx + 1
            data = [ "%s%s%s" % (s,num2col(c),r) for (s,c,r) in zip([sheet]*ncols,list(range(start_col_idx,end_col_idx+1)),[start_row]*ncols)]
            output = data,1,len(data)

        # rectangular range
        else:
            cells = []
            for r in range(start_row,end_row+1):
                row = []
                for c in range(start_col_idx,end_col_idx+1):
                    row.append(sheet + num2col(c) + str(r))

                cells.append(row)

            if should_flatten:
                # flatten into one list
                l = list(flatten(cells, only_lists = True))
                output = l,len(cells), len(cells[0])
            else:
                output = cells, len(cells), len(cells[0])

        resolve_range_cache[key] = output
        return output
Example #39
0
def load_workbook(filename,
                  read_only=False,
                  use_iterators=False,
                  keep_vba=KEEP_VBA,
                  guess_types=False,
                  data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    read_only = read_only or use_iterators

    is_file_like = hasattr(filename, 'read')

    if not is_file_like and os.path.isfile(filename):
        file_format = os.path.splitext(filename)[-1]
        if file_format not in SUPPORTED_FORMATS:
            if file_format == '.xls':
                msg = ('openpyxl does not support the old .xls file format, '
                       'please use xlrd to read this file, or convert it to '
                       'the more recent .xlsx file format.')
            elif file_format == '.xlsb':
                msg = ('openpyxl does not support binary format .xlsb, '
                       'please convert this file to .xlsx format if you want '
                       'to open it with openpyxl')
            else:
                msg = ('openpyxl does not support %s file format, '
                       'please check you can open '
                       'it with Excel first. '
                       'Supported formats are: %s') % (
                           file_format, ','.join(SUPPORTED_FORMATS))
            raise InvalidFileException(msg)

    if is_file_like:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if getattr(filename, 'encoding', None) is not None:
            raise IOError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        f = repair_central_directory(filename, is_file_like)
        archive = ZipFile(f, 'r', ZIP_DEFLATED)

    wb = Workbook(guess_types=guess_types,
                  data_only=data_only,
                  read_only=read_only)

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    try:
        _load_workbook(wb, archive, filename, read_only, keep_vba)
    except KeyError:
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    archive.close()
    return wb
Example #40
0
class Worksheet(object):
    """Represents a worksheet.

    Do not create worksheets yourself,
    use :func:`openpyxl.workbook.Workbook.create_sheet` instead

    """
    repr_format = unicode('<Worksheet "%s">')
    bad_title_char_re = re.compile(r'[\\*?:/\[\]]')

    BREAK_NONE = 0
    BREAK_ROW = 1
    BREAK_COLUMN = 2

    SHEETSTATE_VISIBLE = 'visible'
    SHEETSTATE_HIDDEN = 'hidden'
    SHEETSTATE_VERYHIDDEN = 'veryHidden'

    # Paper size
    PAPERSIZE_LETTER = '1'
    PAPERSIZE_LETTER_SMALL = '2'
    PAPERSIZE_TABLOID = '3'
    PAPERSIZE_LEDGER = '4'
    PAPERSIZE_LEGAL = '5'
    PAPERSIZE_STATEMENT = '6'
    PAPERSIZE_EXECUTIVE = '7'
    PAPERSIZE_A3 = '8'
    PAPERSIZE_A4 = '9'
    PAPERSIZE_A4_SMALL = '10'
    PAPERSIZE_A5 = '11'

    # Page orientation
    ORIENTATION_PORTRAIT = 'portrait'
    ORIENTATION_LANDSCAPE = 'landscape'

    def __init__(self, parent_workbook, title='Sheet'):
        self._parent = parent_workbook
        self._title = ''
        if not title:
            self.title = 'Sheet%d' % (1 + len(self._parent.worksheets))
        else:
            self.title = title
        self.row_dimensions = {}
        self.column_dimensions = OrderedDict([])
        self.page_breaks = []
        self._cells = {}
        self._styles = {}
        self._charts = []
        self._images = []
        self._comment_count = 0
        self._merged_cells = []
        self.relationships = []
        self._data_validations = []
        self.selected_cell = 'A1'
        self.active_cell = 'A1'
        self.sheet_state = self.SHEETSTATE_VISIBLE
        self.page_setup = PageSetup()
        self.page_margins = PageMargins()
        self.header_footer = HeaderFooter()
        self.sheet_view = SheetView()
        self.protection = SheetProtection()
        self.show_gridlines = True
        self.print_gridlines = False
        self.show_summary_below = True
        self.show_summary_right = True
        self.default_row_dimension = RowDimension(self)
        self.default_column_dimension = ColumnDimension(self)
        self._auto_filter = AutoFilter()
        self._freeze_panes = None
        self.paper_size = None
        self.formula_attributes = {}
        self.orientation = None
        self.xml_source = None
        self.conditional_formatting = ConditionalFormatting()

    def __repr__(self):
        return self.repr_format % self.title

    @property
    def parent(self):
        return self._parent

    @property
    def encoding(self):
        return self._parent.encoding

    def garbage_collect(self):
        """Delete cells that are not storing a value."""
        delete_list = [coordinate for coordinate, cell in \
            iteritems(self._cells) if (not cell.merged and cell.value in ('', None) and \
            cell.comment is None and (coordinate not in self._styles or
            hash(cell.style) == _DEFAULTS_STYLE_HASH))]
        for coordinate in delete_list:
            del self._cells[coordinate]

    def get_cell_collection(self):
        """Return an unordered list of the cells in this worksheet."""
        return self._cells.values()

    @property
    def title(self):
        """Return the title for this sheet."""
        return self._title

    @title.setter
    def title(self, value):
        """Set a sheet title, ensuring it is valid.
           Limited to 31 characters, no special characters."""
        if self.bad_title_char_re.search(value):
            msg = 'Invalid character found in sheet title'
            raise SheetTitleException(msg)
        value = self.unique_sheet_name(value)
        if len(value) > 31:
            msg = 'Maximum 31 characters allowed in sheet title'
            raise SheetTitleException(msg)
        self._title = value

    def unique_sheet_name(self, value):
        # check if sheet_name already exists
        # do this *before* length check
        sheets = self._parent.get_sheet_names()
        if value in sheets:
            sheets = ",".join(sheets)
            sheet_title_regex = re.compile("(?P<title>%s)(?P<count>\d?),?" %
                                           value)
            matches = sheet_title_regex.findall(sheets)
            if matches:
                # use name, but append with the next highest integer
                counts = [int(idx) for (t, idx) in matches if idx.isdigit()]
                if counts:
                    highest = max(counts)
                else:
                    highest = 0
                value = "%s%d" % (value, highest + 1)
        return value

    @property
    def auto_filter(self):
        """Return :class:`~openpyxl.worksheet.AutoFilter` object.

        `auto_filter` attribute stores/returns string until 1.8. You should change your code like ``ws.auto_filter.ref = "A1:A3"``.

        .. versionchanged:: 1.9
        """
        return self._auto_filter

    @property
    def freeze_panes(self):
        return self._freeze_panes

    @freeze_panes.setter
    def freeze_panes(self, topLeftCell):
        if not topLeftCell:
            topLeftCell = None
        elif isinstance(topLeftCell, str):
            topLeftCell = topLeftCell.upper()
        else:  # Assume a cell
            topLeftCell = topLeftCell.coordinate
        if topLeftCell == 'A1':
            topLeftCell = None
        self._freeze_panes = topLeftCell

    def add_print_title(self, n, rows_or_cols='rows'):
        """ Print Titles are rows or columns that are repeated on each printed sheet.
        This adds n rows or columns at the top or left of the sheet
        """
        if rows_or_cols == 'cols':
            r = '$A:$%s' % get_column_letter(n)
        else:
            r = '$1:$%d' % n

        self.parent.create_named_range('_xlnm.Print_Titles', self, r, self)

    def cell(self, coordinate=None, row=None, column=None):
        """Returns a cell object based on the given coordinates.

        Usage: cell(coodinate='A15') **or** cell(row=15, column=1)

        If `coordinates` are not given, then row *and* column must be given.

        Cells are kept in a dictionary which is empty at the worksheet
        creation.  Calling `cell` creates the cell in memory when they
        are first accessed, to reduce memory usage.

        :param coordinate: coordinates of the cell (e.g. 'B12')
        :type coordinate: string

        :param row: row index of the cell (e.g. 4)
        :type row: int

        :param column: column index of the cell (e.g. 3)
        :type column: int

        :raise: InsufficientCoordinatesException when coordinate or (row and column) are not given

        :rtype: :class:`openpyxl.cell.Cell`

        """
        if not coordinate:
            if (row is None or column is None):
                msg = "You have to provide a value either for " \
                        "'coordinate' or for 'row' *and* 'column'"
                raise InsufficientCoordinatesException(msg)
            else:
                coordinate = '%s%s' % (get_column_letter(column + 1), row + 1)
        else:
            coordinate = coordinate.replace('$', '')

        return self._get_cell(coordinate)

    def _get_cell(self, coordinate):

        if not coordinate in self._cells:
            column, row = coordinate_from_string(coordinate)
            new_cell = openpyxl.cell.Cell(self, column, row)
            self._cells[coordinate] = new_cell
            if column not in self.column_dimensions:
                self.column_dimensions[column] = ColumnDimension(column)
            if row not in self.row_dimensions:
                self.row_dimensions[row] = RowDimension(row)
        return self._cells[coordinate]

    def __getitem__(self, key):
        """Convenience access by Excel style address"""
        if isinstance(key, slice):
            return self.range("{0}:{1}".format(key.start, key.stop))
        return self._get_cell(key)

    def __setitem__(self, key, value):
        self[key].value = value

    def get_highest_row(self):
        """Returns the maximum row index containing data

        :rtype: int
        """
        if self.row_dimensions:
            return max(self.row_dimensions)
        else:
            return 1

    def get_highest_column(self):
        """Get the largest value for column currently stored.

        :rtype: int
        """
        if self.column_dimensions:
            return max([
                column_index_from_string(column_index)
                for column_index in self.column_dimensions
            ])
        else:
            return 1

    def calculate_dimension(self):
        """Return the minimum bounding range for all cells containing data."""
        return 'A1:%s%d' % (get_column_letter(
            self.get_highest_column()), self.get_highest_row())

    def range(self, range_string, row=0, column=0):
        """Returns a 2D array of cells, with optional row and column offsets.

        :param range_string: cell range string or `named range` name
        :type range_string: string

        :param row: number of rows to offset
        :type row: int

        :param column: number of columns to offset
        :type column: int

        :rtype: tuples of tuples of :class:`openpyxl.cell.Cell`

        """
        if ':' in range_string:
            # R1C1 range
            result = []
            min_range, max_range = range_string.split(':')
            min_col, min_row = coordinate_from_string(min_range)
            max_col, max_row = coordinate_from_string(max_range)
            if column:
                min_col = get_column_letter(
                    column_index_from_string(min_col) + column)
                max_col = get_column_letter(
                    column_index_from_string(max_col) + column)
            min_col = column_index_from_string(min_col)
            max_col = column_index_from_string(max_col)
            cache_cols = {}
            for col in xrange(min_col, max_col + 1):
                cache_cols[col] = get_column_letter(col)
            rows = xrange(min_row + row, max_row + row + 1)
            cols = xrange(min_col, max_col + 1)
            for row in rows:
                new_row = []
                for col in cols:
                    new_row.append(self.cell('%s%s' % (cache_cols[col], row)))
                result.append(tuple(new_row))
            return tuple(result)
        else:
            try:
                return self.cell(coordinate=range_string,
                                 row=row,
                                 column=column)
            except CellCoordinatesException:
                pass

            # named range
            named_range = self._parent.get_named_range(range_string)
            if named_range is None:
                msg = '%s is not a valid range name' % range_string
                raise NamedRangeException(msg)
            if isinstance(named_range, NamedRangeContainingValue):
                msg = '%s refers to a value, not a range' % range_string
                raise NamedRangeException(msg)

            result = []
            for destination in named_range.destinations:

                worksheet, cells_range = destination

                if worksheet is not self:
                    msg = 'Range %s is not defined on worksheet %s' % \
                            (cells_range, self.title)
                    raise NamedRangeException(msg)

                content = self.range(cells_range)

                if isinstance(content, tuple):
                    for cells in content:
                        result.extend(cells)
                else:
                    result.append(content)

            if len(result) == 1:
                return result[0]
            else:
                return tuple(result)

    def get_style(self, coordinate, read_only=False):
        """Return the style object for the specified cell."""
        if not coordinate in self._styles:
            self._styles[coordinate] = Style()
        elif self._styles[coordinate].static and not read_only:
            self._styles[coordinate] = self._styles[coordinate].copy()
        return self._styles[coordinate]

    def set_printer_settings(self, paper_size, orientation):
        """Set printer settings """

        self.page_setup.paperSize = paper_size
        if orientation not in (self.ORIENTATION_PORTRAIT,
                               self.ORIENTATION_LANDSCAPE):
            raise ValueError(
                "Values should be %s or %s" %
                (self.ORIENTATION_PORTRAIT, self.ORIENTATION_LANDSCAPE))
        self.page_setup.orientation = orientation

    def create_relationship(self, rel_type):
        """Add a relationship for this sheet."""
        rel = Relationship(rel_type)
        self.relationships.append(rel)
        rel_id = self.relationships.index(rel)
        rel.id = 'rId' + str(rel_id + 1)
        return self.relationships[rel_id]

    def add_data_validation(self, data_validation):
        """ Add a data-validation object to the sheet.  The data-validation
            object defines the type of data-validation to be applied and the
            cell or range of cells it should apply to.
        """
        data_validation._sheet = self
        self._data_validations.append(data_validation)

    def add_chart(self, chart):
        """ Add a chart to the sheet """
        chart._sheet = self
        self._charts.append(chart)
        self.add_drawing(chart)

    def add_image(self, img):
        """ Add an image to the sheet """
        img._sheet = self
        self._images.append(img)
        self.add_drawing(img)

    def add_drawing(self, obj):
        """Images and charts both create drawings"""
        self._parent.drawings.append(obj)

    def add_rel(self, obj):
        """Drawings and hyperlinks create relationships"""
        self._parent.relationships.append(obj)

    def merge_cells(self,
                    range_string=None,
                    start_row=None,
                    start_column=None,
                    end_row=None,
                    end_column=None):
        """ Set merge on a cell range.  Range is a cell range (e.g. A1:E1) """
        if not range_string:
            if start_row is None or start_column is None or end_row is None or end_column is None:
                msg = "You have to provide a value either for "\
                      "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'"
                raise InsufficientCoordinatesException(msg)
            else:
                range_string = '%s%s:%s%s' % (
                    get_column_letter(start_column + 1), start_row + 1,
                    get_column_letter(end_column + 1), end_row + 1)
        elif len(range_string.split(':')) != 2:
            msg = "Range must be a cell range (e.g. A1:E1)"
            raise InsufficientCoordinatesException(msg)
        else:
            range_string = range_string.replace('$', '')

        # Make sure top_left cell exists - is this necessary?
        min_col, min_row = coordinate_from_string(range_string.split(':')[0])
        max_col, max_row = coordinate_from_string(range_string.split(':')[1])
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col)
        # Blank out the rest of the cells in the range
        for col in xrange(min_col, max_col + 1):
            for row in xrange(min_row, max_row + 1):
                if not (row == min_row and col == min_col):
                    # PHPExcel adds cell and specifically blanks it out if it doesn't exist
                    self._get_cell('%s%s' %
                                   (get_column_letter(col), row)).value = None
                    self._get_cell('%s%s' %
                                   (get_column_letter(col), row)).merged = True

        if range_string not in self._merged_cells:
            self._merged_cells.append(range_string)

    def unmerge_cells(self,
                      range_string=None,
                      start_row=None,
                      start_column=None,
                      end_row=None,
                      end_column=None):
        """ Remove merge on a cell range.  Range is a cell range (e.g. A1:E1) """
        if not range_string:
            if start_row is None or start_column is None or end_row is None or end_column is None:
                msg = "You have to provide a value either for "\
                      "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'"
                raise InsufficientCoordinatesException(msg)
            else:
                range_string = '%s%s:%s%s' % (
                    get_column_letter(start_column + 1), start_row + 1,
                    get_column_letter(end_column + 1), end_row + 1)
        elif len(range_string.split(':')) != 2:
            msg = "Range must be a cell range (e.g. A1:E1)"
            raise InsufficientCoordinatesException(msg)
        else:
            range_string = range_string.replace('$', '')

        if range_string in self._merged_cells:
            self._merged_cells.remove(range_string)
            min_col, min_row = coordinate_from_string(
                range_string.split(':')[0])
            max_col, max_row = coordinate_from_string(
                range_string.split(':')[1])
            min_col = column_index_from_string(min_col)
            max_col = column_index_from_string(max_col)
            # Mark cell as unmerged
            for col in xrange(min_col, max_col + 1):
                for row in xrange(min_row, max_row + 1):
                    if not (row == min_row and col == min_col):
                        self._get_cell(
                            '%s%s' %
                            (get_column_letter(col), row)).merged = False
        else:
            msg = 'Cell range %s not known as merged.' % range_string
            raise InsufficientCoordinatesException(msg)

    def append(self, list_or_dict):
        """Appends a group of values at the bottom of the current sheet.

        * If it's a list: all values are added in order, starting from the first column
        * If it's a dict: values are assigned to the columns indicated by the keys (numbers or letters)

        :param list_or_dict: list or dict containing values to append
        :type list_or_dict: list/tuple or dict

        Usage:

        * append(['This is A1', 'This is B1', 'This is C1'])
        * **or** append({'A' : 'This is A1', 'C' : 'This is C1'})
        * **or** append({0 : 'This is A1', 2 : 'This is C1'})

        :raise: TypeError when list_or_dict is neither a list/tuple nor a dict

        """
        row_idx = len(self.row_dimensions)
        if isinstance(list_or_dict, (list, tuple)):
            for col_idx, content in enumerate(list_or_dict):
                self.cell(row=row_idx, column=col_idx).value = content

        elif isinstance(list_or_dict, dict):
            for col_idx, content in iteritems(list_or_dict):
                if isinstance(col_idx, basestring):
                    col_idx = column_index_from_string(col_idx) - 1
                self.cell(row=row_idx, column=col_idx).value = content

        else:
            raise TypeError('list_or_dict must be a list or a dict')

    @property
    def rows(self):
        return self.range(self.calculate_dimension())

    @property
    def columns(self):
        max_row = self.get_highest_row()
        cols = []
        for col_idx in range(self.get_highest_column()):
            col = get_column_letter(col_idx + 1)
            res = self.range('%s1:%s%d' % (col, col, max_row))
            cols.append(tuple([x[0] for x in res]))

        return tuple(cols)

    def point_pos(self, left=0, top=0):
        """ tells which cell is under the given coordinates (in pixels)
        counting from the top-left corner of the sheet.
        Can be used to locate images and charts on the worksheet """
        current_col = 1
        current_row = 1
        column_dimensions = self.column_dimensions
        row_dimensions = self.row_dimensions
        default_width = points_to_pixels(DEFAULT_COLUMN_WIDTH)
        default_height = points_to_pixels(DEFAULT_ROW_HEIGHT)
        left_pos = 0
        top_pos = 0

        while left_pos <= left:
            letter = get_column_letter(current_col)
            current_col += 1
            if letter in column_dimensions:
                cdw = column_dimensions[letter].width
                if cdw > 0:
                    left_pos += points_to_pixels(cdw)
                    continue
            left_pos += default_width

        while top_pos <= top:
            row = current_row
            current_row += 1
            if row in row_dimensions:
                rdh = row_dimensions[row].height
                if rdh > 0:
                    top_pos += points_to_pixels(rdh)
                    continue
            top_pos += default_height

        return (letter, row)
Example #41
0
def resolve_range(rng, should_flatten = False, sheet=''):
    # print 'RESOLVE RANGE splitting', rng
    if ':' not in rng:
        if '!' in rng:
            rng = rng.split('!')
        return ExcelError('#REF!', info = '%s is not a regular range, nor a named_range' % rng)
    sh, start, end = split_range(rng)

    if sh and sheet:
        if sh != sheet:
            raise Exception("Mismatched sheets %s and %s" % (sh,sheet))
        else:
            sheet += '!'
    elif sh and not sheet:
        sheet = sh + "!"
    elif sheet and not sh:
        sheet += "!"
    else:
        pass

    # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
    if type(sheet) == str and str != unicode:
        sheet = unicode(sheet, 'utf-8')
    if type(rng) == str and str != unicode:
        rng = unicode(rng, 'utf-8')

    key = rng+str(should_flatten)+sheet

    if key in resolve_range_cache:
        return resolve_range_cache[key]
    else:
        if not is_range(rng):  return ([sheet + rng],1,1)
        # single cell, no range
        if start.isdigit() and end.isdigit():
            # This copes with 1:1 style ranges
            start_col = "A"
            start_row = start
            end_col = "XFD"
            end_row = end
        elif start.isalpha() and end.isalpha():
            # This copes with A:A style ranges
            start_col = start
            start_row = 1
            end_col = end
            end_row = 2**20
        else:
            sh, start_col, start_row = split_address(start)
            sh, end_col, end_row = split_address(end)

        start_col_idx = col2num(start_col)
        end_col_idx = col2num(end_col);

        start_row = int(start_row)
        end_row = int(end_row)

        # Attempt to use Numpy, not relevant for now

        # num2col_vec = np.vectorize(num2col)
        # r = np.array([range(start_row, end_row + 1),]*nb_col, dtype='a5').T
        # c = num2col_vec(np.array([range(start_col_idx, end_col_idx + 1),]*nb_row))
        # if len(sheet)>0:
        #     s = np.chararray((nb_row, nb_col), itemsize=len(sheet))
        #     s[:] = sheet
        #     c = np.core.defchararray.add(s, c)
        # B = np.core.defchararray.add(c, r)


        # if start_col == end_col:
        #     data = B.T.tolist()[0]
        #     return data, len(data), 1
        # elif start_row == end_row:
        #     data = B.tolist()[0]
        #     return data, 1, len(data)
        # else:
        #     if should_flatten:
        #         return B.flatten().tolist(), 1, nb_col*nb_row
        #     else:
        #         return B.tolist(), nb_row, nb_col

        # single column
        if  start_col == end_col:
            nrows = end_row - start_row + 1
            data = [ "%s%s%s" % (s,c,r) for (s,c,r) in zip([sheet]*nrows,[start_col]*nrows,list(range(start_row,end_row+1)))]

            output = data,len(data),1

        # single row
        elif start_row == end_row:
            ncols = end_col_idx - start_col_idx + 1
            data = [ "%s%s%s" % (s,num2col(c),r) for (s,c,r) in zip([sheet]*ncols,list(range(start_col_idx,end_col_idx+1)),[start_row]*ncols)]
            output = data,1,len(data)

        # rectangular range
        else:
            cells = []
            for r in range(start_row,end_row+1):
                row = []
                for c in range(start_col_idx,end_col_idx+1):
                    row.append(sheet + num2col(c) + str(r))

                cells.append(row)

            if should_flatten:
                # flatten into one list
                l = list(flatten(cells, only_lists = True))
                output = l,len(cells), len(cells[0])
            else:
                output = cells, len(cells), len(cells[0])

        resolve_range_cache[key] = output
        return output
Example #42
0
 def __repr__(self):
     return unicode("<Cell %s.%s>") % (self.parent.title, self.coordinate)
Example #43
0
 def check_error(self, value):
     """Tries to convert Error" else N/A"""
     try:
         return unicode(value)
     except:
         return unicode('#N/A')
Example #44
0
 def test_unicode(self, HeaderFooterItem):
     from openpyxl.compat import unicode
     hf = HeaderFooterItem()
     hf.left.text = u'D\xfcsseldorf'
     assert unicode(hf) == u'&LD\xfcsseldorf'
Example #45
0
 def test_unicode(self, _HeaderFooterPart):
     from openpyxl.compat import unicode
     hf = _HeaderFooterPart()
     hf.text = u"D\xfcsseldorf"
     assert unicode(hf) == u"D\xfcsseldorf"
Example #46
0
def load_workbook(filename, use_iterators=False, keep_vba=KEEP_VBA, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """

    is_file_instance = isinstance(filename, file)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (BadZipfile, RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))
    wb = Workbook(guess_types=guess_types, data_only=data_only)

    if use_iterators:
        wb._set_optimized_read()
        if guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

    try:
        _load_workbook(wb, archive, filename, use_iterators, keep_vba)
    except KeyError:
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    archive.close()
    return wb
Example #47
0
def load_workbook(filename, use_iterators=False, keep_vba=KEEP_VBA, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """

    is_file_instance = isinstance(filename, file)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        file_format = os.path.splitext(filename)[-1]
        if file_format not in SUPPORTED_FORMATS:
            if file_format == '.xls':
                msg = ('openpyxl does not support the old .xls file format, '
                       'please use xlrd to read this file, or convert it to '
                       'the more recent .xlsx file format.')
            elif file_format == '.xlsb':
                msg = ('openpyxl does not support binary format .xlsb, '
                       'please convert this file to .xlsx format if you want '
                       'to open it with openpyxl')
            else:
                msg = ('openpyxl does not support %s file format, '
                       'please check you can open '
                       'it with Excel first. '
                       'Supported formats are: %s') % (file_format,
                                                     ','.join(SUPPORTED_FORMATS))
            raise InvalidFileException(msg)

        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (BadZipfile, RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))
    wb = Workbook(guess_types=guess_types, data_only=data_only)

    if use_iterators:
        wb._set_optimized_read()
        if guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

    try:
        _load_workbook(wb, archive, filename, use_iterators, keep_vba)
    except KeyError:
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    archive.close()
    return wb
Example #48
0
 def __init__(self, val, alpha=None):
     self.val = unicode(val)
     self.alpha = alpha
    def append(self, row):
        """
        :param row: iterable containing values to append
        :type row: iterable
        """
        doc = self._get_content_generator()
        self._max_row += 1
        span = len(row)
        self._max_col = max(self._max_col, span)
        row_idx = self._max_row
        attrs = {'r': '%d' % row_idx, 'spans': '1:%d' % span}
        start_tag(doc, 'row', attrs)

        for col_idx, cell in enumerate(row):
            style = None
            comment = None
            if cell is None:
                continue
            elif isinstance(cell, dict):
                dct = cell
                cell = dct.get('value')
                if cell is None:
                    continue
                style = dct.get('style')
                comment = dct.get('comment')
                for ob, attr, cls in ((style, 'style', Style),
                                      (comment, 'comment', Comment)):
                    if ob is not None and not isinstance(ob, cls):
                        raise TypeError('%s should be a %s not a %s' %
                                        (attr, cls.__class__.__name__,
                                         ob.__class__.__name__))

            column = get_column_letter(col_idx + 1)
            coordinate = '%s%d' % (column, row_idx)
            attributes = {'r': coordinate}
            if comment is not None:
                comment._parent = CommentParentCell(coordinate, row_idx,
                                                    column)
                self._comments.append(comment)
                self._comment_count += 1

            if isinstance(cell, bool):
                dtype = 'boolean'
            elif isinstance(cell, NUMERIC_TYPES):
                dtype = 'numeric'
            elif isinstance(cell, TIME_TYPES):
                dtype = 'datetime'
                if isinstance(cell, datetime.date):
                    cell = to_excel(cell)
                elif isinstance(cell, datetime.time):
                    cell = time_to_days(cell)
                elif isinstance(cell, datetime.timedelta):
                    cell = timedelta_to_days(cell)
                if style is None:
                    # allow user-defined style if needed
                    style = STYLES[dtype]['style']
            elif cell and cell[0] == '=':
                dtype = 'formula'
            else:
                dtype = 'string'
                cell = self._strings.add(unicode(cell))

            if style is not None:
                attributes['s'] = '%d' % self._styles.add(style)

            if dtype != 'formula':
                attributes['t'] = STYLES[dtype]['type']
            start_tag(doc, 'c', attributes)

            if dtype == 'formula':
                tag(doc, 'f', body='%s' % cell[1:])
                tag(doc, 'v')
            elif dtype == 'boolean':
                tag(doc, 'v', body='%d' % cell)
            else:
                tag(doc, 'v', body='%s' % cell)
            end_tag(doc, 'c')
        end_tag(doc, 'row')
 def test_repr(self, Reference):
     ref = Reference(
         range_string=b'D\xc3\xbcsseldorf!A1:A10'.decode("utf8"))
     assert unicode(ref) == b'D\xc3\xbcsseldorf!$A$1:$A$10'.decode("utf8")
Example #51
0
 def check_error(self, value):
     """Tries to convert Error" else N/A"""
     try:
         return unicode(value)
     except:
         return unicode('#N/A')
Example #52
0
 def check_error(self, value):
     """Tries to convert Error" else N/A"""
     try:
         return unicode(value)
     except UnicodeDecodeError:
         return u"#N/A"
Example #53
0
def load_workbook(filename,
                  use_iterators=False,
                  keep_vba=KEEP_VBA,
                  guess_types=False,
                  data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """

    is_file_instance = isinstance(filename, file)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (BadZipfile, RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))
    wb = Workbook(guess_types=guess_types, data_only=data_only)

    if use_iterators:
        wb._set_optimized_read()
        if guess_types:
            warnings.warn(
                'Data types are not guessed when using iterator reader')

    try:
        _load_workbook(wb, archive, filename, use_iterators, keep_vba)
    except KeyError:
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    archive.close()
    return wb
Example #54
0
class Worksheet(object):
    """Represents a worksheet.

    Do not create worksheets yourself,
    use :func:`openpyxl.workbook.Workbook.create_sheet` instead

    """
    repr_format = unicode('<Worksheet "%s">')
    bad_title_char_re = re.compile(r'[\\*?:/\[\]]')

    BREAK_NONE = 0
    BREAK_ROW = 1
    BREAK_COLUMN = 2

    SHEETSTATE_VISIBLE = 'visible'
    SHEETSTATE_HIDDEN = 'hidden'
    SHEETSTATE_VERYHIDDEN = 'veryHidden'

    # Paper size
    PAPERSIZE_LETTER = '1'
    PAPERSIZE_LETTER_SMALL = '2'
    PAPERSIZE_TABLOID = '3'
    PAPERSIZE_LEDGER = '4'
    PAPERSIZE_LEGAL = '5'
    PAPERSIZE_STATEMENT = '6'
    PAPERSIZE_EXECUTIVE = '7'
    PAPERSIZE_A3 = '8'
    PAPERSIZE_A4 = '9'
    PAPERSIZE_A4_SMALL = '10'
    PAPERSIZE_A5 = '11'

    # Page orientation
    ORIENTATION_PORTRAIT = 'portrait'
    ORIENTATION_LANDSCAPE = 'landscape'

    def __init__(self, parent_workbook, title='Sheet'):
        self._parent = parent_workbook
        self._title = ''
        if not title:
            self.title = 'Sheet%d' % (1 + len(self._parent.worksheets))
        else:
            self.title = title
        self.row_dimensions = {}
        self.column_dimensions = DimensionHolder(worksheet=self, direction=[])
        self.page_breaks = []
        self._cells = {}
        self._styles = {}
        self._charts = []
        self._images = []
        self._comment_count = 0
        self._merged_cells = []
        self.relationships = []
        self._data_validations = []
        self.sheet_state = self.SHEETSTATE_VISIBLE
        self.page_setup = PageSetup()
        self.print_options = PrintOptions()
        self.page_margins = PageMargins()
        self.header_footer = HeaderFooter()
        self.sheet_view = SheetView()
        self.protection = SheetProtection()
        self.default_row_dimension = RowDimension(worksheet=self)
        self.default_column_dimension = ColumnDimension(worksheet=self)
        self._auto_filter = AutoFilter()
        self._freeze_panes = None
        self.paper_size = None
        self.formula_attributes = {}
        self.orientation = None
        self.conditional_formatting = ConditionalFormatting()
        self.vba_controls = None
        self.sheet_properties = WorksheetProperties()
        self.sheet_properties.outlinePr = Outline(summaryBelow=True,
                                                  summaryRight=True)

    @property
    def selected_cell(self):
        return self.sheet_view.selection.sqref

    @property
    def active_cell(self):
        return self.sheet_view.selection.activeCell

    @property
    def show_gridlines(self):
        return self.sheet_view.showGridLines

    def __repr__(self):
        return self.repr_format % self.title

    """ To keep compatibility with previous versions"""

    @property
    def show_summary_below(self):
        return self.sheet_properties.outlinePr.summaryBelow

    @property
    def show_summary_right(self):
        return self.sheet_properties.outlinePr.summaryRight

    @property
    def vba_code(self):
        for attr in ("codeName", "enableFormatConditionsCalculation",
                     "filterMode", "published", "syncHorizontal", "syncRef",
                     "syncVertical", "transitionEvaluation",
                     "transitionEntry"):
            value = getattr(self.sheet_properties, attr)
            if value is not None:
                yield attr, safe_string(value)

    @vba_code.setter
    def vba_code(self, value):
        for k, v in value.items():
            if k in ("codeName", "enableFormatConditionsCalculation",
                     "filterMode", "published", "syncHorizontal", "syncRef",
                     "syncVertical", "transitionEvaluation",
                     "transitionEntry"):
                setattr(self.sheet_properties, k, v)

    """ End To keep compatibility with previous versions"""

    @property
    def parent(self):
        return self._parent

    @property
    def encoding(self):
        return self._parent.encoding

    @deprecated('this method is private and should not be called directly')
    def garbage_collect(self):
        self._garbage_collect()

    def _garbage_collect(self):
        """Delete cells that are not storing a value."""
        delete_list = []
        for coordinate, cell in iteritems(self._cells):
            if (cell.value in ('', None) and cell.comment is None
                    and (coordinate not in self._styles
                         or cell.style == DEFAULTS_STYLE)):
                delete_list.append(coordinate)
        for coordinate in delete_list:
            del self._cells[coordinate]

    def get_cell_collection(self):
        """Return an unordered list of the cells in this worksheet."""
        return self._cells.values()

    @property
    def title(self):
        """Return the title for this sheet."""
        return self._title

    @title.setter
    def title(self, value):
        """Set a sheet title, ensuring it is valid.
           Limited to 31 characters, no special characters."""
        if self.bad_title_char_re.search(value):
            msg = 'Invalid character found in sheet title'
            raise SheetTitleException(msg)
        value = self._unique_sheet_name(value)
        if len(value) > 31:
            msg = 'Maximum 31 characters allowed in sheet title'
            raise SheetTitleException(msg)
        self._title = value

    @deprecated('this method is private and should not be called directly')
    def unique_sheet_name(self, value):
        return self._unique_sheet_name(value)

    def _unique_sheet_name(self, value):
        # check if sheet_name already exists
        # do this *before* length check
        sheets = self._parent.get_sheet_names()
        if value in sheets:
            sheets = ",".join(sheets)
            sheet_title_regex = re.compile("(?P<title>%s)(?P<count>\d?),?" %
                                           re.escape(value))
            matches = sheet_title_regex.findall(sheets)
            if matches:
                # use name, but append with the next highest integer
                counts = [int(idx) for (t, idx) in matches if idx.isdigit()]
                if counts:
                    highest = max(counts)
                else:
                    highest = 0
                value = "%s%d" % (value, highest + 1)
        return value

    @property
    def auto_filter(self):
        """Return :class:`~openpyxl.worksheet.AutoFilter` object.

        `auto_filter` attribute stores/returns string until 1.8. You should change your code like ``ws.auto_filter.ref = "A1:A3"``.

        .. versionchanged:: 1.9
        """
        return self._auto_filter

    @property
    def freeze_panes(self):
        if self.sheet_view.pane is not None:
            return self.sheet_view.pane.topLeftCell

    @freeze_panes.setter
    def freeze_panes(self, topLeftCell):
        if not topLeftCell:
            topLeftCell = None
        elif isinstance(topLeftCell, str):
            topLeftCell = topLeftCell.upper()
        else:  # Assume a cell
            topLeftCell = topLeftCell.coordinate
        if topLeftCell == 'A1':
            topLeftCell = None

        if not topLeftCell:
            self.sheet_view.pane = None
            return

        if topLeftCell is not None:
            colName, row = coordinate_from_string(topLeftCell)
            column = column_index_from_string(colName)

        view = self.sheet_view
        view.pane = Pane(topLeftCell=topLeftCell,
                         activePane="topRight",
                         state="frozen")
        view.selection[0].pane = "topRight"

        if column > 1:
            view.pane.xSplit = column - 1
        if row > 1:
            view.pane.ySplit = row - 1
            view.pane.activePane = 'bottomLeft'
            view.selection[0].pane = "bottomLeft"
            if column > 1:
                view.selection[0].pane = "bottomRight"
                view.pane.activePane = 'bottomRight'

        if row > 1 and column > 1:
            sel = list(view.selection)
            sel.insert(0,
                       Selection(pane="topRight", activeCell=None, sqref=None))
            sel.insert(
                1, Selection(pane="bottomLeft", activeCell=None, sqref=None))
            view.selection = sel

    def add_print_title(self, n, rows_or_cols='rows'):
        """ Print Titles are rows or columns that are repeated on each printed sheet.
        This adds n rows or columns at the top or left of the sheet
        """

        scope = self.parent.get_index(self)

        if rows_or_cols == 'cols':
            r = '$A:$%s' % get_column_letter(n)
        else:
            r = '$1:$%d' % n

        self.parent.create_named_range('_xlnm.Print_Titles', self, r, scope)

    def cell(self, coordinate=None, row=None, column=None, value=None):
        """Returns a cell object based on the given coordinates.

        Usage: cell(coodinate='A15') **or** cell(row=15, column=1)

        If `coordinates` are not given, then row *and* column must be given.

        Cells are kept in a dictionary which is empty at the worksheet
        creation.  Calling `cell` creates the cell in memory when they
        are first accessed, to reduce memory usage.

        :param coordinate: coordinates of the cell (e.g. 'B12')
        :type coordinate: string

        :param row: row index of the cell (e.g. 4)
        :type row: int

        :param column: column index of the cell (e.g. 3)
        :type column: int

        :raise: InsufficientCoordinatesException when coordinate or (row and column) are not given

        :rtype: :class:openpyxl.cell.Cell

        """
        if coordinate is None:
            if (row is None or column is None):
                msg = "You have to provide a value either for " \
                    "'coordinate' or for 'row' *and* 'column'"
                raise InsufficientCoordinatesException(msg)
            else:
                column = get_column_letter(column)
                coordinate = '%s%s' % (column, row)
        else:
            coordinate = coordinate.upper().replace('$', '')

        if coordinate not in self._cells:
            if row is None or column is None:
                column, row = coordinate_from_string(coordinate)
            self._new_cell(column, row, value)

        return self._cells[coordinate]

    def _get_cell(self, coordinate):
        """
        Internal method for getting a cell from a worksheet.
        Will create a new cell if one doesn't already exist.
        """
        coordinate = coordinate.upper()
        if not coordinate in self._cells:
            column, row = coordinate_from_string(coordinate)
            self._new_cell(column, row)
        return self._cells[coordinate]

    def _new_cell(self, column, row, value=None):
        cell = Cell(self, column, row, value)
        self._add_cell(cell)

    def _add_cell(self, cell):
        """
        Internal method for adding cell objects.
        """
        column = cell.column
        row = cell.row
        self._cells[cell.coordinate] = cell
        if column not in self.column_dimensions:
            self.column_dimensions[column] = ColumnDimension(index=column,
                                                             worksheet=self)
        if row not in self.row_dimensions:
            self.row_dimensions[row] = RowDimension(index=row, worksheet=self)
        self._cells[cell.coordinate] = cell

    def __getitem__(self, key):
        """Convenience access by Excel style address"""
        if isinstance(key, slice):
            return self.iter_rows("{0}:{1}".format(key.start, key.stop))
        if ":" in key:
            return self.iter_rows(key)
        return self._get_cell(key)

    def __setitem__(self, key, value):
        self[key].value = value

    def get_highest_row(self):
        """Returns the maximum row index containing data

        :rtype: int
        """
        if self.row_dimensions:
            return max(self.row_dimensions)
        else:
            return 0

    @property
    def min_row(self):
        if self.row_dimensions:
            return min(self.row_dimensions)
        else:
            return 1

    @property
    def max_row(self):
        return self.get_highest_row()

    def get_highest_column(self):
        """Get the largest value for column currently stored.

        :rtype: int
        """
        if self.column_dimensions:
            return max([
                column_index_from_string(column_index)
                for column_index in self.column_dimensions
            ])
        else:
            return 1

    @property
    def min_col(self):
        if self.column_dimensions:
            return max([
                column_index_from_string(column_index)
                for column_index in self.column_dimensions
            ])
        else:
            return 1

    @property
    def max_column(self):
        return self.get_highest_column()

    def calculate_dimension(self):
        """Return the minimum bounding range for all cells containing data."""
        return '%s%d:%s%d' % (get_column_letter(1), self.min_row,
                              get_column_letter(self.max_column
                                                or 1), self.max_row or 1)

    @property
    def dimensions(self):
        return self.calculate_dimension()

    def iter_rows(self, range_string=None, row_offset=0, column_offset=0):
        """
        Returns a squared range based on the `range_string` parameter,
        using generators.
        If no range is passed, will iterate over all cells in the worksheet

        :param range_string: range of cells (e.g. 'A1:C4')
        :type range_string: string

        :param row_offset: additional rows (e.g. 4)
        :type row: int

        :param column_offset: additonal columns (e.g. 3)
        :type column: int

        :rtype: generator
        """
        if range_string is not None:
            min_col, min_row, max_col, max_row = range_boundaries(
                range_string.upper())
        else:
            min_col, min_row, max_col, max_row = (1, 1, self.max_column,
                                                  self.max_row)
        if max_col is not None:
            max_col += column_offset
        if max_row is not None:
            max_row += row_offset
        return self.get_squared_range(min_col + column_offset,
                                      min_row + row_offset, max_col, max_row)

    def get_squared_range(self, min_col, min_row, max_col, max_row):
        """Returns a 2D array of cells

        :param min_col: smallest column index (1-based index)
        :type min_col: int

        :param min_row: smallest row index (1-based index)
        :type min_row: int

        :param max_col: largest column index (1-based index)
        :type max_col: int

        :param max_row: smallest row index (1-based index)
        :type max_row: int

        :rtype: generator
        """
        # Column name cache is very important in large files.
        cache = dict((col, get_column_letter(col))
                     for col in range(min_col, max_col + 1))
        for row in range(min_row, max_row + 1):
            yield tuple(
                self._get_cell('%s%d' % (cache[col], row))
                for col in range(min_col, max_col + 1))

    def get_named_range(self, range_string):
        """
        Returns a 2D array of cells, with optional row and column offsets.

        :param range_string: `named range` name
        :type range_string: string

        :rtype: tuples of tuples of :class:`openpyxl.cell.Cell`
        """
        named_range = self._parent.get_named_range(range_string)
        if named_range is None:
            msg = '%s is not a valid range name' % range_string
            raise NamedRangeException(msg)
        if not isinstance(named_range, NamedRange):
            msg = '%s refers to a value, not a range' % range_string
            raise NamedRangeException(msg)

        result = []
        for destination in named_range.destinations:
            worksheet, cells_range = destination

            if worksheet is not self:
                msg = 'Range %s is not defined on worksheet %s' % \
                    (cells_range, self.title)
                raise NamedRangeException(msg)

            for row in self.iter_rows(cells_range):
                result.extend(row)

        return tuple(result)

    @deprecated("""
    Use .iter_rows() working with coordinates 'A1:D4',
    and .get_squared_range() when working with indices (1, 1, 4, 4)
    and .get_named_range() for named ranges""")
    def range(self, range_string, row=0, column=0):
        """Returns a 2D array of cells, with optional row and column offsets.

        :param range_string: cell range string or `named range` name
        :type range_string: string

        :param row: number of rows to offset
        :type row: int

        :param column: number of columns to offset
        :type column: int

        :rtype: tuples of tuples of :class:`openpyxl.cell.Cell`

        """
        _rs = range_string.upper()
        m = ABSOLUTE_RE.match(_rs)
        # R1C1 range
        if m is not None:
            rows = self.iter_rows(_rs, row_offset=row, column_offset=column)
            return tuple(row for row in rows)
        else:
            return self.get_named_range(range_string)

    @deprecated("Access styles directly from cells, columns or rows")
    def get_style(self, coordinate):
        """Return a copy of the style object for the specified cell."""
        try:
            obj = self[coordinate]
        except ValueError:
            if isinstance(coordinate, int):
                obj = self.row_dimensions[obj]
            else:
                obj = self.column_dimensions[obj]
        return obj.style

    @deprecated("Set styles directly on cells, columns or rows")
    def set_style(self, coordinate, style):
        try:
            obj = self[coordinate]
        except ValueError:
            if isinstance(coordinate, int):
                obj = self.row_dimensions[obj]
            else:
                obj = self.column_dimensions[obj]
        obj.style = style

    def set_printer_settings(self, paper_size, orientation):
        """Set printer settings """

        self.page_setup.paperSize = paper_size
        if orientation not in (self.ORIENTATION_PORTRAIT,
                               self.ORIENTATION_LANDSCAPE):
            raise ValueError(
                "Values should be %s or %s" %
                (self.ORIENTATION_PORTRAIT, self.ORIENTATION_LANDSCAPE))
        self.page_setup.orientation = orientation

    @deprecated('this method is private and should not be called directly')
    def create_relationship(self, rel_type):
        return self._create_relationship(rel_type)

    def _create_relationship(self, rel_type):
        """Add a relationship for this sheet."""
        rel = Relationship(rel_type)
        self.relationships.append(rel)
        rel_id = self.relationships.index(rel)
        rel.id = 'rId' + str(rel_id + 1)
        return self.relationships[rel_id]

    def add_data_validation(self, data_validation):
        """ Add a data-validation object to the sheet.  The data-validation
            object defines the type of data-validation to be applied and the
            cell or range of cells it should apply to.
        """
        data_validation._sheet = self
        self._data_validations.append(data_validation)

    def add_chart(self, chart):
        """ Add a chart to the sheet """
        chart._sheet = self
        self._charts.append(chart)
        self.add_drawing(chart)

    def add_image(self, img):
        """ Add an image to the sheet """
        img._sheet = self
        self._images.append(img)
        self.add_drawing(img)

    def add_drawing(self, obj):
        """Images and charts both create drawings"""
        self._parent.drawings.append(obj)

    def add_rel(self, obj):
        """Drawings and hyperlinks create relationships"""
        self._parent.relationships.append(obj)

    def merge_cells(self,
                    range_string=None,
                    start_row=None,
                    start_column=None,
                    end_row=None,
                    end_column=None):
        """ Set merge on a cell range.  Range is a cell range (e.g. A1:E1) """
        if not range_string:
            if (start_row is None or start_column is None or end_row is None
                    or end_column is None):
                msg = "You have to provide a value either for "\
                    "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'"
                raise InsufficientCoordinatesException(msg)
            else:
                range_string = '%s%s:%s%s' % (
                    get_column_letter(start_column), start_row,
                    get_column_letter(end_column), end_row)
        elif ":" not in range_string:
            if COORD_RE.match(range_string):
                return  # Single cell
            msg = "Range must be a cell range (e.g. A1:E1)"
            raise InsufficientCoordinatesException(msg)
        else:
            range_string = range_string.replace('$', '')

        if range_string not in self._merged_cells:
            self._merged_cells.append(range_string)

        cells = cells_from_range(range_string)
        # only the top-left cell is preserved
        for c in islice(chain.from_iterable(cells), 1, None):
            if c in self._cells:
                del self._cells[c]

    @property
    def merged_cells(self):
        """Utility for checking whether a cell has been merged or not"""
        cells = set()
        for _range in self._merged_cells:
            for row in cells_from_range(_range):
                cells = cells.union(set(row))
        return cells

    @property
    def merged_cell_ranges(self):
        """Public attribute for which cells have been merged"""
        return self._merged_cells

    def unmerge_cells(self,
                      range_string=None,
                      start_row=None,
                      start_column=None,
                      end_row=None,
                      end_column=None):
        """ Remove merge on a cell range.  Range is a cell range (e.g. A1:E1) """
        if not range_string:
            if start_row is None or start_column is None or end_row is None or end_column is None:
                msg = "You have to provide a value either for "\
                    "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'"
                raise InsufficientCoordinatesException(msg)
            else:
                range_string = '%s%s:%s%s' % (
                    get_column_letter(start_column), start_row,
                    get_column_letter(end_column), end_row)
        elif len(range_string.split(':')) != 2:
            msg = "Range must be a cell range (e.g. A1:E1)"
            raise InsufficientCoordinatesException(msg)
        else:
            range_string = range_string.replace('$', '')

        if range_string in self._merged_cells:
            self._merged_cells.remove(range_string)

        else:
            msg = 'Cell range %s not known as merged.' % range_string
            raise InsufficientCoordinatesException(msg)

    def append(self, iterable):
        """Appends a group of values at the bottom of the current sheet.

        * If it's a list: all values are added in order, starting from the first column
        * If it's a dict: values are assigned to the columns indicated by the keys (numbers or letters)

        :param iterable: list, range or generator, or dict containing values to append
        :type iterable: list/tuple/range/generator or dict

        Usage:

        * append(['This is A1', 'This is B1', 'This is C1'])
        * **or** append({'A' : 'This is A1', 'C' : 'This is C1'})
        * **or** append({1 : 'This is A1', 3 : 'This is C1'})

        :raise: TypeError when iterable is neither a list/tuple nor a dict

        """
        row_idx = self.max_row + 1

        if (isinstance(iterable, (list, tuple, range))
                or isgenerator(iterable)):
            for col_idx, content in enumerate(iterable, 1):
                col = get_column_letter(col_idx)
                if isinstance(content, Cell):
                    # compatible with write-only mode
                    cell = content
                    cell.parent = self
                    cell.column = col
                    cell.row = row_idx
                    cell.coordinate = "%s%s" % (col, row_idx)
                    self._add_cell(cell)
                else:
                    cell = self._new_cell(col, row_idx, content)

        elif isinstance(iterable, dict):
            for col_idx, content in iteritems(iterable):
                if isinstance(col_idx, basestring):
                    col_idx = column_index_from_string(col_idx)
                self.cell(row=row_idx, column=col_idx, value=content)

        else:
            self._invalid_row(iterable)
        self.row_dimensions[row_idx] = RowDimension(worksheet=self,
                                                    index=row_idx)

    def _invalid_row(self, iterable):
        raise TypeError(
            'Value must be a list, tuple, range or generator, or a dict. Supplied value is {0}'
            .format(type(iterable)))

    @property
    def rows(self):
        """Iterate over all rows in the worksheet"""
        return tuple(self.iter_rows())

    @property
    def columns(self):
        """Iterate over all columns in the worksheet"""
        max_row = self.max_row
        min_row = 1
        cols = []
        for col_idx in range(self.max_column):
            cells = self.get_squared_range(col_idx + 1, min_row, col_idx + 1,
                                           max_row)
            col = chain.from_iterable(cells)
            cols.append(tuple(col))
        return tuple(cols)

    def point_pos(self, left=0, top=0):
        """ tells which cell is under the given coordinates (in pixels)
        counting from the top-left corner of the sheet.
        Can be used to locate images and charts on the worksheet """
        current_col = 1
        current_row = 1
        column_dimensions = self.column_dimensions
        row_dimensions = self.row_dimensions
        default_width = points_to_pixels(DEFAULT_COLUMN_WIDTH)
        default_height = points_to_pixels(DEFAULT_ROW_HEIGHT)
        left_pos = 0
        top_pos = 0

        while left_pos <= left:
            letter = get_column_letter(current_col)
            current_col += 1
            if letter in column_dimensions:
                cdw = column_dimensions[letter].width
                if cdw is not None:
                    left_pos += points_to_pixels(cdw)
                    continue
            left_pos += default_width

        while top_pos <= top:
            row = current_row
            current_row += 1
            if row in row_dimensions:
                rdh = row_dimensions[row].height
                if rdh is not None:
                    top_pos += points_to_pixels(rdh)
                    continue
            top_pos += default_height

        return (letter, row)