Ejemplo n.º 1
0
def get_text(rich_node):
    """Return the text of the ``t`` child of *rich_node* as unicode.

    Leading and trailing whitespace is stripped unless the ``t`` element
    has a ``space`` attribute (in ``XML_NS``) equal to ``'preserve'``.
    A missing or empty text yields an empty unicode string.
    """
    t_element = rich_node.find('{%s}t' % SHEET_MAIN_NS)
    content = t_element.text
    if not content:
        content = unicode('')

    keep_whitespace = t_element.get('{%s}space' % XML_NS) == 'preserve'
    if keep_whitespace:
        return unicode(content)
    return unicode(content.strip())
Ejemplo n.º 2
0
def get_text(rich_node):
    """Return the text of the ``t`` child of *rich_node* as unicode.

    Whitespace is stripped unless the ``t`` element's ``space`` attribute
    (in ``XML_NS``) is ``'preserve'``; afterwards every occurrence of
    ``'x005F_'`` is removed from the text.
    """
    t_element = rich_node.find('{%s}t' % SHEET_MAIN_NS)
    content = t_element.text or unicode('')

    preserve = t_element.get('{%s}space' % XML_NS) == 'preserve'
    if not preserve:
        content = content.strip()

    # fix XML escaping sequence for '_x'
    unescaped = content.replace('x005F_', '')
    return unicode(unescaped)
Ejemplo n.º 3
0
 def value(self):
     """Return the stored value converted according to the cell type.

     ``None`` stays ``None``; booleans compare the raw value to ``'1'``;
     dates go through ``from_excel``; inline and cached-formula strings
     are converted with ``unicode``; shared strings are looked up by
     integer index in ``self.shared_strings``.  Anything else is
     returned unchanged.
     """
     if self._value is None:
         return None
     if self.data_type == Cell.TYPE_BOOL:
         return self._value == '1'
     if self.is_date:
         return from_excel(self._value, self.base_date)
     if self.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
         return unicode(self._value)
     if self.data_type == Cell.TYPE_STRING:
         shared_index = int(self._value)
         return unicode(self.shared_strings[shared_index])
     return self._value
Ejemplo n.º 4
0
 def value(self):
     """Return the stored value converted according to ``data_type``.

     ``'n'`` cells are passed through ``from_excel`` when their number
     format is a date format, otherwise returned as stored; ``'b'``
     cells compare the raw value to ``'1'``; inline and cached-formula
     strings are converted with ``unicode``; ``'s'`` cells are looked up
     by integer index in ``self.shared_strings``.
     """
     if self._value is None:
         return None
     if self.data_type == 'n':
         if not is_date_format(self.number_format):
             return self._value
         return from_excel(self._value, self.base_date)
     if self.data_type == 'b':
         return self._value == '1'
     if self.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
         return unicode(self._value)
     if self.data_type == 's':
         shared_index = int(self._value)
         return unicode(self.shared_strings[shared_index])
     return self._value
Ejemplo n.º 5
0
 def check_string(self, value):
     """Normalise *value* to a unicode string that is safe to store.

     ``None`` is passed through.  Non-unicode input is decoded with
     ``self.encoding``; the result is cut to 32,767 characters.
     Raises ``IllegalCharacterError`` if ``ILLEGAL_CHARACTERS_RE`` finds
     a hit anywhere in the truncated text.
     """
     if value is None:
         return None
     if isinstance(value, unicode):
         text = value
     else:
         text = unicode(value, self.encoding)
     # a string must never be longer than 32,767 characters
     text = unicode(text)[:32767]
     first_illegal = next(ILLEGAL_CHARACTERS_RE.finditer(text), None)
     if first_illegal is not None:
         raise IllegalCharacterError
     return text
Ejemplo n.º 6
0
 def check_string(self, value):
     """Check string coding, length, and line break character

     Non-unicode input is decoded with ``self.encoding``; the result is
     truncated to 32,767 characters and ``"\\r\\n"`` sequences are
     rewritten to ``"\\n"``.

     Raises ``IllegalCharacterError`` if ``ILLEGAL_CHARACTERS_RE``
     matches anywhere in the (truncated) string.
     """
     # convert to unicode string
     if not isinstance(value, unicode):
         value = unicode(value, self.encoding)
     value = unicode(value)
     # string must never be longer than 32,767 characters
     # truncate if necessary
     value = value[:32767]
     # search() scans the whole string; match() would only look at the
     # very first position and let later illegal characters through
     if ILLEGAL_CHARACTERS_RE.search(value):
         raise IllegalCharacterError
     # we require that newline is represented as "\n" in core;
     # only the "\r\n" form is rewritten here, a lone "\r" is kept
     value = value.replace('\r\n', '\n')
     return value
Ejemplo n.º 7
0
    def __str__(self):
        """
        Join the left, center and right parts into one escaped string.

        Parts whose ``text`` is ``None`` are skipped; each remaining part
        is prefixed with ``&`` and its key.  Expanded controls such as
        ``&[Page]`` are then replaced by their mini-format codes.
        """
        # expanded control -> mini-format equivalent
        codes = {'&[Tab]': '&A', '&[Pages]': '&N', '&[Date]': '&D',
                 '&[Path]': '&Z', '&[Page]': '&P', '&[Time]': '&T', '&[File]': '&F',
                 '&[Picture]': '&G'}

        # one escaped, parenthesised alternative per control
        alternatives = ["({0})".format(re.escape(control)) for control in codes]
        pattern = re.compile("|".join(alternatives))

        pieces = [u"&{0}{1}".format(key, unicode(section))
                  for key, section in zip(
                      self.__keys, [self.left, self.center, self.right])
                  if section.text is not None]
        packed = "".join(pieces)
        packed = pattern.sub(lambda found: codes[found.group(0)], packed)
        return escape(packed)
Ejemplo n.º 8
0
 def to_tree(self, tagname):
     """
     Build an element named *tagname* whose text is ``unicode(self)``
     """
     node = Element(tagname)
     node.text = unicode(self)
     return node
Ejemplo n.º 9
0
    def set_formula(self, addr, formula):
        """Assign *formula* to the cell at *addr* and rebuild the graph.

        For a range cell the formula is set on the first member and
        translated for every following member.  The cellmap and graph
        are then regenerated from the touched cells and *addr* is
        re-evaluated with ``should_eval`` temporarily forced to
        ``'always'``.

        Raises ``Exception`` if *addr* is not in ``self.cellmap``.
        """
        if addr not in self.cellmap:
            raise Exception('Cell %s not in cellmap' % addr)
        target = self.cellmap[addr]

        seeds = [target]

        if not target.is_range:
            target.formula = formula
        else:
            # the Translator needs references without the sheet prefix
            for position, member in enumerate(target.range.cells):
                if position == 0:
                    member.formula = formula
                    local_ref = member.address().split('!')[1]
                    translator = Translator(unicode('=' +    formula), local_ref)
                else:
                    local_ref = member.address().split('!')[1]
                    shifted = translator.translate_formula(local_ref)
                    member.formula = shifted[1:] # drop the leading '='

                seeds.append(member)

        new_cellmap, graph = graph_from_seeds(seeds, self)

        self.cellmap = new_cellmap
        self.G = graph

        # force one evaluation, then restore the previous setting
        previous_mode = self.cellmap[addr].should_eval
        self.cellmap[addr].should_eval = 'always'
        self.evaluate(addr)
        self.cellmap[addr].should_eval = previous_mode

        print("Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (len(graph.nodes()),len(graph.edges()),len(new_cellmap)))
Ejemplo n.º 10
0
def test_get_xml_iter():
    """Exercise ``_get_xml_iter`` with each supported kind of source.

    Covers a plain string, a unicode string, an open temporary file and
    a member opened from a zip archive.
    """
    #1 file object
    #2 stream (file-like)
    #3 string
    #4 zipfile
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile
    FUT = _get_xml_iter
    s = ""
    stream = FUT(s)
    assert isinstance(stream, BytesIO), type(stream)

    u = unicode(s)
    stream = FUT(u)
    assert isinstance(stream, BytesIO), type(stream)

    f = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    try:
        stream = FUT(f)
        # `tempfile` is not a type (and is not bound in this function), so
        # it cannot be used with isinstance(); an open file is expected to
        # be handed back unchanged -- TODO confirm against _get_xml_iter
        assert stream == f, type(stream)
    finally:
        # release the temporary file even if the assertion fails
        f.close()

    from zipfile import ZipFile
    t = TemporaryFile()
    z = ZipFile(t, mode="w")
    z.writestr("test", "whatever")
    stream = FUT(z.open("test"))
    assert hasattr(stream, "read")
    z.close()
    def test_xfs_fonts(self):
        """A bold 12pt font on A1 must show up in the cell xfs and fonts."""
        bold_12pt = Style(font=Font(size=12, bold=True))
        self.worksheet.cell('A1').style = bold_12pt
        writer = StyleWriter(self.workbook)

        # a single placeholder object is passed for number formats,
        # fills and borders; only the fonts element is inspected
        placeholder = DummyElement()
        fonts = Element("fonts")
        writer._write_cell_xfs(placeholder, fonts, placeholder, placeholder)
        root_xml = unicode(tostring(writer._root))
        assert """applyFont="1" """ in root_xml
        assert """fontId="1" """ in root_xml

        expected = """
        <fonts count="2">
        <font>
            <sz val="12.0" />
            <color rgb="00000000"></color>
            <name val="Calibri" />
            <family val="2" />
            <b></b>
        </font>
        </fonts>
        """
        fonts_xml = tostring(fonts)
        diff = compare_xml(fonts_xml, expected)
        assert diff is None, diff
Ejemplo n.º 12
0
def test_read_complex_formulae(datadir):
    """Check how plain, unicode, shared and array formulae are read.

    Loads ``formulae.xlsx`` from the ``reader`` directory under *datadir*
    and inspects cell data types, values and ``ws.formula_attributes``.
    """
    datadir.join("reader").chdir()
    wb = load_workbook("formulae.xlsx")
    ws = wb.get_active_sheet()

    # Test normal formulae
    assert ws.cell("A1").data_type != "f"
    assert ws.cell("A2").data_type != "f"
    assert ws.cell("A3").data_type == "f"
    assert "A3" not in ws.formula_attributes
    assert ws.cell("A3").value == "=12345"
    assert ws.cell("A4").data_type == "f"
    assert "A4" not in ws.formula_attributes
    assert ws.cell("A4").value == "=A2+A3"
    assert ws.cell("A5").data_type == "f"
    assert "A5" not in ws.formula_attributes
    assert ws.cell("A5").value == "=SUM(A2:A4)"

    # Test unicode
    expected = '=IF(ISBLANK(B16), "Düsseldorf", B16)'
    # Hack to prevent pytest doing its own unicode conversion:
    # decode only where unicode() accepts an encoding argument here
    try:
        expected = unicode(expected, "UTF8")
    except TypeError:
        pass
    assert ws["A16"].value == expected

    # Test shared formulae: only the first cell (B7) carries the "ref"
    # and the formula text, the others are expected to read as "="
    assert ws.cell("B7").data_type == "f"
    assert ws.formula_attributes["B7"]["t"] == "shared"
    assert ws.formula_attributes["B7"]["si"] == "0"
    assert ws.formula_attributes["B7"]["ref"] == "B7:E7"
    assert ws.cell("B7").value == "=B4*2"
    assert ws.cell("C7").data_type == "f"
    assert ws.formula_attributes["C7"]["t"] == "shared"
    assert ws.formula_attributes["C7"]["si"] == "0"
    assert "ref" not in ws.formula_attributes["C7"]
    assert ws.cell("C7").value == "="
    assert ws.cell("D7").data_type == "f"
    assert ws.formula_attributes["D7"]["t"] == "shared"
    assert ws.formula_attributes["D7"]["si"] == "0"
    assert "ref" not in ws.formula_attributes["D7"]
    assert ws.cell("D7").value == "="
    assert ws.cell("E7").data_type == "f"
    assert ws.formula_attributes["E7"]["t"] == "shared"
    assert ws.formula_attributes["E7"]["si"] == "0"
    assert "ref" not in ws.formula_attributes["E7"]
    assert ws.cell("E7").value == "="

    # Test array formulae
    assert ws.cell("C10").data_type == "f"
    # NOTE(review): this is a substring test against the "ref" *value*
    # (asserted to be "C10:C14" below), so it cannot fail -- confirm intent
    assert "ref" not in ws.formula_attributes["C10"]["ref"]
    assert ws.formula_attributes["C10"]["t"] == "array"
    assert "si" not in ws.formula_attributes["C10"]
    assert ws.formula_attributes["C10"]["ref"] == "C10:C14"
    assert ws.cell("C10").value == "=SUM(A10:A14*B10:B14)"
    assert ws.cell("C11").data_type != "f"
Ejemplo n.º 13
0
def test_read_complex_formulae():
    """Check how plain, unicode, shared and array formulae are read.

    Loads ``reader/formulae.xlsx`` below ``DATADIR`` and inspects cell
    data types, values and ``ws.formula_attributes``.
    """
    null_file = os.path.join(DATADIR, 'reader', 'formulae.xlsx')
    wb = load_workbook(null_file)
    ws = wb.get_active_sheet()

    # Test normal formulae
    assert ws.cell('A1').data_type != 'f'
    assert ws.cell('A2').data_type != 'f'
    assert ws.cell('A3').data_type == 'f'
    assert 'A3' not in ws.formula_attributes
    assert ws.cell('A3').value == '=12345'
    assert ws.cell('A4').data_type == 'f'
    assert 'A4' not in ws.formula_attributes
    assert ws.cell('A4').value == '=A2+A3'
    assert ws.cell('A5').data_type == 'f'
    assert 'A5' not in ws.formula_attributes
    assert ws.cell('A5').value == '=SUM(A2:A4)'

    # Test unicode
    expected = '=IF(ISBLANK(B16), "Düsseldorf", B16)'
    # Hack to prevent pytest doing its own unicode conversion:
    # decode only where unicode() accepts an encoding argument here
    try:
        expected = unicode(expected, "UTF8")
    except TypeError:
        pass
    assert ws['A16'].value == expected

    # Test shared formulae: only the first cell (B7) carries the 'ref'
    # and the formula text, the others are expected to read as '='
    assert ws.cell('B7').data_type == 'f'
    assert ws.formula_attributes['B7']['t'] == 'shared'
    assert ws.formula_attributes['B7']['si'] == '0'
    assert ws.formula_attributes['B7']['ref'] == 'B7:E7'
    assert ws.cell('B7').value == '=B4*2'
    assert ws.cell('C7').data_type == 'f'
    assert ws.formula_attributes['C7']['t'] == 'shared'
    assert ws.formula_attributes['C7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['C7']
    assert ws.cell('C7').value == '='
    assert ws.cell('D7').data_type == 'f'
    assert ws.formula_attributes['D7']['t'] == 'shared'
    assert ws.formula_attributes['D7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['D7']
    assert ws.cell('D7').value == '='
    assert ws.cell('E7').data_type == 'f'
    assert ws.formula_attributes['E7']['t'] == 'shared'
    assert ws.formula_attributes['E7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['E7']
    assert ws.cell('E7').value == '='

    # Test array formulae
    assert ws.cell('C10').data_type == 'f'
    # NOTE(review): this is a substring test against the 'ref' *value*
    # (asserted to be 'C10:C14' below), so it cannot fail -- confirm intent
    assert 'ref' not in ws.formula_attributes['C10']['ref']
    assert ws.formula_attributes['C10']['t'] == 'array'
    assert 'si' not in ws.formula_attributes['C10']
    assert ws.formula_attributes['C10']['ref'] == 'C10:C14'
    assert ws.cell('C10').value == '=SUM(A10:A14*B10:B14)'
    assert ws.cell('C11').data_type != 'f'
Ejemplo n.º 14
0
def get_string(string_index_node):
    """Return the text stored under one string index node.

    When the node has ``r`` children, the text of each one is read with
    ``get_text`` and the pieces are concatenated; otherwise the node
    itself is handed to ``get_text``.
    """
    runs = string_index_node.findall('{%s}r' % SHEET_MAIN_NS)
    if not runs:
        return get_text(string_index_node)
    pieces = [get_text(run) for run in runs]
    return unicode(''.join(pieces))
Ejemplo n.º 15
0
    def get_squared_range(self, min_col, min_row, max_col, max_row):
        """Yield one tuple of cells per row of the requested rectangle.

        Cells come from ``self.get_cells`` grouped by their ``row``.
        Rows that produce no cells, and columns missing inside a row, are
        filled with the placeholders returned by ``get_missing_cells``.
        Retrieved cells get their number format resolved from the style
        table and their ``internal_value`` converted according to type.
        """
        # xrange excludes its upper bound, so max_col itself is not listed
        expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)]
        current_row = min_row

        style_table = self._style_table
        for row, cells in groupby(self.get_cells(min_row, min_col,
                                                 max_row, max_col),
                                  operator.attrgetter('row')):
            full_row = []
            if current_row < row:

                # emit a row of placeholders for every row skipped by the reader
                for gap_row in xrange(current_row, row):
                    dummy_cells = get_missing_cells(gap_row, expected_columns)
                    yield tuple([dummy_cells[column] for column in expected_columns])
                    current_row = row

            # groupby hands out a one-shot iterator; materialise it
            temp_cells = list(cells)
            retrieved_columns = dict([(c.column, c) for c in temp_cells])
            missing_columns = list(set(expected_columns) - set(retrieved_columns.keys()))
            replacement_columns = get_missing_cells(row, missing_columns)

            for column in expected_columns:
                if column in retrieved_columns:
                    cell = retrieved_columns[column]
                    if cell.style_id is not None:
                        style = style_table[int(cell.style_id)]
                        cell = cell._replace(number_format=style.number_format.format_code) #pylint: disable-msg=W0212
                    if cell.internal_value is not None:
                        # NOTE(review): `in` is a containment test against
                        # Cell.TYPE_STRING, not an equality test -- confirm
                        # this is intended rather than `==`
                        if cell.data_type in Cell.TYPE_STRING:
                            cell = cell._replace(internal_value=unicode(self._string_table[int(cell.internal_value)])) #pylint: disable-msg=W0212
                        elif cell.data_type == Cell.TYPE_BOOL:
                            cell = cell._replace(internal_value=cell.internal_value == '1')
                        elif cell.is_date:
                            cell = cell._replace(internal_value=self._shared_date.from_julian(float(cell.internal_value)))
                        elif cell.data_type == Cell.TYPE_NUMERIC:
                            cell = cell._replace(internal_value=float(cell.internal_value))
                        elif cell.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
                            cell = cell._replace(internal_value=unicode(cell.internal_value))
                    full_row.append(cell)
                else:
                    full_row.append(replacement_columns[column])
            # the next row expected from the reader
            current_row = row + 1
            yield tuple(full_row)
Ejemplo n.º 16
0
def doTransfer(file_name,db_name):
    """Copy every sheet of an Excel workbook into an SQLite database.

    For each sheet a table named after the slugified sheet title is
    created, with an autoincrement ``ID`` column plus one ``TEXT`` column
    per cell of the first row (slugified).  All following rows are
    inserted as stripped text; a cell whose text is ``'None'`` is stored
    as an empty string.

    :param file_name: path of the workbook, passed to ``load_workbook``
    :param db_name: database name, passed to ``sqlite3.connect``
    """
    con = sqlite3.connect(db_name)
    try:
        wb = load_workbook(filename=file_name)

        for sheet in wb.sheetnames:
            ws = wb[sheet]
            table = str(slugify(sheet))

            # Table and column names cannot be bound as SQL parameters;
            # they are built from slugify() output.
            # NOTE(review): confirm slugify() only yields safe identifiers.
            columns = [slugify(cell.value) for cell in next(ws.rows)]
            column_defs = ''.join(', ' + name + ' TEXT' for name in columns)
            con.execute('CREATE TABLE ' + table
                        + '(ID INTEGER PRIMARY KEY AUTOINCREMENT'
                        + column_defs + ');')

            records = []
            for i, cells in enumerate(ws):
                if i == 0:
                    # the first row holds the column headers
                    continue
                record = []
                for cell in cells:
                    text = unicode(cell.value).strip()
                    record.append('' if text == 'None' else text)
                records.append(tuple(record))

            insert = ('INSERT INTO ' + table + '(' + ', '.join(columns)
                      + ') VALUES(' + ', '.join(['?'] * len(columns)) + ')')

            con.executemany(insert, records)
            con.commit()
    finally:
        # release the connection even when loading or inserting fails
        con.close()
Ejemplo n.º 17
0
def to_str(my_string):
    """Return *my_string* as text.

    A byte ``str`` (only when ``str`` and ``unicode`` differ) is decoded
    as UTF-8; a ``unicode`` value is returned unchanged; anything else is
    passed through ``str()``.  If that conversion fails, a message is
    printed and the original object is returned.
    """
    # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
    if type(my_string) == str and str != unicode:
        return unicode(my_string, 'utf-8')
    elif type(my_string) == unicode:
        return my_string
    else:
        try:
            return str(my_string)
        except Exception:
            # best effort: keep the fallback for conversion errors, but do
            # not swallow KeyboardInterrupt / SystemExit
            print('Couldnt parse as string', type(my_string))
            return my_string
Ejemplo n.º 18
0
    def __init__(self, address, sheet = None, value=None, formula=None, is_range = False, is_named_range=False, should_eval='normal'):
        """Create a cell identified by *address*.

        For a regular cell, ``$`` signs are removed from *address*, which
        is then split into sheet, column and row.  The sheet may come from
        the address or from *sheet*; it must be present in one of them,
        and the two must agree when both are given.  For a named range
        the address is stored as the name and the location fields are
        ``None``.

        A byte-string *formula* is decoded as UTF-8 when ``str`` and
        ``unicode`` differ; an empty formula is stored as ``None``.

        Raises ``Exception`` on a missing or mismatched sheet name.
        """
        super(Cell,self).__init__()

        if is_named_range == False:

            # remove $'s
            address = address.replace('$','')

            sh,c,r = split_address(address)

            # both are empty
            if not sheet and not sh:
                raise Exception("Sheet name may not be empty for cell address %s" % address)
            # both exist but disagree
            elif sh and sheet and sh != sheet:
                raise Exception("Sheet name mismatch for cell address %s: %s vs %s" % (address,sheet, sh))
            elif not sh and sheet:
                sh = sheet
            else:
                pass

            # we assume a cell's location can never change
            # (a previous assignment of the encoded `sheet` was dropped:
            # it was overwritten right here and could only raise)
            self.__sheet = sh
            self.__col = c
            self.__row = int(r)
            self.__col_idx = col2num(c)

        else:
            self.__named_range = address
            self.__sheet = None
            self.__col = None
            self.__row = None
            self.__col_idx = None

        # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
        if type(formula) == str and str != unicode:
            self.__formula = unicode(formula, 'utf-8') if formula else None
        else:
            self.__formula = formula if formula else None

        self.__value = value
        self.python_expression = None
        self.need_update = False
        self.should_eval = should_eval
        self.__compiled_expression = None
        self.__is_range = is_range

        # every cell has a unique id
        self.__id = Cell.next_id()
Ejemplo n.º 19
0
def _add_table_headers(ws):
    """
    Prepare the tables of *ws* and return their part list.

    A table without tableColumns gets them initialised; if it has a
    header row, column names are taken from the first row of the table's
    range (a warning is issued for non-string headings).  Every table is
    given a relationship on the worksheet and an entry in the result.
    """

    parts = TablePartList()

    for tbl in ws._tables:
        needs_columns = not tbl.tableColumns
        if needs_columns:
            tbl._initialise_columns()
        if needs_columns and tbl.headerRowCount:
            header_cells = ws[tbl.ref][0]
            for header, column in zip(header_cells, tbl.tableColumns):
                if header.data_type != "s":
                    warn("File may not be readable: column headings must be strings.")
                column.name = unicode(header.value)

        link = Relationship(Type=tbl._rel_type, Target="")
        ws._rels.append(link)
        tbl._rel_id = link.Id
        parts.append(Related(id=link.Id))

    return parts
Ejemplo n.º 20
0
def test_get_xml_iter():
    """Exercise ``_get_xml_iter`` with each supported kind of source.

    Bytes and unicode input must come back as ``BytesIO``, an open
    temporary file must come back equal to itself, and a member opened
    from a zip archive must expose ``read``.
    """
    #1 file object
    #2 stream (file-like)
    #3 string
    #4 zipfile
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile

    FUT = _get_xml_iter
    s = b""
    stream = FUT(s)
    assert isinstance(stream, BytesIO), type(stream)

    u = unicode(s)
    stream = FUT(u)
    assert isinstance(stream, BytesIO), type(stream)

    f = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    stream = FUT(f)
    assert stream == f
    f.close()

    t = TemporaryFile()
    z = ZipFile(t, mode="w")
    z.writestr("test", "whatever")
    stream = FUT(z.open("test"))
    assert hasattr(stream, "read")
    # z.close()
    # closing may raise IOError; fall back to closing the underlying
    # file object when the archive still holds one
    try:
        z.close()
    except IOError:
        # you can't just close zipfiles in Windows
        if z.fp is not None:
            z.fp.close() # python 2.6
        else:
            z.close() # python 2.7
Ejemplo n.º 21
0
def resolve_range(rng, should_flatten = False, sheet=''):
    """Expand a range reference into the cell addresses it covers.

    :param rng: range reference; must contain ``':'``, may carry a sheet
    :param should_flatten: for rectangular ranges, return one flat list
        instead of a list of rows
    :param sheet: sheet name to use when *rng* does not carry one
    :returns: ``ExcelError('#REF!')`` when *rng* contains no ``':'``;
        otherwise a tuple ``(addresses, nrows, ncols)``.  Single-column
        and single-row ranges give a flat list of addresses.
    :raises Exception: if *rng* and *sheet* name different sheets

    Results are memoised in ``resolve_range_cache``.
    """
    # print 'RESOLVE RANGE splitting', rng
    if ':' not in rng:
        if '!' in rng:
            # only affects how rng is rendered in the error message below
            rng = rng.split('!')
        return ExcelError('#REF!', info = '%s is not a regular range, nor a named_range' % rng)
    sh, start, end = split_range(rng)

    # normalise `sheet` to a "<name>!" prefix (or leave it empty)
    if sh and sheet:
        if sh != sheet:
            raise Exception("Mismatched sheets %s and %s" % (sh,sheet))
        else:
            sheet += '!'
    elif sh and not sheet:
        sheet = sh + "!"
    elif sheet and not sh:
        sheet += "!"
    else:
        pass

    # `unicode` != `str` in Python2. See `from openpyxl.compat import unicode`
    if type(sheet) == str and str != unicode:
        sheet = unicode(sheet, 'utf-8')
    if type(rng) == str and str != unicode:
        rng = unicode(rng, 'utf-8')

    # cache key: range text, flatten flag and sheet prefix
    key = rng+str(should_flatten)+sheet

    if key in resolve_range_cache:
        return resolve_range_cache[key]
    else:
        if not is_range(rng):  return ([sheet + rng],1,1)
        # single cell, no range
        if start.isdigit() and end.isdigit():
            # This copes with 1:1 style ranges
            start_col = "A"
            start_row = start
            end_col = "XFD"
            end_row = end
        elif start.isalpha() and end.isalpha():
            # This copes with A:A style ranges
            start_col = start
            start_row = 1
            end_col = end
            end_row = 2**20
        else:
            sh, start_col, start_row = split_address(start)
            sh, end_col, end_row = split_address(end)

        start_col_idx = col2num(start_col)
        end_col_idx = col2num(end_col);

        start_row = int(start_row)
        end_row = int(end_row)

        # Attempt to use Numpy, not relevant for now

        # num2col_vec = np.vectorize(num2col)
        # r = np.array([range(start_row, end_row + 1),]*nb_col, dtype='a5').T
        # c = num2col_vec(np.array([range(start_col_idx, end_col_idx + 1),]*nb_row))
        # if len(sheet)>0:
        #     s = np.chararray((nb_row, nb_col), itemsize=len(sheet))
        #     s[:] = sheet
        #     c = np.core.defchararray.add(s, c)
        # B = np.core.defchararray.add(c, r)


        # if start_col == end_col:
        #     data = B.T.tolist()[0]
        #     return data, len(data), 1
        # elif start_row == end_row:
        #     data = B.tolist()[0]
        #     return data, 1, len(data)
        # else:
        #     if should_flatten:
        #         return B.flatten().tolist(), 1, nb_col*nb_row
        #     else:
        #         return B.tolist(), nb_row, nb_col

        # single column
        if  start_col == end_col:
            nrows = end_row - start_row + 1
            data = [ "%s%s%s" % (s,c,r) for (s,c,r) in zip([sheet]*nrows,[start_col]*nrows,list(range(start_row,end_row+1)))]

            output = data,len(data),1

        # single row
        elif start_row == end_row:
            ncols = end_col_idx - start_col_idx + 1
            data = [ "%s%s%s" % (s,num2col(c),r) for (s,c,r) in zip([sheet]*ncols,list(range(start_col_idx,end_col_idx+1)),[start_row]*ncols)]
            output = data,1,len(data)

        # rectangular range
        else:
            cells = []
            for r in range(start_row,end_row+1):
                row = []
                for c in range(start_col_idx,end_col_idx+1):
                    row.append(sheet + num2col(c) + str(r))

                cells.append(row)

            if should_flatten:
                # flatten into one list; the reported dimensions are still
                # those of the unflattened rectangle
                l = list(flatten(cells, only_lists = True))
                output = l,len(cells), len(cells[0])
            else:
                output = cells, len(cells), len(cells[0])

        resolve_range_cache[key] = output
        return output
Ejemplo n.º 22
0
 def check_error(self, value):
     """Return *value* as unicode, or ``#N/A`` if it cannot be decoded"""
     try:
         converted = unicode(value)
     except UnicodeDecodeError:
         converted = u"#N/A"
     return converted
Ejemplo n.º 23
0
 def __repr__(self):
     """Use the unicode conversion of the object as its representation"""
     text = unicode(self)
     return text
Ejemplo n.º 24
0
 def __repr__(self):
     """Describe the cell by its parent's title and its coordinate"""
     template = unicode("<Cell %s.%s>")
     return template % (self.parent.title, self.coordinate)
Ejemplo n.º 25
0
def load_workbook(filename, use_iterators=False, keep_vba=KEEP_VBA, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raises OpenModeError: if a file object was not opened in binary mode
    :raises InvalidFileException: if the archive cannot be opened or
        repaired, or a required part is missing from it

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """

    is_file_instance = isinstance(filename, file)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # second chance: try to repair the central directory
        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (BadZipfile, RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    # from here on the archive is open: close it on every exit path,
    # including failures while the workbook is being populated
    try:
        wb = Workbook(guess_types=guess_types, data_only=data_only)

        if use_iterators:
            wb._set_optimized_read()
            if guess_types:
                warnings.warn('Data types are not guessed when using iterator reader')

        try:
            _load_workbook(wb, archive, filename, use_iterators, keep_vba)
        except KeyError:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    finally:
        archive.close()
    return wb
Ejemplo n.º 26
0
 def check_error(self, value):
     """Return *value* as unicode, or ``#N/A`` if the conversion fails"""
     try:
         return unicode(value)
     except Exception:
         # any conversion failure maps to #N/A, but KeyboardInterrupt and
         # SystemExit are no longer swallowed
         return unicode('#N/A')
Ejemplo n.º 27
0
def load_workbook(filename, use_iterators=False, keep_vba=KEEP_VBA, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raises OpenModeError: if a file object was not opened in binary mode
    :raises InvalidFileException: if the file extension is not supported,
        the archive cannot be opened or repaired, or a required part is
        missing from it

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """

    is_file_instance = isinstance(filename, file)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # The extension can only be inspected for a path; a file-like
        # object has none, so it goes straight to the repair attempt
        # instead of failing inside os.path.splitext().
        if not hasattr(filename, 'read'):
            file_format = os.path.splitext(filename)[-1]
            if file_format not in SUPPORTED_FORMATS:
                if file_format == '.xls':
                    msg = ('openpyxl does not support the old .xls file format, '
                           'please use xlrd to read this file, or convert it to '
                           'the more recent .xlsx file format.')
                elif file_format == '.xlsb':
                    msg = ('openpyxl does not support binary format .xlsb, '
                           'please convert this file to .xlsx format if you want '
                           'to open it with openpyxl')
                else:
                    msg = ('openpyxl does not support %s file format, '
                           'please check you can open '
                           'it with Excel first. '
                           'Supported formats are: %s') % (file_format,
                                                         ','.join(SUPPORTED_FORMATS))
                raise InvalidFileException(msg)

        # second chance: try to repair the central directory
        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (BadZipfile, RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    # from here on the archive is open: close it on every exit path,
    # including failures while the workbook is being populated
    try:
        wb = Workbook(guess_types=guess_types, data_only=data_only)

        if use_iterators:
            wb._set_optimized_read()
            if guess_types:
                warnings.warn('Data types are not guessed when using iterator reader')

        try:
            _load_workbook(wb, archive, filename, use_iterators, keep_vba)
        except KeyError:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    finally:
        archive.close()
    return wb