コード例 #1
0
ファイル: test_iter.py プロジェクト: Pawpaw-CI/Excel2Testlink
def test_read_row(datadir, DummyWorkbook, ReadOnlyWorksheet):
    datadir.join("reader").chdir()

    src = b"""
    <sheetData  xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" >
    <row r="1" spans="4:27">
      <c r="D1">
        <v>1</v>
      </c>
      <c r="K1">
        <v>0.01</v>
      </c>
      <c r="AA1">
        <v>100</v>
      </c>
    </row>
    </sheetData>
    """

    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "bug393-worksheet.xml", [])

    xml = fromstring(src)
    row = tuple(ws._get_row(xml, 11, 11))
    values = [c.value for c in row]
    assert values == [0.01]

    row = tuple(ws._get_row(xml, 1, 11))
    values = [c.value for c in row]
    assert values == [None, None, None, 1, None, None, None, None, None, None, 0.01]
コード例 #2
0
ファイル: test_iter.py プロジェクト: Pawpaw-CI/Excel2Testlink
def test_force_dimension(datadir, DummyWorkbook, ReadOnlyWorksheet):
    datadir.join("reader").chdir()

    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "sheet2_no_dimension.xml", [])

    dims = ws.calculate_dimension(True)
    assert dims == "A1:AA30"
コード例 #3
0
ファイル: test_iter.py プロジェクト: mysonhushu/openpyxl
def test_get_empty_cells_nonempty_row(datadir, DummyWorkbook,
                                      ReadOnlyWorksheet):
    """Fix for issue #908.

    Get row slice which only contains empty cells in a row containing non-empty
    cells earlier in the row.
    """

    datadir.join("reader").chdir()

    src = b"""
    <sheetData  xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" >
    <row r="1" spans="4:27">
      <c r="A4">
        <v>1</v>
      </c>
    </row>
    </sheetData>
    """

    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "", [])

    xml = fromstring(src)

    min_col = 8
    max_col = 9
    row = tuple(ws._get_row(xml, min_col=min_col, max_col=max_col))

    assert len(row) == 2
    assert all(cell is EMPTY_CELL for cell in row)
    values = [cell.value for cell in row]
    assert values == [None, None]
コード例 #4
0
def test_read_without_coordinates(DummyWorkbook, ReadOnlyWorksheet):

    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "", ["Whatever"] * 10)
    src = """
    <row xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
      <c t="s">
        <v>2</v>
      </c>
      <c t="s">
        <v>4</v>
      </c>
      <c t="s">
        <v>3</v>
      </c>
      <c t="s">
        <v>6</v>
      </c>
      <c t="s">
        <v>9</v>
      </c>
    </row>
    """

    element = fromstring(src)
    row = tuple(ws._get_row(element, min_col=1, max_col=None, row_counter=1))
    assert row[0].value == "Whatever"
コード例 #5
0
ファイル: test_iter.py プロジェクト: nickpell/openpyxl
def test_get_max_cell(datadir, DummyWorkbook, ReadOnlyWorksheet, filename):
    datadir.join("reader").chdir()
    DummyWorkbook._archive.write(filename, "sheet1.xml")

    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "sheet1.xml", [])
    ws._shared_strings = ['A', 'B']
    rows = tuple(ws.rows)
    assert rows[-1][-1].coordinate == "AA30"
コード例 #6
0
ファイル: test_iter.py プロジェクト: mysonhushu/openpyxl
def test_iter_rows_empty_rows(datadir, DummyWorkbook, ReadOnlyWorksheet):
    datadir.join("reader").chdir()

    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "empty_rows.xml", [])

    rows = tuple(ws.iter_rows(min_row=1, min_col=1, max_row=10, max_col=10))
    assert len(rows) == 7
    assert rows[0][0].value is None
    assert rows[6][6].value is None
コード例 #7
0
ファイル: test_iter.py プロジェクト: nickpell/openpyxl
def test_force_dimension(datadir, DummyWorkbook, ReadOnlyWorksheet):
    datadir.join("reader").chdir()
    wb = DummyWorkbook
    wb._archive.write("sheet2_no_dimension.xml", "sheet1.xml")

    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "sheet1.xml", [])
    ws._shared_strings = ['A', 'B']

    dims = ws.calculate_dimension(True)
    assert dims == "A1:AA30"
コード例 #8
0
ファイル: test_iter.py プロジェクト: Pawpaw-CI/Excel2Testlink
def test_read_empty_row(datadir, DummyWorkbook, ReadOnlyWorksheet):

    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "", [])

    src = """
    <row r="2" xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" />
    """
    element = fromstring(src)
    row = ws._get_row(element, max_col=10)
    row = tuple(row)
    assert len(row) == 10
コード例 #9
0
def test_read_cell_from_empty_row(DummyWorkbook, ReadOnlyWorksheet, row, column):
    src = BytesIO()
    src.write(b"""<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
    <sheetData>
      <row r="2" />
      <row r="4" />
    </sheetData>
    </worksheet>
    """)
    src.seek(0)
    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "", [])
    ws._xml = src
    cell = ws._get_cell(row, column)
    assert cell is EMPTY_CELL
コード例 #10
0
ファイル: test_iter.py プロジェクト: nickpell/openpyxl
def test_read_hyperlinks_read_only(datadir, DummyWorkbook, ReadOnlyWorksheet):
    datadir.join("reader").chdir()
    wb = DummyWorkbook
    wb._archive.write("bug393-worksheet.xml", "sheet1.xml")

    ws = ReadOnlyWorksheet(wb, "Sheet", "sheet1.xml", ['SOMETEXT'])
    assert ws['F2'].value is None
コード例 #11
0
ファイル: test_iter.py プロジェクト: Pawpaw-CI/Excel2Testlink
def test_read_hyperlinks_read_only(datadir, Workbook, ReadOnlyWorksheet):

    datadir.join("reader").chdir()
    filename = 'bug328_hyperlinks.xml'
    ws = ReadOnlyWorksheet(Workbook(data_only=True, read_only=True), "Sheet",
                           "", filename, ['SOMETEXT'])
    assert ws['F2'].value is None
コード例 #12
0
ファイル: test_iter.py プロジェクト: nickpell/openpyxl
def test_ctor(datadir, DummyWorkbook, ReadOnlyWorksheet, filename, expected):
    datadir.join("reader").chdir()
    wb = DummyWorkbook
    wb._archive.write(filename, "sheet1.xml")
    with open(filename) as src:
        ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "sheet1.xml", [])
    assert (ws.min_row, ws.min_column, ws.max_row, ws.max_column) == expected
コード例 #13
0
ファイル: test_iter.py プロジェクト: nickpell/openpyxl
def test_read_empty_rows(datadir, DummyWorkbook, ReadOnlyWorksheet):
    datadir.join("reader").chdir()
    wb = DummyWorkbook
    wb._archive.write("empty_rows.xml", "sheet1.xml")

    ws = ReadOnlyWorksheet(wb, "Sheet", "sheet1.xml", [])
    rows = tuple(ws.rows)
    assert len(rows) == 7
コード例 #14
0
ファイル: test_iter.py プロジェクト: nickpell/openpyxl
def test_read_with_missing_cells(datadir, DummyWorkbook, ReadOnlyWorksheet):
    datadir.join("reader").chdir()
    wb = DummyWorkbook
    wb._archive.write("bug393-worksheet.xml", "sheet1.xml")

    ws = ReadOnlyWorksheet(wb, "Sheet", "sheet1.xml", [])
    rows = tuple(ws.rows)

    row = rows[1]  # second row
    values = [c.value for c in row]
    assert values == [None, None, 1, 2, 3]

    row = rows[3]  # fourth row
    values = [c.value for c in row]
    assert values == [1, 2, None, None, 3]
コード例 #15
0
ファイル: test_iter.py プロジェクト: Pawpaw-CI/Excel2Testlink
def test_read_with_missing_cells(datadir, DummyWorkbook, ReadOnlyWorksheet):
    datadir.join("reader").chdir()

    filename = "bug393-worksheet.xml"

    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", filename, [])
    rows = tuple(ws.rows)

    row = rows[1] # second row
    values = [c.value for c in row]
    assert values == [None, None, 1, 2, 3]

    row = rows[3] # fourth row
    values = [c.value for c in row]
    assert values == [1, 2, None, None, 3]
コード例 #16
0
ファイル: excel.py プロジェクト: atom-chen/layaTemplate
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, guess_types=False, keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)
    read_only = read_only

    src = archive.read(ARC_CONTENT_TYPES)
    root = fromstring(src)
    package = Manifest.from_tree(root)

    wb_part = _find_workbook_part(package)
    parser = WorkbookParser(archive, wb_part.PartName[1:])
    wb = parser.wb
    wb._data_only = data_only
    wb._read_only = read_only
    wb._keep_links = keep_links
    wb.guess_types = guess_types
    wb.template = wb_part.ContentType in (XLTX, XLTM)
    parser.parse()
    wb._sheets = []

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
        for name in archive.namelist():
            wb.vba_archive.writestr(name, archive.read(name))


    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    if ARC_CORE in valid_files:
        src = fromstring(archive.read(ARC_CORE))
        wb.properties = DocumentProperties.from_tree(src)


    shared_strings = []
    ct = package.find(SHARED_STRINGS)
    if ct is not None:
        strings_path = ct.PartName[1:]
        shared_strings = read_string_table(archive.read(strings_path))


    if ARC_THEME in valid_files:
        wb.loaded_theme = archive.read(ARC_THEME)

    apply_stylesheet(archive, wb) # bind styles to workbook
    pivot_caches = parser.pivot_caches

    # get worksheets
    for sheet, rel in parser.find_sheets():
        sheet_name = sheet.name
        worksheet_path = rel.target
        rels_path = get_rels_path(worksheet_path)
        rels = []
        if rels_path in valid_files:
            rels = get_dependents(archive, rels_path)

        if not worksheet_path in valid_files:
            continue

        if read_only:
            ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                   shared_strings)

            wb._sheets.append(ws)
        else:
            fh = archive.open(worksheet_path)
            ws = wb.create_sheet(sheet_name)
            ws._rels = rels
            ws_parser = WorkSheetParser(ws, fh, shared_strings)
            ws_parser.parse()

            if rels:
                # assign any comments to cells
                for r in rels.find(COMMENTS_NS):
                    src = archive.read(r.target)
                    comment_sheet = CommentSheet.from_tree(fromstring(src))
                    for ref, comment in comment_sheet.comments:
                        ws[ref].comment = comment

                # preserve link to VML file if VBA
                if (
                    wb.vba_archive is not None
                    and ws.legacy_drawing is not None
                    ):
                    ws.legacy_drawing = rels[ws.legacy_drawing].target

                for t in ws_parser.tables:
                    src = archive.read(t)
                    xml = fromstring(src)
                    table = Table.from_tree(xml)
                    ws.add_table(table)

                pivot_rel = rels.find(TableDefinition.rel_type)
                for r in pivot_rel:
                    pivot_path = r.Target
                    src = archive.read(pivot_path)
                    tree = fromstring(src)
                    pivot = TableDefinition.from_tree(tree)
                    pivot.cache = pivot_caches[pivot.cacheId]
                    ws.add_pivot(pivot)

        ws.sheet_state = sheet.state
        ws._rels = [] # reset

    parser.assign_names()

    #wb._differential_styles.styles =  [] # tables may depened upon dxf

    archive.close()
    return wb
コード例 #17
0
ファイル: excel.py プロジェクト: clwater/lesson_python
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, guess_types=False, keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)
    read_only = read_only

    parser = WorkbookParser(archive)
    wb = parser.wb
    wb._data_only = data_only
    wb._read_only = read_only
    wb._keep_links = keep_links
    wb.guess_types = guess_types
    parser.parse()
    wb._sheets = []

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
        for name in archive.namelist():
            wb.vba_archive.writestr(name, archive.read(name))


    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    if ARC_CORE in valid_files:
        src = fromstring(archive.read(ARC_CORE))
        wb.properties = DocumentProperties.from_tree(src)

    # is workbook a template or note
    src = archive.read(ARC_CONTENT_TYPES)
    root = fromstring(src)
    package = Manifest.from_tree(root)
    wb.template = XLTX in package or XLTM in package

    shared_strings = []
    ct = package.find(SHARED_STRINGS)
    if ct is not None:
        strings_path = ct.PartName[1:]
        shared_strings = read_string_table(archive.read(strings_path))


    if ARC_THEME in valid_files:
        wb.loaded_theme = archive.read(ARC_THEME)

    apply_stylesheet(archive, wb) # bind styles to workbook

    # get worksheets
    for sheet, rel in parser.find_sheets():
        sheet_name = sheet.name
        worksheet_path = rel.target
        rels_path = get_rels_path(worksheet_path)
        rels = []
        if rels_path in valid_files:
            rels = get_dependents(archive, rels_path)

        if not worksheet_path in valid_files:
            continue

        if read_only:
            ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                   shared_strings)

            wb._sheets.append(ws)
        else:
            fh = archive.open(worksheet_path)
            ws = wb.create_sheet(sheet_name)
            ws._rels = rels
            ws_parser = WorkSheetParser(ws, fh, shared_strings)
            ws_parser.parse()

            if rels:
                # assign any comments to cells
                for r in rels.find(COMMENTS_NS):
                    src = archive.read(r.target)
                    comment_sheet = CommentSheet.from_tree(fromstring(src))
                    for ref, comment in comment_sheet.comments:
                        ws[ref].comment = comment

                # preserve link to VML file if VBA
                if (
                    wb.vba_archive is not None
                    and ws.legacy_drawing is not None
                    ):
                    ws.legacy_drawing = rels[ws.legacy_drawing].target

        ws.sheet_state = sheet.state
        ws._rels = [] # reset

    parser.assign_names()

    wb._differential_styles.styles =  []

    archive.close()
    return wb
コード例 #18
0
ファイル: excel.py プロジェクト: token97/op_patch
def load_workbook(filename, read_only=False, use_iterators=False, keep_vba=False, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)
    read_only = read_only or use_iterators

    wb = Workbook(guess_types=guess_types, data_only=data_only, read_only=read_only)

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        try:
            f = open(filename, 'rb')
            s = f.read()
            f.close()
        except:
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb.active = read_workbook_settings(archive.read(ARC_WORKBOOK)) or 0

    # what content types do we have?
    cts = dict(read_content_types(archive))

    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    wb.is_template = XLTX in cts or XLTM in cts

    try:
        wb.loaded_theme = archive.read(ARC_THEME)  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    parsed_styles = read_style_table(archive)
    if parsed_styles is not None:
        wb._differential_styles = parsed_styles.differential_styles
        wb._cell_styles = parsed_styles.cell_styles
        wb._named_styles = parsed_styles.named_styles
        wb._colors = parsed_styles.color_index
        wb._borders = parsed_styles.border_list
        wb._fonts = parsed_styles.font_list
        wb._fills = parsed_styles.fill_list
        wb._number_formats = parsed_styles.number_formats
        wb._protections = parsed_styles.protections
        wb._alignments = parsed_styles.alignments
        wb._colors = parsed_styles.color_index

    wb.excel_base_date = read_excel_base_date(archive)

    # get worksheets
    wb._sheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if not worksheet_path in valid_files:
            continue

        if read_only:
            new_ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                       shared_strings)
            wb._add_sheet(new_ws)
        else:
            fh = archive.open(worksheet_path)
            parser = WorkSheetParser(wb, sheet_name, fh, shared_strings)
            parser.parse()
            new_ws = wb[sheet_name]
        new_ws.sheet_state = sheet['state']

        if wb.vba_archive is not None and new_ws.legacy_drawing is not None:
            # We need to get the file name of the legacy drawing
            dirname, basename = worksheet_path.rsplit('/', 1)
            rels_path = '/'.join((dirname, '_rels', basename + '.rels'))
            rels = get_dependents(archive, rels_path)
            new_ws.legacy_drawing = rels[new_ws.legacy_drawing].target

        if not read_only:
        # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive, valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))
            drawings_file = get_drawings_file(worksheet_path, archive, valid_files)
            if drawings_file is not None:
                read_drawings(new_ws, drawings_file, archive, valid_files)
    wb._differential_styles = [] # reset
    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))


    archive.close()
    return wb
コード例 #19
0
ファイル: test_iter.py プロジェクト: Pawpaw-CI/Excel2Testlink
def test_get_max_cell(datadir, DummyWorkbook, ReadOnlyWorksheet, filename):
    datadir.join("reader").chdir()

    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", filename, [])
    rows = tuple(ws.rows)
    assert rows[-1][-1].coordinate == "AA30"
コード例 #20
0
ファイル: test_iter.py プロジェクト: Pawpaw-CI/Excel2Testlink
def test_ctor(datadir, DummyWorkbook, ReadOnlyWorksheet, filename, expected):
    datadir.join("reader").chdir()
    with open(filename) as src:
        ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", src, [])
    assert (ws.min_row, ws.min_column, ws.max_row, ws.max_column) == expected
コード例 #21
0
ファイル: test_iter.py プロジェクト: Pawpaw-CI/Excel2Testlink
def test_read_empty_rows(datadir, DummyWorkbook, ReadOnlyWorksheet):

    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "empty_rows.xml", [])
    rows = tuple(ws.rows)
    assert len(rows) == 7
コード例 #22
0
ファイル: excel.py プロジェクト: nickpell/openpyxl
    def read_worksheets(self):
        for sheet, rel in self.parser.find_sheets():
            if rel.target not in self.valid_files:
                continue

            if "chartsheet" in rel.Type:
                self.read_chartsheet(sheet, rel)
                continue

            rels_path = get_rels_path(rel.target)
            rels = RelationshipList()
            if rels_path in self.valid_files:
                rels = get_dependents(self.archive, rels_path)

            if self.read_only:
                ws = ReadOnlyWorksheet(self.wb, sheet.name, rel.target,
                                       self.shared_strings)
                self.wb._sheets.append(ws)
                continue
            else:
                fh = self.archive.open(rel.target)
                ws = self.wb.create_sheet(sheet.name)
                ws._rels = rels
                ws_parser = WorksheetReader(ws, fh, self.shared_strings,
                                            self.data_only)
                ws_parser.bind_all()

            # assign any comments to cells
            for r in rels.find(COMMENTS_NS):
                src = self.archive.read(r.target)
                comment_sheet = CommentSheet.from_tree(fromstring(src))
                for ref, comment in comment_sheet.comments:
                    ws[ref].comment = comment

            # preserve link to VML file if VBA
            if self.wb.vba_archive and ws.legacy_drawing:
                ws.legacy_drawing = rels[ws.legacy_drawing].target

            for t in ws_parser.tables:
                src = self.archive.read(t)
                xml = fromstring(src)
                table = Table.from_tree(xml)
                ws.add_table(table)

            drawings = rels.find(SpreadsheetDrawing._rel_type)
            for rel in drawings:
                charts, images = find_images(self.archive, rel.target)
                for c in charts:
                    ws.add_chart(c, c.anchor)
                for im in images:
                    ws.add_image(im, im.anchor)

            pivot_rel = rels.find(TableDefinition.rel_type)
            for r in pivot_rel:
                pivot_path = r.Target
                src = self.archive.read(pivot_path)
                tree = fromstring(src)
                pivot = TableDefinition.from_tree(tree)
                pivot.cache = self.parser.pivot_caches[pivot.cacheId]
                ws.add_pivot(pivot)

            ws.sheet_state = sheet.state
コード例 #23
0
def load_workbook(filename,
                  read_only=False,
                  use_iterators=False,
                  keep_vba=KEEP_VBA,
                  guess_types=False,
                  data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)
    read_only = read_only or use_iterators

    wb = Workbook(guess_types=guess_types,
                  data_only=data_only,
                  read_only=read_only)

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        try:
            f = open(filename, 'rb')
            s = f.read()
            f.close()
        except:
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb.active = read_workbook_settings(archive.read(ARC_WORKBOOK)) or 0

    # what content types do we have?
    cts = dict(read_content_types(archive))

    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    wb.is_template = XLTX in cts or XLTM in cts

    try:
        wb.loaded_theme = archive.read(
            ARC_THEME
        )  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    parsed_styles = read_style_table(archive)
    if parsed_styles is not None:
        wb._differential_styles = parsed_styles.differential_styles
        wb._cell_styles = parsed_styles.cell_styles
        wb._named_styles = parsed_styles.named_styles
        wb._colors = parsed_styles.color_index
        wb._borders = parsed_styles.border_list
        wb._fonts = parsed_styles.font_list
        wb._fills = parsed_styles.fill_list
        wb._number_formats = parsed_styles.number_formats
        wb._protections = parsed_styles.protections
        wb._alignments = parsed_styles.alignments
        wb._colors = parsed_styles.color_index

    wb.excel_base_date = read_excel_base_date(archive)

    # get worksheets
    wb._sheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if not worksheet_path in valid_files:
            continue

        if read_only:
            new_ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                       shared_strings)
            wb._add_sheet(new_ws)
        else:
            fh = archive.open(worksheet_path)
            parser = WorkSheetParser(wb, sheet_name, fh, shared_strings)
            parser.parse()
            new_ws = wb[sheet_name]
        new_ws.sheet_state = sheet['state']

        if wb.vba_archive is not None and new_ws.legacy_drawing is not None:
            # We need to get the file name of the legacy drawing
            dirname, basename = worksheet_path.rsplit('/', 1)
            rels_path = '/'.join((dirname, '_rels', basename + '.rels'))
            rels = get_dependents(archive, rels_path)
            new_ws.legacy_drawing = rels[new_ws.legacy_drawing].target

        if not read_only:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._differential_styles = []  # reset
    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))

    archive.close()
    return wb