Exemple #1
0
 def test_append(self, Manifest):
     """A manifest holds six Override entries once a worksheet is appended."""
     from openpyxl25 import Workbook

     sheet = Workbook().active
     content_types = Manifest()
     content_types.append(sheet)

     override_count = len(content_types.Override)
     assert override_count == 6
Exemple #2
0
    def test_write(self, Manifest):
        """Writing the manifest for a new workbook lists the workbook part."""
        from openpyxl25 import Workbook

        content_types = Manifest()
        workbook = Workbook()
        target = ZipFile(BytesIO(), "w")

        content_types._write(target, workbook)

        listed = content_types.filenames
        assert "/xl/workbook.xml" in listed
Exemple #3
0
    def test_media(self, Manifest, file, registration):
        """The last Default entry after registering ``file`` matches ``registration``."""
        from openpyxl25 import Workbook

        # A workbook is created but not used further by this test.
        _workbook = Workbook()

        content_types = Manifest()
        content_types._register_mimetypes([file])

        newest_default = content_types.Default[-1]
        serialised = tostring(newest_default.to_tree())
        mismatch = compare_xml(serialised, registration)
        assert mismatch is None, mismatch
Exemple #4
0
    def test_write(self, DummyCache):
        """Writing the cache adds its part to the archive and its type to the manifest."""
        buffer = BytesIO()
        zipped = ZipFile(buffer, mode="w")
        content_types = Manifest()

        # Serialise once; the resulting XML itself is not compared here.
        tostring(DummyCache.to_tree())
        DummyCache._write(zipped, content_types)

        members = zipped.namelist()
        assert members == [DummyCache.path[1:]]
        assert content_types.find(DummyCache.mime_type)
Exemple #5
0
 def __init__(self, workbook, archive):
     """Store the workbook and archive and start with an empty manifest and part lists."""
     self._archive = archive
     self.workbook = workbook
     self.manifest = Manifest()
     self.vba_modified = set()
     # Six independent, initially empty lists (one per kind of part).
     self._tables, self._charts, self._images = [], [], []
     self._drawings, self._comments, self._pivots = [], [], []
Exemple #6
0
    def test_write(self, RecordList):
        """Writing a record list adds its part to the archive and its type to the manifest."""
        buffer = BytesIO()
        zipped = ZipFile(buffer, mode="w")
        content_types = Manifest()

        record_list = RecordList()
        # Serialise once; the resulting XML itself is not compared here.
        tostring(record_list.to_tree())
        record_list._write(zipped, content_types)
        content_types.append(record_list)

        members = zipped.namelist()
        assert members == [record_list.path[1:]]
        assert content_types.find(record_list.mime_type)
Exemple #7
0
    def test_from_xml(self, datadir, Manifest):
        """Parse ``manifest.xml`` and check the Default and Override entries read from it."""
        datadir.chdir()
        # Read raw bytes so the XML parser does the decoding itself rather
        # than depending on the platform's default text encoding.
        with open("manifest.xml", "rb") as src:
            node = fromstring(src.read())
        manifest = Manifest.from_tree(node)

        # Default entries: (content type, file extension), in document order.
        assert len(manifest.Default) == 2
        defaults = [
            ("application/xml", 'xml'),
            ("application/vnd.openxmlformats-package.relationships+xml", 'rels'),
        ]
        assert [(ct.ContentType, ct.Extension) for ct in manifest.Default] == defaults

        # Override entries: (content type, part name), in document order.
        overrides = [
            ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml',
             '/xl/workbook.xml'),
            ('application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml',
             '/xl/worksheets/sheet1.xml'),
            ('application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml',
             '/xl/chartsheets/sheet1.xml'),
            ('application/vnd.openxmlformats-officedocument.theme+xml',
             '/xl/theme/theme1.xml'),
            ('application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml',
             '/xl/styles.xml'),
            ('application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml',
             '/xl/sharedStrings.xml'),
            ('application/vnd.openxmlformats-officedocument.drawing+xml',
             '/xl/drawings/drawing1.xml'),
            ('application/vnd.openxmlformats-officedocument.drawingml.chart+xml',
             '/xl/charts/chart1.xml'),
            ('application/vnd.openxmlformats-package.core-properties+xml',
             '/docProps/core.xml'),
            ('application/vnd.openxmlformats-officedocument.extended-properties+xml',
             '/docProps/app.xml')
        ]
        assert [(ct.ContentType, ct.PartName) for ct in manifest.Override] == overrides
Exemple #8
0
 def test_findall(self, datadir, Manifest):
     """``findall`` yields exactly one worksheet entry for ``manifest.xml``."""
     datadir.chdir()
     with open("manifest.xml", "rb") as handle:
         raw = handle.read()
     content_types = Manifest.from_tree(fromstring(raw))

     matches = list(content_types.findall(WORKSHEET_TYPE))
     assert len(matches) == 1
Exemple #9
0
 def test_exts(self, datadir, Manifest):
     """``extensions`` for ``manifest.xml`` lists only the ``xml`` mapping."""
     datadir.chdir()
     # Read raw bytes so the XML parser does the decoding itself rather
     # than depending on the platform's default text encoding.
     with open("manifest.xml", "rb") as src:
         node = fromstring(src.read())
     manifest = Manifest.from_tree(node)
     assert manifest.extensions == [
         ('xml', 'application/xml'),
     ]
Exemple #10
0
 def test_find(self, datadir, Manifest):
     """``find`` returns the worksheet entry whose part is sheet1.xml."""
     datadir.chdir()
     with open("manifest.xml", "rb") as handle:
         raw = handle.read()
     content_types = Manifest.from_tree(fromstring(raw))

     worksheet_entry = content_types.find(WORKSHEET_TYPE)
     assert worksheet_entry.PartName == "/xl/worksheets/sheet1.xml"
Exemple #11
0
    def test_vba(self, datadir, Manifest):
        """Overrides written for a workbook loaded with ``keep_vba`` cover the expected parts."""
        datadir.chdir()
        from openpyxl25 import load_workbook

        workbook = load_workbook('sample.xlsm', keep_vba=True)
        content_types = Manifest()
        content_types._write_vba(workbook)

        written = {override.PartName for override in content_types.Override}
        assert written == {
            '/xl/workbook.xml',
            '/xl/worksheets/sheet1.xml',
            '/xl/worksheets/sheet2.xml',
            '/xl/worksheets/sheet3.xml',
            '/xl/theme/theme1.xml',
            '/xl/styles.xml',
            '/docProps/core.xml',
            '/docProps/app.xml',
            '/xl/sharedStrings.xml',
        }
Exemple #12
0
def test_overwritten_default():
    """A Default mapping ``xml`` to the workbook type resolves to ``/xl/workbook.xml``."""
    from openpyxl25.reader.excel import _find_workbook_part

    content_types_xml = """
    <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
      <Default Extension="xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
    </Types>
    """
    package = Manifest.from_tree(fromstring(content_types_xml))

    found = _find_workbook_part(package)
    assert found == Override("/xl/workbook.xml", XLSX)
Exemple #13
0
 def test_ctor(self, Manifest):
     """A freshly constructed manifest serialises to the default content types."""
     serialised = tostring(Manifest().to_tree())
     reference = """
     <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
       <Default ContentType="application/vnd.openxmlformats-package.relationships+xml" Extension="rels" />
       <Default ContentType="application/xml" Extension="xml" />
       <Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml"
         PartName="/xl/sharedStrings.xml"/>
       <Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"
         PartName="/xl/styles.xml"/>
       <Override ContentType="application/vnd.openxmlformats-officedocument.theme+xml"
         PartName="/xl/theme/theme1.xml"/>
       <Override ContentType="application/vnd.openxmlformats-package.core-properties+xml"
         PartName="/docProps/core.xml"/>
       <Override ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"
         PartName="/docProps/app.xml"/>
     </Types>
     """
     mismatch = compare_xml(serialised, reference)
     assert mismatch is None, mismatch
Exemple #14
0
def test_find_standard_workbook_part(datadir, wb_type, wb_name):
    """An Override with the given type and part name is returned as the workbook part."""
    from openpyxl25.reader.excel import _find_workbook_part

    template = """
        <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
        <Override ContentType="{0}"
          PartName="{1}"/>
        </Types>
        """
    content_types_xml = template.format(wb_type, wb_name)
    package = Manifest.from_tree(fromstring(content_types_xml))

    found = _find_workbook_part(package)
    assert found == Override(wb_name, wb_type)
Exemple #15
0
    def test_no_defaults(self, Manifest):
        """
        A manifest without any Default element (as LibreOffice writes it)
        reports no extensions.
        """
        content_types_xml = """
        <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
           <Override PartName="/_rels/.rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
        </Types>
        """
        parsed = Manifest.from_tree(fromstring(content_types_xml))

        assert parsed.extensions == []
Exemple #16
0
 def test_filenames(self, datadir, Manifest):
     """``filenames`` lists the part names from ``manifest.xml`` in order."""
     datadir.chdir()
     # Read raw bytes so the XML parser does the decoding itself rather
     # than depending on the platform's default text encoding.
     with open("manifest.xml", "rb") as src:
         node = fromstring(src.read())
     manifest = Manifest.from_tree(node)
     assert manifest.filenames == [
         '/xl/workbook.xml',
         '/xl/worksheets/sheet1.xml',
         '/xl/chartsheets/sheet1.xml',
         '/xl/theme/theme1.xml',
         '/xl/styles.xml',
         '/xl/sharedStrings.xml',
         '/xl/drawings/drawing1.xml',
         '/xl/charts/chart1.xml',
         '/docProps/core.xml',
         '/docProps/app.xml',
     ]
Exemple #17
0
 def test_find_none(self, Manifest):
     """``find`` returns None for the worksheet type on a new manifest."""
     lookup = Manifest().find(WORKSHEET_TYPE)
     assert lookup is None
Exemple #18
0
class ExcelWriter(object):
    """Write a workbook object to an Excel file."""

    def __init__(self, workbook, archive):
        """Store the workbook and the archive its parts are written to.

        :param workbook: the workbook to serialise
        :param archive: archive object; this class calls ``writestr`` and
            ``close`` on it
        """
        self._archive = archive
        self.workbook = workbook
        self.manifest = Manifest()
        # Archive names rewritten by this writer; _merge_vba() skips them.
        self.vba_modified = set()
        # Parts collected while writing. Each part's id is set to its
        # 1-based position in the matching list.
        self._tables = []
        self._charts = []
        self._images = []
        self._drawings = []
        self._comments = []
        self._pivots = []

    def write_data(self):
        """Write the various xml files into the zip archive."""
        archive = self._archive

        # Package-level parts: root relationships and document properties.
        archive.writestr(ARC_ROOT_RELS, write_root_rels(self.workbook))
        props = ExtendedProperties()
        archive.writestr(ARC_APP, tostring(props.to_tree()))

        archive.writestr(ARC_CORE,
                         tostring(self.workbook.properties.to_tree()))
        # Reuse the theme that was loaded with the workbook, if any.
        if self.workbook.loaded_theme:
            archive.writestr(ARC_THEME, self.workbook.loaded_theme)
        else:
            archive.writestr(ARC_THEME, write_theme())

        # Sheets go first: writing them fills self._images and self._charts,
        # which the next two calls consume.
        self._write_worksheets()
        self._write_chartsheets()
        self._write_images()
        self._write_charts()

        self._archive.writestr(
            ARC_SHARED_STRINGS,
            write_string_table(self.workbook.shared_strings))
        self._write_external_links()

        stylesheet = write_stylesheet(self.workbook)
        archive.writestr(ARC_STYLE, tostring(stylesheet))

        archive.writestr(ARC_WORKBOOK, write_workbook(self.workbook))
        archive.writestr(ARC_WORKBOOK_RELS, write_workbook_rels(self.workbook))

        self._merge_vba()

        # The manifest is written last, after all parts have been appended.
        self.manifest._write(archive, self.workbook)

    def _merge_vba(self):
        """
        If workbook contains macros then extract associated files from cache
        of old file and add to archive
        """
        ARC_VBA = re.compile("|".join(
            ('xl/vba', r'xl/drawings/.*vmlDrawing\d\.vml', 'xl/ctrlProps',
             'customUI', 'xl/activeX', r'xl/media/.*\.emf')))

        if self.workbook.vba_archive:
            # Names in vba_modified were already rewritten by this writer
            # and must not be copied over from the old archive.
            for name in set(
                    self.workbook.vba_archive.namelist()) - self.vba_modified:
                if ARC_VBA.match(name):
                    self._archive.writestr(
                        name, self.workbook.vba_archive.read(name))

    def _write_images(self):
        """Write the data of every collected image to the archive."""
        # delegate to object
        for img in self._images:
            # Part paths start with "/"; archive names do not.
            self._archive.writestr(img.path[1:], img._data())

    def _write_charts(self):
        """Write every collected chart and add it to the manifest.

        :raises InvalidFileException: if the same chart was collected more
            than once
        """
        # delegate to object
        if len(self._charts) != len(set(self._charts)):
            raise InvalidFileException(
                "The same chart cannot be used in more than one worksheet")
        for chart in self._charts:
            self._archive.writestr(chart.path[1:], tostring(chart._write()))
            self.manifest.append(chart)

    def _write_drawing(self, drawing):
        """
        Write a drawing
        """
        self._drawings.append(drawing)
        drawing._id = len(self._drawings)
        # Collect the drawing's charts and images; they are written later
        # by _write_charts() and _write_images().
        for chart in drawing.charts:
            self._charts.append(chart)
            chart._id = len(self._charts)
        for img in drawing.images:
            self._images.append(img)
            img._id = len(self._images)
        rels_path = get_rels_path(drawing.path)[1:]
        self._archive.writestr(drawing.path[1:], tostring(drawing._write()))
        self._archive.writestr(rels_path, tostring(drawing._write_rels()))
        self.manifest.append(drawing)

    def _write_chartsheets(self):
        """Write each chartsheet, plus its drawing and relationships if it has one."""
        for idx, sheet in enumerate(self.workbook.chartsheets, 1):

            sheet._id = idx
            xml = tostring(sheet.to_tree())

            self._archive.writestr(sheet.path[1:], xml)
            self.manifest.append(sheet)

            if sheet._drawing:
                self._write_drawing(sheet._drawing)

                # The sheet's relationships part holds a single entry
                # pointing at the drawing.
                rel = Relationship(type="drawing", Target=sheet._drawing.path)
                rels = RelationshipList()
                rels.append(rel)
                tree = rels.to_tree()

                rels_path = get_rels_path(sheet.path[1:])
                self._archive.writestr(rels_path, tostring(tree))

    def _write_comment(self, ws):
        """Write the comments of ``ws`` together with their VML shapes.

        Adds a "comments" relationship to ``ws._rels`` and records the VML
        file name in ``self.vba_modified``.
        """

        cs = CommentSheet.from_comments(ws._comments)
        self._comments.append(cs)
        cs._id = len(self._comments)
        self._archive.writestr(cs.path[1:], tostring(cs.to_tree()))
        self.manifest.append(cs)

        if ws.legacy_drawing is None:
            # No existing VML: assign a new path and start from nothing.
            ws.legacy_drawing = 'xl/drawings/commentsDrawing{0}.vml'.format(
                cs._id)
            vml = None
        else:
            # Existing VML is read from the preserved archive and extended.
            vml = fromstring(self.workbook.vba_archive.read(ws.legacy_drawing))

        vml = cs.write_shapes(vml)

        self._archive.writestr(ws.legacy_drawing, vml)
        self.vba_modified.add(ws.legacy_drawing)

        comment_rel = Relationship(Id="comments",
                                   type=cs._rel_type,
                                   Target=cs.path)
        ws._rels.append(comment_rel)

    def _write_worksheets(self):
        """Write each worksheet with its drawing, comments, tables and pivots."""

        # Caches that already have an id, so a cache shared by several
        # pivot tables is numbered only once.
        pivot_caches = set()

        for idx, ws in enumerate(self.workbook.worksheets, 1):

            ws._id = idx
            xml = ws._write()
            rels_path = get_rels_path(ws.path)[1:]

            self._archive.writestr(ws.path[1:], xml)
            self.manifest.append(ws)

            if ws._drawing:
                self._write_drawing(ws._drawing)

                # Point the existing drawing relationship(s) at the path
                # of the drawing just written.
                for r in ws._rels.Relationship:
                    if "drawing" in r.Type:
                        r.Target = ws._drawing.path

            if ws._comments:
                self._write_comment(ws)

            if ws.legacy_drawing is not None:
                shape_rel = Relationship(type="vmlDrawing",
                                         Id="anysvml",
                                         Target="/" + ws.legacy_drawing)
                ws._rels.append(shape_rel)

            for t in ws._tables:
                self._tables.append(t)
                t.id = len(self._tables)
                t._write(self._archive)
                self.manifest.append(t)
                # Update the table's relationship with its final path.
                ws._rels[t._rel_id].Target = t.path

            for p in ws._pivots:
                if p.cache not in pivot_caches:
                    pivot_caches.add(p.cache)
                    p.cache._id = len(pivot_caches)

                self._pivots.append(p)
                p._id = len(self._pivots)
                p._write(self._archive, self.manifest)
                self.workbook._pivots.append(p)
                r = Relationship(Type=p.rel_type, Target=p.path)
                ws._rels.append(r)

            # A relationships part is written only when there are any.
            if ws._rels:
                tree = ws._rels.to_tree()
                self._archive.writestr(rels_path, tostring(tree))

    def _write_external_links(self):
        # delegate to object
        """Write links to external workbooks"""
        wb = self.workbook
        for idx, link in enumerate(wb._external_links, 1):
            link._id = idx
            rels_path = get_rels_path(link.path[1:])

            xml = link.to_tree()
            self._archive.writestr(link.path[1:], tostring(xml))
            rels = RelationshipList()
            rels.append(link.file_link)
            self._archive.writestr(rels_path, tostring(rels.to_tree()))
            self.manifest.append(link)

    def save(self, filename):
        """Write data into the archive and close it.

        :param filename: not used by this method; data goes to the archive
            given to the constructor
        """
        try:
            self.write_data()
        finally:
            # Close even when writing fails so the archive is not left open.
            self._archive.close()
Exemple #19
0
def check_content_type(workbook_type, archive):
    """Assert that the archive's content-types part contains ``workbook_type``."""
    raw = archive.read(ARC_CONTENT_TYPES)
    content_types = Manifest.from_tree(fromstring(raw))
    assert workbook_type in content_types
Exemple #20
0
 def test_no_dupe_overrides(self, Manifest):
     """Appending the same Override value twice grows the list by one only."""
     content_types = Manifest()
     assert len(content_types.Override) == 5

     for _ in range(2):
         content_types.Override.append("a")

     assert len(content_types.Override) == 6
Exemple #21
0
def test_no_workbook():
    """Looking up the workbook part of a default Manifest raises IOError."""
    from openpyxl25.reader.excel import _find_workbook_part

    with pytest.raises(IOError):
        _find_workbook_part(Manifest())
Exemple #22
0
def load_workbook(filename,
                  read_only=False,
                  keep_vba=KEEP_VBA,
                  data_only=False,
                  guess_types=False,
                  keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl25.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl25.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)

    # Everything below reads from the archive; close it on every exit path
    # so a failure while parsing does not leave it open.
    try:
        src = archive.read(ARC_CONTENT_TYPES)
        root = fromstring(src)
        package = Manifest.from_tree(root)

        wb_part = _find_workbook_part(package)
        # Part names start with "/"; archive names do not.
        parser = WorkbookParser(archive, wb_part.PartName[1:])
        wb = parser.wb
        wb._data_only = data_only
        wb._read_only = read_only
        wb._keep_links = keep_links
        wb.guess_types = guess_types
        wb.template = wb_part.ContentType in (XLTX, XLTM)
        parser.parse()
        # Sheets are (re)created below from the parsed sheet list.
        wb._sheets = []

        if read_only and guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

        valid_files = archive.namelist()

        # If are going to preserve the vba then attach a copy of the archive to the
        # workbook so that is available for the save.
        if keep_vba:
            wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
            for name in valid_files:
                wb.vba_archive.writestr(name, archive.read(name))

        if read_only:
            # The workbook gets its own archive handle, separate from the
            # one closed at the end of this function.
            wb._archive = ZipFile(filename)

        # get workbook-level information
        if ARC_CORE in valid_files:
            src = fromstring(archive.read(ARC_CORE))
            wb.properties = DocumentProperties.from_tree(src)

        shared_strings = []
        ct = package.find(SHARED_STRINGS)
        if ct is not None:
            strings_path = ct.PartName[1:]
            shared_strings = read_string_table(archive.read(strings_path))

        if ARC_THEME in valid_files:
            wb.loaded_theme = archive.read(ARC_THEME)

        apply_stylesheet(archive, wb)  # bind styles to workbook
        pivot_caches = parser.pivot_caches

        # get worksheets
        for sheet, rel in parser.find_sheets():
            sheet_name = sheet.name
            worksheet_path = rel.target
            rels_path = get_rels_path(worksheet_path)
            rels = []
            if rels_path in valid_files:
                rels = get_dependents(archive, rels_path)

            # Skip sheets whose part is missing from the archive.
            if worksheet_path not in valid_files:
                continue

            if read_only:
                ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                       shared_strings)

                wb._sheets.append(ws)
            else:
                fh = archive.open(worksheet_path)
                ws = wb.create_sheet(sheet_name)
                ws._rels = rels
                ws_parser = WorkSheetParser(ws, fh, shared_strings)
                ws_parser.parse()

                if rels:
                    # assign any comments to cells
                    for r in rels.find(COMMENTS_NS):
                        src = archive.read(r.target)
                        comment_sheet = CommentSheet.from_tree(fromstring(src))
                        for ref, comment in comment_sheet.comments:
                            ws[ref].comment = comment

                    # preserve link to VML file if VBA
                    if (wb.vba_archive is not None
                            and ws.legacy_drawing is not None):
                        ws.legacy_drawing = rels[ws.legacy_drawing].target

                    for t in ws_parser.tables:
                        src = archive.read(t)
                        xml = fromstring(src)
                        table = Table.from_tree(xml)
                        ws.add_table(table)

                    # A distinct name keeps the outer loop's ``rel`` intact.
                    drawings = rels.find(SpreadsheetDrawing._rel_type)
                    for drawing_rel in drawings:
                        for c in find_charts(archive, drawing_rel.target):
                            ws.add_chart(c, c.anchor)

                    pivot_rel = rels.find(TableDefinition.rel_type)
                    for r in pivot_rel:
                        pivot_path = r.Target
                        src = archive.read(pivot_path)
                        tree = fromstring(src)
                        pivot = TableDefinition.from_tree(tree)
                        pivot.cache = pivot_caches[pivot.cacheId]
                        ws.add_pivot(pivot)

            ws.sheet_state = sheet.state
            ws._rels = []  # reset

        parser.assign_names()

        # NOTE: wb._differential_styles.styles is deliberately not reset
        # here; tables may depend upon dxf.
    finally:
        archive.close()

    return wb
Exemple #23
0
 def test_no_dupe_types(self, Manifest):
     """Appending the same Default value twice grows the list by one only."""
     content_types = Manifest()
     assert len(content_types.Default) == 2

     for _ in range(2):
         content_types.Default.append("a")

     assert len(content_types.Default) == 3