Example #1
0
 def match_default_fixture(self, request):
     """Build a content-type map holding one override and one default.

     Returns the map, the partname built from the parametrized string,
     and the content type registered as the default for the extension.
     """
     uri_text, extension, expected_ct = request.param
     lookup_uri = PackURI(uri_text)
     content_types = _ContentTypeMap()
     # Override for a fixed partname, added before the default entry
     content_types._add_override(PackURI("/bar/foo.xyz"), "application/xyz")
     content_types._add_default(extension, expected_ct)
     return content_types, lookup_uri, expected_ct
Example #2
0
 def match_override_fixture(self, request):
     """Build a content-type map containing a single override entry.

     Returns the map, the partname the test should look up, and the
     content type stored for the override.
     """
     override_uri_text, lookup_uri_text = request.param
     override_uri = PackURI(override_uri_text)
     lookup_uri = PackURI(lookup_uri_text)
     expected_ct = "appl/vnd-foobar"
     content_types = _ContentTypeMap()
     content_types._add_override(override_uri, expected_ct)
     return content_types, lookup_uri, expected_ct
 def it_can_calculate_relative_ref_value(self):
     """Check PackURI.relative_ref() for several base URI / target pairs."""
     # Each scenario is (base URI, absolute pack URI, expected relative ref)
     scenarios = [
         ("/", "/ppt/presentation.xml", "ppt/presentation.xml"),
         (
             "/ppt",
             "/ppt/slideMasters/slideMaster1.xml",
             "slideMasters/slideMaster1.xml",
         ),
         (
             "/ppt/slides",
             "/ppt/slideLayouts/slideLayout1.xml",
             "../slideLayouts/slideLayout1.xml",
         ),
     ]
     for base_uri, target_str, expected in scenarios:
         actual = PackURI(target_str).relative_ref(base_uri)
         assert actual == expected
Example #4
0
 def _mock_part(self, request, name, partname_str, content_type):
     """Return an instance mock of Part carrying the given attributes.

     *partname_str* is wrapped in a PackURI before being handed to the
     mock as its ``partname``.
     """
     mock_attrs = {
         "name": name,
         "partname": PackURI(partname_str),
         "content_type": content_type,
     }
     return instance_mock(request, Part, **mock_attrs)
Example #5
0
 def replace_part(items, raw_items):
     """Replace the first matching part in *raw_items* with a new image part.

     *items* yields ``(key, part)`` pairs. The first part whose partname
     base name equals ``from_pic`` is replaced, under the same key, by an
     ImagePart built from the image file ``to_pic``.
     NOTE(review): ``from_pic`` and ``to_pic`` are not defined in this
     block; presumably they come from an enclosing scope.
     """
     for key, part in items:
         if path.basename(part.partname) != from_pic:
             continue
         replacement = Image.from_file(to_pic)
         # Keep the original directory, take the new image's file name
         target_dir = path.dirname(part.partname)
         new_partname = PackURI(path.join(target_dir, replacement.filename))
         new_part = ImagePart.from_image(replacement, new_partname)
         raw_items.__setitem__(key, new_part)
         break
Example #6
0
 def filename_fixture(self, request, image_):
     """Return an ImagePart and the file name expected for it.

     For the 'loaded' param the part is built without an image and the
     expected name is 'image.png'; for 'new' the part wraps *image_*,
     whose filename is set to 'foobar.PXG' first.
     """
     uri = PackURI("/word/media/image666.png")
     mode = request.param
     if mode == "loaded":
         # Part constructed with no Image instance
         part, expected = ImagePart(uri, None, None, None), "image.png"
     elif mode == "new":
         image_.filename = "foobar.PXG"
         part = ImagePart(uri, None, None, image_)
         expected = image_.filename
     return part, expected
Example #7
0
 def it_should_have_relative_ref_for_internal_rel(self):
     """
     A relationship constructed without the external flag should expose
     its target_ref relative to the base URI, here '../media/image1.png'
     for a target at '/ppt/media/image1.png' and base URI '/ppt/slides'.
     """
     target_uri = PackURI("/ppt/media/image1.png")
     target_part = Mock(name="part", partname=target_uri)
     # Fourth positional argument is the base URI; external is left unset
     relationship = _Relationship(None, None, target_part, "/ppt/slides")
     assert relationship.target_ref == "../media/image1.png"
Example #8
0
 def _update_part(self):
     """Push the current element into the part, creating the part if absent."""
     if self.part is not None:
         # Part already exists: refresh its serialized content only
         self.part._blob = serialize_part_xml(self._element)
         return

     # No part yet: build one at '/docProps/custom.xml', relate it to the
     # package, then re-parse the element from the new part's blob
     uri = PackURI("/docProps/custom.xml")
     package = self.doc.part.package
     self.part = Part(
         uri,
         CT.OFC_CUSTOM_PROPERTIES,
         serialize_part_xml(self._element),
         package,
     )
     package.relate_to(self.part, RT.CUSTOM_PROPERTIES)
     self._element = parse_xml(self.part.blob)
Example #9
0
def prepare_docx(file_name: str, drawing_dir: str = None) -> bytes:
    """
    Prepare docx document for Pandoc conversion:
    * Mark code blocks with SourceCode style
    * Replace vector graphics with raster

    :param file_name: docx file passed to ``Document``.
    :param drawing_dir: optional directory containing images named
        ``00.png``, ``01.png``, ... in the order the drawing groups occur;
        when falsy, no pictures are inserted.
    :return: the modified document, saved into memory, as bytes.
    """
    monospace_fonts = ('Consolas', 'Courier New')
    group_xpath = (
        './/*[@uri="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup"]'
    )

    doc = Document(file_name)
    # add_style() raises ValueError if the name is already taken, so only
    # create the style when the document does not have it yet.
    if 'SourceCode' not in doc.styles:
        doc.styles.add_style('SourceCode', WD_STYLE_TYPE.PARAGRAPH)
    drawing_idx = 0

    for para in doc.paragraphs:
        runs = para.runs  # read once; reused for the loop and the group check
        if not runs:
            continue

        for run_idx, run in enumerate(runs):
            if run_idx == 0 and run.text == '\t':
                continue  # Ignore leading tabs
            is_monospace = run.font.name in monospace_fonts
            if run_idx == 0 and is_monospace:
                # If paragraph starts with a snippet in monospace font,
                # consider it a code block and mark it with SourceCode style
                # https://groups.google.com/d/msg/pandoc-discuss/SIwE9dhGF4U/Wjy8zmQ1CQAJ
                para.style = doc.styles['SourceCode']
                break
            if is_monospace:
                # Mark with strikethrough style to convert to inline code later
                run.font.strike = True

        first_run = runs[0]
        if first_run.element.xpath(group_xpath):
            # WordprocessingML group found
            if drawing_dir:
                # Pandoc can't convert embedded vector graphics
                # So we insert previously downloaded image in the same run
                drawing_path = os.path.join(drawing_dir,
                                            f'{drawing_idx:02d}.png')
                first_run.add_picture(drawing_path)
                drawing_idx += 1

    for part in doc.part.related_parts.values():
        if isinstance(part, ImagePart):
            # Use deterministic names for images: the SHA1 of the image
            # replaces the file name, directory and extension are kept
            image_hash = part.image.sha1
            image_name = os.path.join(
                part.partname.baseURI,
                f'{image_hash}.{part.partname.ext}',
            )
            part.partname = PackURI(image_name)

    buffer = io.BytesIO()
    doc.save(buffer)
    return buffer.getvalue()
Example #10
0
    def next_partname(self, template):
        """Return a |PackURI| for the first unused partname matching *template*.

        *template* is a printf (%)-style string with a single '%d' item that
        receives the integer part of the name, e.g. "/word/header%d.xml".
        Integers are tried from 1 upward and the first candidate not already
        used by a part from `iter_parts()` is returned.
        """
        taken = set()
        for part in self.iter_parts():
            taken.add(part.partname)

        # With len(taken) names in use, one of the first len(taken) + 1
        # candidates must be free.
        last_candidate = len(taken) + 1
        number = 1
        while number <= last_candidate:
            candidate = template % number
            if candidate not in taken:
                return PackURI(candidate)
            number += 1
Example #11
0
 def next_partname(self, tmpl):
     """
     Return a |PackURI| instance representing the next available partname
     matching *tmpl*, which is a printf (%)-style template string
     containing a single replacement item, a '%d' to be used to insert the
     integer portion of the partname. Example: '/word/slides/slide%d.xml'

     Any '/ppt' in *tmpl* is rewritten to '/word' before matching. Raises
     Exception if no candidate is free, which should not happen because
     one more candidate is tried than there are distinct partnames in use.
     """
     tmpl = tmpl.replace('/ppt', '/word')
     # A set keeps each membership test O(1); a list made the whole
     # search quadratic in the number of parts.
     partnames = {part.partname for part in self.iter_parts()}
     for n in range(1, len(partnames) + 2):
         candidate_partname = tmpl % n
         if candidate_partname not in partnames:
             return PackURI(candidate_partname)
     raise Exception('ProgrammingError: ran out of candidate_partnames')
Example #12
0
 def it_can_write_a_blob(self, pkg_file):
     """A blob written through PhysPkgWriter can be read back from the zip.

     The blob is written under a PackURI, then read from *pkg_file* with
     ZipFile using the URI's ``membername``; SHA1 digests are compared.
     """
     # setup ------------------------
     pack_uri = PackURI('/part/name.xml')
     blob = '<BlobbityFooBlob/>'.encode('utf-8')
     # exercise ---------------------
     pkg_writer = PhysPkgWriter(pkg_file)
     pkg_writer.write(pack_uri, blob)
     pkg_writer.close()
     # verify -----------------------
     written_blob_sha1 = hashlib.sha1(blob).hexdigest()
     # The context manager closes the archive even when read() raises
     with ZipFile(pkg_file, 'r') as zipf:
         retrieved_blob = zipf.read(pack_uri.membername)
     retrieved_blob_sha1 = hashlib.sha1(retrieved_blob).hexdigest()
     assert retrieved_blob_sha1 == written_blob_sha1
 def cases(self, expected_values):
     """
     Pair each built-in uri_str case, wrapped as a PackURI, with the
     corresponding item of *expected_values* and return the zipped
     result. Raise AssertionError when the two lengths differ.
     """
     uri_strings = (
         "/",
         "/ppt/presentation.xml",
         "/ppt/slides/slide1.xml",
     )
     if len(uri_strings) != len(expected_values):
         raise AssertionError(
             "len(expected_values) differs from len(uri_str_cases)"
         )
     # URIs are built eagerly, before the zip object is created
     uris = [PackURI(text) for text in uri_strings]
     return zip(uris, expected_values)
Example #14
0
 def footnote_part(self):
     """Return the document's footnotes part, creating it when missing.

     When the relationship lookup raises KeyError, a new part is built
     from the 'templates/footnotes.xml' file next to this module and
     related to the document part.
     """
     try:
         return self.doc.part.rels.part_with_reltype(RT.FOOTNOTES)
     except KeyError:
         # No footnotes relationship yet: build the part from the template
         template_path = os.path.join(
             os.path.dirname(__file__), 'templates', 'footnotes.xml')
         uri = PackURI('/word/footnotes.xml')
         with open(template_path, 'rb') as stream:
             template_bytes = stream.read()
         new_part = Part(
             uri, CT.WML_FOOTNOTES, template_bytes, self.doc.part.package)
         self.doc.part.relate_to(new_part, RT.FOOTNOTES)
         return new_part
Example #15
0
    def dimensions_fixture(self, request):
        """Return an ImagePart for 'monty-truth.png' and its expected cx, cy.

        The 'loaded' param builds the part through ImagePart.load() from the
        image blob; the 'new' param builds it through ImagePart.from_image().
        """
        source_image = Image.from_file(test_file("monty-truth.png"))
        mode = request.param

        if mode == "loaded":
            # case 1: part loaded from partname, content type and blob,
            # with None as the final argument
            uri = PackURI("/word/media/image1.png")
            image_part = ImagePart.load(uri, CT.PNG, source_image.blob, None)
        elif mode == "new":
            # case 2: part created directly from the Image instance
            image_part = ImagePart.from_image(source_image, None)

        return image_part, 1905000, 2717800
Example #16
0
 def numbering_part(self):
     """Return the document's numbering part, creating it when missing.

     When the relationship lookup raises KeyError, a NumberingPart is
     built from the parsed 'templates/numbering.xml' file next to this
     module and related to the document part.
     """
     try:
         return self.doc.part.rels.part_with_reltype(RT.NUMBERING)
     except KeyError:
         # No numbering relationship yet: build the part from the template
         template_path = os.path.join(
             os.path.dirname(__file__), 'templates', 'numbering.xml')
         uri = PackURI('/word/numbering.xml')
         with open(template_path, 'rb') as stream:
             template_bytes = stream.read()
         root = parse_xml(template_bytes)
         new_part = NumberingPart(
             uri, CT.WML_NUMBERING, root, self.doc.part.package)
         self.doc.part.relate_to(new_part, RT.NUMBERING)
         return new_part
Example #17
0
 def footer_part(self, content=None):
     """Create a new footer part, relate it to the document, and return it.

     *content* supplies the blob for the part; when it is falsy, the
     bytes of 'templates/footer.xml' next to this module are used.
     """
     existing_footers = 0
     for rel in self.doc.part.rels.values():
         if rel.reltype == RT.FOOTER:
             existing_footers += 1

     # Number the new footer part one past the count of footer relationships
     uri = PackURI('/word/footer%s.xml' % (existing_footers + 1))
     if not content:
         template_path = os.path.join(
             os.path.dirname(__file__), 'templates', 'footer.xml')
         with open(template_path, 'rb') as stream:
             content = stream.read()

     new_part = Part(uri, CT.WML_FOOTER, content, self.doc.part.package)
     self.doc.part.relate_to(new_part, RT.FOOTER)
     return new_part
Example #18
0
    def add_relationship(self, src_part, dst_part, relationship):
        """Add *relationship* and its target part to *dst_part*.

        An external relationship is obtained through
        ``dst_part.rels.get_or_add_ext_rel`` and returned. Otherwise the
        target part is copied into *dst_part*'s package under the lowest
        unused number for its name, related to *dst_part*, and the target's
        own relationships are added to the copy recursively. The new
        relationship is returned. *src_part* is not used in this method.
        """
        if relationship.is_external:
            ext_rid = dst_part.rels.get_or_add_ext_rel(
                relationship.reltype, relationship.target_ref)
            return dst_part.rels[ext_rid]

        source = relationship.target_part

        # Group 1 of FILENAME_IDX_RE is the name stem; group 2 is the
        # number (None when absent) of each part sharing that stem.
        stem = FILENAME_IDX_RE.match(source.partname).group(1)
        raw_numbers = [
            FILENAME_IDX_RE.match(other.partname).group(2)
            for other in dst_part.package.iter_parts()
            if other.partname.startswith(stem)
        ]
        taken = [int(raw) for raw in raw_numbers if raw is not None]

        # One more candidate than numbers in use, so a free one always exists
        free_number = next(
            n for n in range(1, len(taken) + 2) if n not in taken
        )
        copy_uri = PackURI(
            '%s%d.%s' % (stem, free_number, source.partname.ext))

        copy = Part(copy_uri, source.content_type, source.blob,
                    dst_part.package)
        copy_rel = dst_part.rels.get_or_add(relationship.reltype, copy)

        # Add child relationships in ascending rId order so the copy gets
        # the same rIds and references need no fixing.
        def rid_number(rel):
            return int(RID_IDX_RE.match(rel.rId).group(1))

        for child_rel in sorted(source.rels.values(), key=rid_number):
            self.add_relationship(source, copy, child_rel)

        return copy_rel
Example #19
0
 def it_returns_none_when_part_has_no_rels_xml(self, dir_reader):
     """rels_xml_for() should give None for '/ppt/viewProps.xml'."""
     uri = PackURI("/ppt/viewProps.xml")
     assert dir_reader.rels_xml_for(uri) is None
Example #20
0
 def partname_set_fixture(self):
     """Return a Part created with one partname plus a different partname."""
     original_uri, replacement_uri = (
         PackURI("/old/part/name"),
         PackURI("/new/part/name"),
     )
     return Part(original_uri, None, None, None), replacement_uri
Example #21
0
 def partname_get_fixture(self):
     """Return a Part together with the partname it was created with."""
     uri = PackURI("/part/name")
     return Part(uri, None, None, None), uri
 def it_should_raise_on_construct_with_bad_pack_uri_str(self):
     """Constructing a PackURI from 'foobar' should raise ValueError."""
     pytest.raises(ValueError, PackURI, "foobar")
Example #23
0
 def _image_partname(self, n):
     return PackURI('/word/media/image%d.png' % n)
Example #24
0
 def it_should_raise_on_partname_not_found(self):
     """Looking up a partname in an empty map should raise KeyError."""
     empty_map = _ContentTypeMap()
     with pytest.raises(KeyError):
         empty_map[PackURI("/!blat/rhumba.1x&")]
Example #25
0
 def it_should_raise_on_key_not_instance_of_PackURI(self):
     """A plain-string key should raise KeyError even when its text
     equals a PackURI key present in the overrides."""
     content_types = _ContentTypeMap()
     override_key = PackURI("/part/name1.xml")
     content_types._overrides = {override_key: "app/vnd.type1"}
     with pytest.raises(KeyError):
         content_types["/part/name1.xml"]
 def it_can_construct_from_relative_ref(self):
     """from_rel_ref() should resolve a '../' reference against the base URI."""
     resolved = PackURI.from_rel_ref(
         "/ppt/slides", "../slideLayouts/slideLayout1.xml"
     )
     assert resolved == "/ppt/slideLayouts/slideLayout1.xml"
Example #27
0
 def it_can_retrieve_the_blob_for_a_pack_uri(self, phys_reader):
     """blob_for('/word/document.xml') should match a known SHA1 digest."""
     document_uri = PackURI("/word/document.xml")
     digest = hashlib.sha1(phys_reader.blob_for(document_uri)).hexdigest()
     assert digest == "b9b4a98bcac7c5a162825b60c3db7df11e02ac5f"
Example #28
0
 def it_can_retrieve_the_blob_for_a_pack_uri(self, dir_reader):
     """Read the blob for '/word/document.xml' and hash it, then skip.

     The digest comparison after pytest.skip() is currently not reached.
     """
     document_uri = PackURI("/word/document.xml")
     digest = hashlib.sha1(dir_reader.blob_for(document_uri)).hexdigest()
     # Blob retrieval and hashing still run before the test is skipped
     pytest.skip('hacking on expanded_docx atm, sha is off')
     assert digest == "0e62d87ea74ea2b8088fd11ee97b42da9b4c77b0"
Example #29
0
 def image_partname(n):
     """Return the PackURI '/word/media/image<n>.<ext>' for integer *n*.

     NOTE(review): ``ext`` is not defined in this block; presumably it
     comes from an enclosing scope.
     """
     partname_str = '/word/media/image%d.%s' % (n, ext)
     return PackURI(partname_str)
Example #30
0
 def it_can_retrieve_the_blob_for_a_pack_uri(self, dir_reader):
     """blob_for('/word/document.xml') should match a known SHA1 digest."""
     document_uri = PackURI("/word/document.xml")
     digest = hashlib.sha1(dir_reader.blob_for(document_uri)).hexdigest()
     assert digest == "0e62d87ea74ea2b8088fd11ee97b42da9b4c77b0"