def match_default_fixture(self, request):
    """Build a content-type map holding one override and one default.

    ``request.param`` is unpacked as ``(partname_str, ext, content_type)``.
    The map receives an override for '/bar/foo.xyz' and a default that maps
    *ext* to *content_type*.

    Returns a ``(ct_map, partname, content_type)`` tuple, where *partname*
    is the |PackURI| built from *partname_str*.
    """
    uri_str, extension, expected_content_type = request.param
    lookup_partname = PackURI(uri_str)

    content_type_map = _ContentTypeMap()
    content_type_map._add_override(
        PackURI('/bar/foo.xyz'), 'application/xyz'
    )
    content_type_map._add_default(extension, expected_content_type)

    return content_type_map, lookup_partname, expected_content_type
def match_override_fixture(self, request):
    """Build a content-type map holding a single override.

    ``request.param`` is unpacked as
    ``(partname_str, should_match_partname_str)``. The first string is the
    partname registered as an override for 'appl/vnd-foobar'; the second is
    the partname handed back for the lookup.

    Returns a ``(ct_map, should_match_partname, content_type)`` tuple.
    """
    registered_str, lookup_str = request.param
    registered_partname = PackURI(registered_str)
    lookup_partname = PackURI(lookup_str)
    expected_content_type = 'appl/vnd-foobar'

    content_type_map = _ContentTypeMap()
    content_type_map._add_override(registered_partname, expected_content_type)

    return content_type_map, lookup_partname, expected_content_type
def it_can_calculate_relative_ref_value(self):
    """Check ``PackURI.relative_ref()`` against a table of known cases.

    Each row is ``(baseURI, uri_str, expected_relative_ref)``.
    """
    expectations = [
        ('/', '/ppt/presentation.xml', 'ppt/presentation.xml'),
        (
            '/ppt',
            '/ppt/slideMasters/slideMaster1.xml',
            'slideMasters/slideMaster1.xml',
        ),
        (
            '/ppt/slides',
            '/ppt/slideLayouts/slideLayout1.xml',
            '../slideLayouts/slideLayout1.xml',
        ),
    ]
    for base_uri, target_str, expected_ref in expectations:
        actual_ref = PackURI(target_str).relative_ref(base_uri)
        assert actual_ref == expected_ref
def _mock_part(self, request, name, partname_str, content_type):
    """Return an instance mock of |Part| with the given attributes.

    *partname_str* is converted to a |PackURI| before being set as the
    mock's ``partname``; *name* and *content_type* are passed through.
    """
    mock_attrs = {
        'name': name,
        'partname': PackURI(partname_str),
        'content_type': content_type,
    }
    return instance_mock(request, Part, **mock_attrs)
def replace_part(items, raw_items):
    """Replace the first part whose file name equals `from_pic`.

    *items* is iterated as ``(key, part)`` pairs. For the first part whose
    partname basename matches `from_pic`, an image is loaded from `to_pic`,
    wrapped in a new |ImagePart| located in the same directory as the old
    part, and stored into *raw_items* under the same key. Nothing happens
    when no part matches.

    `from_pic` and `to_pic` are not parameters; they are read from the
    surrounding scope.
    """
    for key, part in items:
        if path.basename(part.partname) != from_pic:
            continue
        replacement = Image.from_file(to_pic)
        part_dir = path.dirname(part.partname)
        new_partname = PackURI(path.join(part_dir, replacement.filename))
        new_part = ImagePart.from_image(replacement, new_partname)
        raw_items.__setitem__(key, new_part)
        # Only the first match is replaced.
        break
def filename_fixture(self, request, image_):
    """Provide an |ImagePart| and the filename it is expected to report.

    ``request.param`` selects the case:

    * ``'loaded'`` -- the part is built without an image; the expected
      filename is 'image.png'.
    * ``'new'`` -- *image_* is given the filename 'foobar.PXG' and passed to
      the part; the expected filename is that of *image_*.

    Returns an ``(image_part, expected_filename)`` tuple.
    """
    media_uri = PackURI('/word/media/image666.png')
    case = request.param
    if case == 'loaded':
        part_under_test = ImagePart(media_uri, None, None, None)
        filename = 'image.png'
    elif case == 'new':
        image_.filename = 'foobar.PXG'
        part_under_test = ImagePart(media_uri, None, None, image_)
        filename = image_.filename
    return part_under_test, filename
def it_should_have_relative_ref_for_internal_rel(self):
    """
    An internal relationship (TargetMode == 'Internal' in the XML) should
    expose a relative reference such as '../slideLayouts/slideLayout1.xml'
    as its target_ref attribute.
    """
    target_part = Mock(
        name='part', partname=PackURI('/ppt/media/image1.png')
    )
    # The `external` argument is omitted, i.e. left at its default.
    relationship = _Relationship(None, None, target_part, '/ppt/slides')
    assert relationship.target_ref == '../media/image1.png'
def _update_part(self):
    """Write the current XML element back into the custom-properties part.

    When ``self.part`` already exists, only its blob is refreshed from
    ``self._element``. Otherwise a new part is created at
    '/docProps/custom.xml', related to the package, and ``self._element``
    is re-parsed from the new part's blob.
    """
    if self.part is not None:
        self.part._blob = serialize_part_xml(self._element)
        return

    # No part yet: create one for the custom properties.
    custom_uri = PackURI('/docProps/custom.xml')
    self.part = Part(
        custom_uri,
        CT.OFC_CUSTOM_PROPERTIES,
        serialize_part_xml(self._element),
        self.doc.part.package,
    )
    self.doc.part.package.relate_to(self.part, RT.CUSTOM_PROPERTIES)
    self._element = parse_xml(self.part.blob)
def prepare_docx(file_name: str, drawing_dir: str = None) -> bytes:
    """
    Prepare docx document for Pandoc conversion:

    * Mark code blocks with SourceCode style
    * Mark inline monospace runs with strikethrough (converted to inline
      code later)
    * Replace vector graphics with raster
    * Give image parts deterministic, hash-based names

    :param file_name: path of the docx file to load.
    :param drawing_dir: optional directory holding pre-rendered drawings
        named ``00.png``, ``01.png``, ...; when falsy, no pictures are
        inserted.
    :return: the modified document serialized to bytes.
    """
    # Part names are package URIs and always use forward slashes, so they
    # must be joined with posixpath rather than the platform's os.path.
    import posixpath

    monospace_fonts = ('Consolas', 'Courier New')
    group_xpath = (
        './/*[@uri="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup"]'
    )

    doc = Document(file_name)
    # add_style() rejects a name that is already present, so only add the
    # style when the document does not have it yet.
    if 'SourceCode' not in doc.styles:
        doc.styles.add_style('SourceCode', WD_STYLE_TYPE.PARAGRAPH)

    drawing_idx = 0
    for para in doc.paragraphs:
        runs = para.runs
        if not runs:
            continue

        for run_idx, run in enumerate(runs):
            if run_idx == 0 and run.text == '\t':
                continue  # Ignore leading tabs
            is_monospace = run.font.name in monospace_fonts
            if run_idx == 0 and is_monospace:
                # If paragraph starts with a snippet in monospace font,
                # consider it a code block and mark it with SourceCode style
                # https://groups.google.com/d/msg/pandoc-discuss/SIwE9dhGF4U/Wjy8zmQ1CQAJ
                para.style = doc.styles['SourceCode']
                break
            if is_monospace:
                # Mark with strikethrough style to convert to inline code later
                run.font.strike = True

        first_run = runs[0]
        if first_run.element.xpath(group_xpath):
            # WordprocessingML group found
            if drawing_dir:
                # Pandoc can't convert embedded vector graphics
                # So we insert previously downloaded image in the same run
                drawing_path = os.path.join(
                    drawing_dir, f'{drawing_idx:02d}.png'
                )
                first_run.add_picture(drawing_path)
                drawing_idx += 1

    for part in doc.part.related_parts.values():
        if isinstance(part, ImagePart):
            # Use deterministic names for images
            image_hash = part.image.sha1
            image_name = posixpath.join(
                part.partname.baseURI,
                f'{image_hash}.{part.partname.ext}',
            )
            part.partname = PackURI(image_name)

    buffer = io.BytesIO()
    doc.save(buffer)
    return buffer.getvalue()
def next_partname(self, template):
    """Return a |PackURI| for the next unused partname matching *template*.

    *template* is a printf (%)-style string with a single '%d' placeholder
    for the integer portion of the partname, e.g. "/word/header%d.xml".
    Candidates are tried with 1, 2, 3, ... and the first one not already
    used by a part from ``self.iter_parts()`` is returned.
    """
    taken = {part.partname for part in self.iter_parts()}
    # With len(taken) + 1 candidates at least one must be unused.
    candidates = (template % n for n in range(1, len(taken) + 2))
    unused = next((name for name in candidates if name not in taken), None)
    if unused is None:
        return None
    return PackURI(unused)
def next_partname(self, tmpl):
    """
    Return a |PackURI| instance representing the next available partname
    matching *tmpl*, which is a printf (%)-style template string containing
    a single replacement item, a '%d' to be used to insert the integer
    portion of the partname. Example: '/word/media/image%d.png'

    Any '/ppt' occurrence in *tmpl* is rewritten to '/word' before the
    template is used. The lowest positive integer producing a partname not
    already used by a part from ``self.iter_parts()`` is chosen.
    """
    tmpl = tmpl.replace('/ppt', '/word')
    # A set makes each membership test O(1) instead of scanning a list.
    partnames = {part.partname for part in self.iter_parts()}
    # len(partnames) + 1 candidates guarantee that one of them is unused.
    for n in range(1, len(partnames) + 2):
        candidate_partname = tmpl % n
        if candidate_partname not in partnames:
            return PackURI(candidate_partname)
    raise Exception('ProgrammingError: ran out of candidate_partnames')
def it_can_write_a_blob(self, pkg_file):
    """Check that a blob written by PhysPkgWriter can be read back intact.

    The blob is written under '/part/name.xml', then read from *pkg_file*
    with ZipFile and compared to the original by SHA1 digest.
    """
    # setup ------------------------
    pack_uri = PackURI('/part/name.xml')
    blob = '<BlobbityFooBlob/>'.encode('utf-8')
    # exercise ---------------------
    pkg_writer = PhysPkgWriter(pkg_file)
    pkg_writer.write(pack_uri, blob)
    pkg_writer.close()
    # verify -----------------------
    written_blob_sha1 = hashlib.sha1(blob).hexdigest()
    # The context manager closes the archive even if the read fails.
    with ZipFile(pkg_file, 'r') as zipf:
        retrieved_blob = zipf.read(pack_uri.membername)
    retrieved_blob_sha1 = hashlib.sha1(retrieved_blob).hexdigest()
    assert retrieved_blob_sha1 == written_blob_sha1
def cases(self, expected_values):
    """
    Pair each built-in uri_str case, converted to a |PackURI|, with the
    corresponding item of *expected_values* and return the zipped result.
    Raise |AssertionError| if *expected_values* does not have exactly one
    item per uri_str case.
    """
    uri_strs = (
        '/',
        '/ppt/presentation.xml',
        '/ppt/slides/slide1.xml',
    )
    if len(expected_values) != len(uri_strs):
        raise AssertionError(
            "len(expected_values) differs from len(uri_str_cases)"
        )
    return zip([PackURI(uri_str) for uri_str in uri_strs], expected_values)
def footnote_part(self):
    """Return the footnotes part of the document, creating it if needed.

    The part related to the document part by RT.FOOTNOTES is returned when
    present. When that lookup raises KeyError, a new part is created at
    '/word/footnotes.xml' from the 'templates/footnotes.xml' file located
    next to this module, related to the document part, and returned.
    """
    try:
        return self.doc.part.rels.part_with_reltype(RT.FOOTNOTES)
    except KeyError:
        # No footnotes part yet: build an empty one from the template.
        template_path = os.path.join(
            os.path.dirname(__file__), 'templates', 'footnotes.xml'
        )
        with open(template_path, 'rb') as template_file:
            template_blob = template_file.read()
        new_part = Part(
            PackURI('/word/footnotes.xml'),
            CT.WML_FOOTNOTES,
            template_blob,
            self.doc.part.package,
        )
        self.doc.part.relate_to(new_part, RT.FOOTNOTES)
        return new_part
def dimensions_fixture(self, request):
    """Provide an |ImagePart| for 'monty-truth.png' and its expected size.

    ``request.param`` selects how the part is produced:

    * ``'loaded'`` -- via ``ImagePart.load()`` from the image blob, with no
      Image instance supplied.
    * ``'new'`` -- via ``ImagePart.from_image()`` from the image itself.

    Returns an ``(image_part, expected_cx, expected_cy)`` tuple.
    """
    source_image = Image.from_file(test_file('monty-truth.png'))
    cx, cy = 1905000, 2717800

    case = request.param
    if case == 'loaded':
        # case 1: image part is loaded by PartFactory w/no Image inst
        part_under_test = ImagePart.load(
            PackURI('/word/media/image1.png'), CT.PNG, source_image.blob, None
        )
    elif case == 'new':
        # case 2: image part is newly created from image file
        part_under_test = ImagePart.from_image(source_image, None)

    return part_under_test, cx, cy
def numbering_part(self):
    """Return the numbering part of the document, creating it if needed.

    The part related to the document part by RT.NUMBERING is returned when
    present. When that lookup raises KeyError, a new |NumberingPart| is
    created at '/word/numbering.xml' from the parsed
    'templates/numbering.xml' file located next to this module, related to
    the document part, and returned.
    """
    try:
        return self.doc.part.rels.part_with_reltype(RT.NUMBERING)
    except KeyError:
        # No numbering part yet: build an empty one from the template.
        template_path = os.path.join(
            os.path.dirname(__file__), 'templates', 'numbering.xml'
        )
        with open(template_path, 'rb') as template_file:
            template_blob = template_file.read()
        root_element = parse_xml(template_blob)
        new_part = NumberingPart(
            PackURI('/word/numbering.xml'),
            CT.WML_NUMBERING,
            root_element,
            self.doc.part.package,
        )
        self.doc.part.relate_to(new_part, RT.NUMBERING)
        return new_part
def footer_part(self, content=None):
    """Create, relate and return a new footer part for the document.

    The part is numbered one higher than the count of existing RT.FOOTER
    relationships on the document part. When *content* is falsy, the blob
    is read from the 'templates/footer.xml' file located next to this
    module; otherwise *content* is used as the blob.
    """
    existing_footers = sum(
        1 for rel in self.doc.part.rels.values() if rel.reltype == RT.FOOTER
    )
    # Create a new footer part
    new_partname = PackURI('/word/footer%s.xml' % (existing_footers + 1))
    if not content:
        template_path = os.path.join(
            os.path.dirname(__file__), 'templates', 'footer.xml'
        )
        with open(template_path, 'rb') as template_file:
            content = template_file.read()
    new_part = Part(
        new_partname, CT.WML_FOOTER, content, self.doc.part.package
    )
    self.doc.part.relate_to(new_part, RT.FOOTER)
    return new_part
def add_relationship(self, src_part, dst_part, relationship):
    """Add *relationship* and its target part to *dst_part*.

    An external relationship is added (or reused) on *dst_part* directly
    and returned. For an internal one, the target part is copied into the
    package of *dst_part* under the lowest unused numbered partname, the
    copy is related to *dst_part*, and the target's own relationships are
    added to the copy recursively. The new relationship is returned.

    *src_part* is not used by this method.
    """
    if relationship.is_external:
        ext_rid = dst_part.rels.get_or_add_ext_rel(
            relationship.reltype, relationship.target_ref
        )
        return dst_part.rels[ext_rid]

    target = relationship.target_part

    # Determine next partname: collect the numbers already used by parts
    # whose partname starts with the same prefix as the target's.
    # presumably group(1) is the name stem and group(2) the optional index;
    # verify against FILENAME_IDX_RE
    prefix = FILENAME_IDX_RE.match(target.partname).group(1)
    raw_indices = [
        FILENAME_IDX_RE.match(existing.partname).group(2)
        for existing in dst_part.package.iter_parts()
        if existing.partname.startswith(prefix)
    ]
    taken = [int(raw) for raw in raw_indices if raw is not None]
    # len(taken) + 1 candidates guarantee that one of them is free.
    free_number = next(
        n for n in range(1, len(taken) + 2) if n not in taken
    )

    copy_partname = PackURI(
        '%s%d.%s' % (prefix, free_number, target.partname.ext)
    )
    copied_part = Part(
        copy_partname, target.content_type, target.blob, dst_part.package
    )
    copied_rel = dst_part.rels.get_or_add(relationship.reltype, copied_part)

    # Sort relationships by rId to get the same rId when adding them to the
    # new part. This avoids fixing references.
    def rid_number(rel):
        return int(RID_IDX_RE.match(rel.rId).group(1))

    for child_rel in sorted(target.rels.values(), key=rid_number):
        self.add_relationship(target, copied_part, child_rel)
    return copied_rel
def it_returns_none_when_part_has_no_rels_xml(self, dir_reader):
    """``rels_xml_for()`` yields None for '/ppt/viewProps.xml'."""
    view_props_uri = PackURI('/ppt/viewProps.xml')
    assert dir_reader.rels_xml_for(view_props_uri) is None
def partname_set_fixture(self):
    """Provide a |Part| created with one partname and a second partname.

    Returns a ``(part, new_partname)`` tuple; the part is constructed with
    '/old/part/name' and *new_partname* is '/new/part/name'.
    """
    part_under_test = Part(PackURI('/old/part/name'), None, None, None)
    replacement_partname = PackURI('/new/part/name')
    return part_under_test, replacement_partname
def partname_get_fixture(self):
    """Provide a |Part| together with the partname it was created with.

    Returns a ``(part, partname)`` tuple for '/part/name'.
    """
    expected_partname = PackURI('/part/name')
    return Part(expected_partname, None, None, None), expected_partname
def it_should_raise_on_construct_with_bad_pack_uri_str(self):
    """Constructing a |PackURI| from 'foobar' must raise ValueError."""
    expected_error = ValueError
    with pytest.raises(expected_error):
        PackURI('foobar')
def _image_partname(self, n):
    """Return the |PackURI| of the PNG media part numbered *n*."""
    uri_str = '/word/media/image%d.png' % n
    return PackURI(uri_str)
def it_should_raise_on_partname_not_found(self):
    """Looking up a partname in an empty content-type map raises KeyError."""
    empty_map = _ContentTypeMap()
    with pytest.raises(KeyError):
        empty_map[PackURI('/!blat/rhumba.1x&')]
def it_should_raise_on_key_not_instance_of_PackURI(self):
    """A plain-str key raises KeyError even when it equals an override.

    The map's overrides contain '/part/name1.xml' as a |PackURI|; the
    lookup uses the same text as a plain string.
    """
    content_type_map = _ContentTypeMap()
    content_type_map._overrides = {
        PackURI('/part/name1.xml'): 'app/vnd.type1',
    }
    with pytest.raises(KeyError):
        content_type_map['/part/name1.xml']
def it_can_construct_from_relative_ref(self):
    """``PackURI.from_rel_ref()`` resolves a '../' ref against a base URI."""
    resolved = PackURI.from_rel_ref(
        '/ppt/slides', '../slideLayouts/slideLayout1.xml'
    )
    assert resolved == '/ppt/slideLayouts/slideLayout1.xml'
def it_can_retrieve_the_blob_for_a_pack_uri(self, phys_reader):
    """The blob read for '/word/document.xml' has the expected SHA1."""
    document_uri = PackURI('/word/document.xml')
    digest = hashlib.sha1(phys_reader.blob_for(document_uri)).hexdigest()
    assert digest == 'b9b4a98bcac7c5a162825b60c3db7df11e02ac5f'
def it_can_retrieve_the_blob_for_a_pack_uri(self, dir_reader):
    """Read the blob for '/word/document.xml' and hash it, then skip.

    The test is skipped after the blob is read and hashed, so the final
    digest assertion is currently never evaluated.
    """
    document_uri = PackURI('/word/document.xml')
    digest = hashlib.sha1(dir_reader.blob_for(document_uri)).hexdigest()
    pytest.skip('hacking on expanded_docx atm, sha is off')
    assert digest == '0e62d87ea74ea2b8088fd11ee97b42da9b4c77b0'
def image_partname(n):
    """Return the |PackURI| of the media image part numbered *n*.

    The file extension comes from `ext`, which is not a parameter; it is
    read from the surrounding scope.
    """
    uri_str = '/word/media/image%d.%s' % (n, ext)
    return PackURI(uri_str)
def it_can_retrieve_the_blob_for_a_pack_uri(self, dir_reader):
    """The blob read for '/word/document.xml' has the expected SHA1."""
    document_uri = PackURI('/word/document.xml')
    digest = hashlib.sha1(dir_reader.blob_for(document_uri)).hexdigest()
    assert digest == '0e62d87ea74ea2b8088fd11ee97b42da9b4c77b0'