def scour_xml(doc):
    """Run a minidom document through scour and return a freshly parsed copy.

    The incoming ``doc`` is serialised with ``toxml()`` and then unlinked, so
    callers must not keep using it after this call. The scoured markup is
    re-parsed with ``minidom.parseString`` and that new document is returned.
    """
    raw_markup = doc.toxml()
    doc.unlink()

    scour_settings = Values({
        "remove_descriptive_elements": True,
        "enable_viewboxing": True,
        "strip_ids": True,
        "protect_ids_list": "ayah_markers,content",
    })
    sanitized = scour.sanitizeOptions(scour_settings)

    # Scour works on strings, so hand it the serialised document.
    cleaned_markup = scour.scourString(raw_markup, sanitized)

    # Build the xml.dom.minidom document that is handed back to the caller.
    cleaned_doc = minidom.parseString(cleaned_markup.encode("utf-8"))

    # minidom does not seem to parse DTDs properly, so every attribute named
    # "id" is declared to be of type ID by hand; without this, lookups such as
    # getElementById() would not work on the returned document.
    for element in cleaned_doc.getElementsByTagName("*"):
        try:
            element.setIdAttribute("id")
        except NotFoundErr:
            # Element has no "id" attribute; nothing to declare.
            pass

    return cleaned_doc
def get_scour_options():
    """Return scour's sanitized default options with this module's overrides.

    Starts from ``scour.sanitizeOptions()`` and then sets the flags listed
    below on the resulting options object before returning it.
    """
    overrides = {
        "remove_descriptions": True,
        "remove_descriptive_elements": True,
        "remove_metadata": True,
        "remove_titles": True,
        "shorten_ids": False,
        "strip_comments": True,
        "strip_xml_prolog": True,
        "strip_xml_space_attribute": True,
    }

    scour_options = scour.sanitizeOptions()
    for flag_name, flag_value in overrides.items():
        setattr(scour_options, flag_name, flag_value)
    return scour_options
def clean_path_data(path_data, accuracy):
    """Pass a path data string through scour's ``cleanPath`` and return it.

    ``path_data`` is wrapped in a small element-like object that only knows
    about a ``d`` attribute. Before ``cleanPath`` runs, scour's module-level
    path counters are reset to zero and a ``decimal.Context`` with
    ``prec=accuracy`` is installed as both ``scouringContext`` and
    ``scouringContextC``. These are writes to scour's module state and are
    not undone afterwards.

    Returns whatever the wrapper holds as its ``d`` value after ``cleanPath``
    has been called.
    """

    # noinspection PyPep8Naming, PyMethodMayBeStatic
    class ElementWrapper:
        """Stand-in for a DOM element that exposes a single ``d`` attribute."""

        def __init__(self, data):
            self.data = data

        def getAttribute(self, attr):
            # Every attribute other than 'd' reads as the empty string.
            return self.data if attr == 'd' else ''

        def hasAttribute(self, attr):
            return attr == 'd'

        def setAttribute(self, attr, value):
            # Writes to anything other than 'd' are ignored.
            if attr == 'd':
                self.data = value

        # NOTE(review): this is a method, whereas minidom nodes expose
        # nodeType as a plain attribute - confirm this is what cleanPath
        # expects from the object it is given.
        def nodeType(self):
            return Node.ELEMENT_NODE

    import scour
    scour_impl = scour.scour

    # Reset the statistics scour keeps at module level.
    scour_impl._num_path_segments_removed = 0
    scour_impl._num_bytes_saved_in_path_data = 0

    # Use the same precision for both of scour's decimal contexts.
    precision_context = decimal.Context(prec=accuracy)
    scour_impl.scouringContext = precision_context
    scour_impl.scouringContextC = precision_context

    path_element = ElementWrapper(path_data)
    cleanPath(path_element, sanitizeOptions(None))
    return path_element.data
def saveJpg(dest, fileout, quality):
    """Save ``dest`` to ``fileout`` as an RGB JPEG.

    ``quality`` values below 1 are replaced by 75 and values above 95 are
    capped at 95. The image is converted to RGB and written with
    ``optimize=True`` and ``subsampling=0``.
    """
    # The image quality, on a scale from 0 (worst) to 95 (best), the default is 75
    if quality > 95:
        quality = 95
    elif quality < 1:
        quality = 75

    rgb_image = dest.convert('RGB')
    rgb_image.save(fileout, 'JPEG', optimize=True, subsampling=0, quality=quality)

    # NOTE(review): the statements below read `filein`, `size` and `fixed`,
    # none of which are parameters or locals of this function. Presumably they
    # belong to a different function whose header is not visible here - confirm
    # against the complete file. They are kept in their original order.

    # python-scour
    if ".svg" in fileout:
        from scour.scour import sanitizeOptions, start
        options = sanitizeOptions()
        options.strip_xml_prolog = True  # --strip-xml-prolog
        options.remove_metadata = True  # --remove-metadata
        options.strip_comments = True  # --enable-comment-stripping
        options.strip_ids = True  # --enable-id-stripping
        options.indent_type = None  # --indent=none
        start(options, open(filein, 'rb'), open(fileout, 'wb'))
    # python-pil
    else:
        from PIL import Image, ImageSequence
        source = Image.open(filein)
        size = calcSize(source, size)
        if source.format == 'GIF' and ".gif" in fileout:
            dest = resizeAnimatedGif(source, size, fixed)
            saveGif(dest, fileout, quality)