def test_node_kind_function(self):
    """node_kind() classifies every XPath node type and returns None for non-nodes."""
    attribute = AttributeNode('id', '0212349350')
    element = ElementTree.Element('schema')
    cases = [
        (ElementTree.parse(io.StringIO(u'<A/>')), 'document-node'),
        (element, 'element'),
        (attribute, 'attribute'),
        (NamespaceNode('xs', 'http://www.w3.org/2001/XMLSchema'), 'namespace'),
        (ElementTree.Comment('nothing important'), 'comment'),
        (ElementTree.ProcessingInstruction('action', 'nothing to do'),
         'processing-instruction'),
        (TextNode('betelgeuse'), 'text'),
    ]
    for node, expected in cases:
        self.assertEqual(node_kind(node), expected)
    # Anything that is not a node has no kind at all.
    for non_node in ((), None, 10):
        self.assertIsNone(node_kind(non_node))
    # Typed wrappers report the same kind as their untyped counterparts.
    with patch.multiple(DummyXsdType, is_simple=lambda x: True):
        xsd_type = DummyXsdType()
        typed_attribute = TypedAttribute(attribute, xsd_type, '0212349350')
        self.assertEqual(node_kind(typed_attribute), 'attribute')
        typed_element = TypedElement(element, xsd_type, None)
        self.assertEqual(node_kind(typed_element), 'element')
def test_node_kind_property(self):
    """Each node wrapper class exposes the expected XPath kind via .kind."""
    element = ElementNode(ElementTree.Element('schema'))
    pi = ProcessingInstructionNode(
        self.context,
        ElementTree.ProcessingInstruction('action', 'nothing to do'))
    expected_kinds = [
        (DocumentNode(ElementTree.parse(io.StringIO(u'<A/>'))), 'document'),
        (element, 'element'),
        (AttributeNode('id', '0212349350'), 'attribute'),
        (NamespaceNode('xs', 'http://www.w3.org/2001/XMLSchema'), 'namespace'),
        (CommentNode(ElementTree.Comment('nothing important')), 'comment'),
        (pi, 'processing-instruction'),
        (TextNode('betelgeuse'), 'text'),
    ]
    for node, kind in expected_kinds:
        self.assertEqual(node.kind, kind)
    # Attaching an XSD type must not change the reported kind.
    with patch.multiple(DummyXsdType, is_simple=lambda x: True):
        xsd_type = DummyXsdType()
        typed_attribute = AttributeNode('id', '0212349350', xsd_type=xsd_type)
        self.assertEqual(typed_attribute.kind, 'attribute')
        typed_element = ElementNode(element.elem, xsd_type=xsd_type)
        self.assertEqual(typed_element.kind, 'element')
def generate_wxi(src, output_filename=None, id=None, diskId=None):
    """Harvest *src* with WiX ``heat`` into a .wxi fragment and post-process it.

    The generated fragment is stripped of .svn files/directories, optionally
    stamped with a DiskId on every component, and extended with registry
    entries for each tool declared in ``analysis_tool.manifest.json`` found
    inside *src*.

    :param src: source directory to harvest (trailing slashes are stripped).
    :param output_filename: target .wxi path; defaults to ``<id>.wxi`` placed
        next to this script.
    :param id: WiX component-group/directory id; defaults to a sanitized name
        derived from the directory basename. (Shadows the ``id`` builtin but
        is kept for backward compatibility with existing callers.)
    :param diskId: if given, written as the DiskId attribute of every component.
    :return: ``(name, id)`` — the directory basename and the id actually used.
    """
    add_wix_to_path()
    prefix = "analysis_tool_"
    while src[-1] in ('/', '\\'):
        src = src[:-1]
    name = os.path.basename(src)
    id = id or prefix + name.replace('-', '_').replace(' ', '_')
    output_filename = output_filename or _adjacent_file(id + ".wxi")
    import subprocess

    def check_call(args):
        # Echo the command line before running it, for build-log readability.
        # (print() replaces the original Python 2 print statement.)
        print(" ".join(args))
        subprocess.check_call(args)

    check_call(['heat', 'dir', _adjacent_file(src), '-template', 'fragment',
                '-sreg', '-scom', '-o', output_filename, '-ag', '-cg', id,
                '-srd', '-var', 'var.' + id, '-dr', id, '-nologo'])
    tree = ElementTree.parse(output_filename, parser=CommentedTreeBuilder()).getroot()
    tree.insert(0, ElementTree.Comment('generated with gen_analysis_tool_wxi.py %s\n' % src))
    tree.insert(0, ElementTree.ProcessingInstruction(
        'define', '%s=%s' % (id, os.path.normpath(src))))
    # Element.getiterator() was removed in Python 3.9; iter() is equivalent.
    parent_map = dict((c, p) for p in tree.iter() for c in p)
    # Drop any harvested file that lives under a .svn directory.
    for file in tree.findall(".//{http://schemas.microsoft.com/wix/2006/wi}Component/"
                             "{http://schemas.microsoft.com/wix/2006/wi}File"):
        if file.get('Source', '').find('.svn') != -1:
            comp = parent_map[file]
            parent_map[comp].remove(comp)
    # Drop .svn Directory elements together with the fragments referencing them.
    for dir in tree.findall(".//{http://schemas.microsoft.com/wix/2006/wi}Directory"):
        if dir.get('Name', '') == '.svn':
            for dirref in tree.findall(".//{http://schemas.microsoft.com/wix/2006/wi}DirectoryRef"):
                if dirref.get('Id', '') == dir.get('Id', ''):
                    frag = parent_map[dirref]
                    parent_map[frag].remove(frag)
            parent_map[dir].remove(dir)
    if diskId:
        for component in tree.findall(".//{http://schemas.microsoft.com/wix/2006/wi}Component"):
            component.attrib['DiskId'] = diskId
    # add registry nodes
    componentGroup = tree.findall(
        ".//{http://schemas.microsoft.com/wix/2006/wi}ComponentGroup[@Id='" + id + "']")[0]
    componentRegistry = ElementTree.SubElement(
        componentGroup, 'ns0:Component',
        {'Id': id + '_RegistryEntry', 'Directory': id, 'Guid': '*', 'Win64': 'no'})
    manifest = None
    with open(os.path.join(src, 'analysis_tool.manifest.json'), 'r') as f_p:
        manifest = json.load(f_p)
    # One HKLM registry key per declared tool, mirroring the manifest fields.
    for tool_name in manifest:
        registryKey = ElementTree.SubElement(
            componentRegistry, 'ns0:RegistryKey',
            {'Root': 'HKLM', 'Key': 'Software\\META\\AnalysisTools\\' + tool_name})
        ElementTree.SubElement(registryKey, 'ns0:RegistryValue',
                               {'Name': 'InstallLocation', 'Type': 'string',
                                'Value': '[INSTALLDIR]\\analysis_tools\\' + name})
        ElementTree.SubElement(registryKey, 'ns0:RegistryValue',
                               {'Name': 'Version', 'Type': 'string',
                                'Value': manifest[tool_name]['version']})
        ElementTree.SubElement(registryKey, 'ns0:RegistryValue',
                               {'Name': 'OutputDirectory', 'Type': 'string',
                                'Value': '[INSTALLDIR]\\analysis_tools\\' + name + '\\' +
                                         manifest[tool_name]['outputDirectory']})
        ElementTree.SubElement(registryKey, 'ns0:RegistryValue',
                               {'Name': 'RunCommand', 'Type': 'string',
                                'Value': manifest[tool_name]['runCommand']})
        ElementTree.SubElement(registryKey, 'ns0:RegistryValue',
                               {'Name': 'RequiredInterpreter', 'Type': 'string',
                                'Value': manifest[tool_name]['requiredInterpreter']})
    ElementTree.ElementTree(tree).write(output_filename, xml_declaration=True)
    return name, id
def generate_dir(src, output_filename=None, id=None, diskId=None, mod_function=None):
    """Harvest the directory *src* with WiX ``heat`` into a .wxi fragment.

    The fragment is post-processed: files under .svn plus junk files
    (Thumbs.db, desktop.ini, .DS_Store, ``*.pyc``) are pruned, .svn and
    __pycache__ directories are removed, an optional DiskId is stamped on
    every component, and *mod_function* may apply final custom edits.

    :param src: directory to harvest (trailing slashes are stripped).
    :param output_filename: target .wxi file; defaults to ``<id>.wxi``.
    :param id: WiX id; defaults to a sanitized directory basename.
        (Shadows the ``id`` builtin, kept for backward compatibility.)
    :param diskId: if given, set as the DiskId of every component.
    :param mod_function: optional callable ``mod_function(tree, parent_map)``
        run just before the file is written.
    """
    while src[-1] in ('/', '\\'):
        src = src[:-1]
    name = os.path.basename(src)
    id = id or name.replace('-', '_').replace(' ', '_')
    output_filename = output_filename or (id + ".wxi")
    args = [
        'heat', 'dir', src, '-template', 'fragment', '-sreg', '-scom',
        '-o', output_filename, '-ag', '-cg', id, '-srd',
        '-var', 'var.' + id, '-dr', id, '-nologo'
    ]
    print(" ".join(args))
    subprocess.check_call(args)
    ElementTree.register_namespace("", "http://schemas.microsoft.com/wix/2006/wi")
    tree = ElementTree.parse(output_filename).getroot()
    tree.insert(
        0, ElementTree.Comment('generated with gen_dir_wxi.py %s\n' % src))
    tree.insert(
        0, ElementTree.ProcessingInstruction(
            'define', '%s=%s' % (id, os.path.normpath(src))))
    # Element.getiterator() was removed in Python 3.9; iter() is equivalent.
    parent_map = dict((c, p) for p in tree.iter() for c in p)
    for file in tree.findall(
            ".//{http://schemas.microsoft.com/wix/2006/wi}Component/"
            "{http://schemas.microsoft.com/wix/2006/wi}File"):
        file_Source = file.get('Source', '')
        if file_Source.find('.svn') != -1 or os.path.basename(file_Source) in (
                'Thumbs.db', 'desktop.ini', '.DS_Store') or file_Source.endswith('.pyc'):
            comp = parent_map[file]
            parent_map[comp].remove(comp)
    for dir in tree.findall(
            ".//{http://schemas.microsoft.com/wix/2006/wi}Directory"):
        if dir.get('Name', '') in ('.svn', '__pycache__'):
            for dirref in tree.findall(
                    ".//{http://schemas.microsoft.com/wix/2006/wi}DirectoryRef"
            ):
                if dirref.get('Id', '') == dir.get('Id', ''):
                    frag = parent_map[dirref]
                    parent_map[frag].remove(frag)
            # NOTE(review): this removes the *parent* of the matched directory
            # from its grandparent — unlike the analysis-tool variant, which
            # removes the directory itself. Confirm the asymmetry is intended
            # before changing it.
            parent_map[parent_map[dir]].remove(parent_map[dir])
    if diskId:
        for component in tree.findall(
                ".//{http://schemas.microsoft.com/wix/2006/wi}Component"):
            component.attrib['DiskId'] = diskId
    if mod_function:
        mod_function(tree, parent_map)
    ElementTree.ElementTree(tree).write(output_filename, xml_declaration=True,
                                        encoding='utf-8')
def test_string_value_function(self):
    """string_value() covers node wrappers, atomics, numbers and plain objects."""
    token = self.parser.parse('true()')
    document = ElementTree.parse(
        io.StringIO(u'<A>123<B1>456</B1><B2>789</B2></A>'))
    element = ElementTree.Element('schema')
    context = XPathContext(element)
    node_cases = [
        (XPathContext(document).root, '123456789'),
        (context.root, ''),
        (AttributeNode('id', '0212349350'), '0212349350'),
        (NamespaceNode('xs', 'http://www.w3.org/2001/XMLSchema'),
         'http://www.w3.org/2001/XMLSchema'),
        (CommentNode(ElementTree.Comment('nothing important')),
         'nothing important'),
        (ProcessingInstructionNode(
            ElementTree.ProcessingInstruction('action', 'nothing to do')),
         'action nothing to do'),
        (TextNode('betelgeuse'), 'betelgeuse'),
    ]
    for node, expected in node_cases:
        self.assertEqual(token.string_value(node), expected)
    # Non-node values follow the XPath number/string formatting rules.
    scalar_cases = [
        (None, ''),
        (Decimal(+1999), '1999'),
        (Decimal('+1999'), '1999'),
        (Decimal('+19.0010'), '19.001'),
        (10, '10'),
        (1e99, '1E99'),
        (1e-05, '1E-05'),
        (1.00, '1'),
        (+19.0010, '19.001'),
        (float('nan'), 'NaN'),
        (float('inf'), 'INF'),
        (float('-inf'), '-INF'),
        ((), '()'),
        (Tagged(), "Tagged(tag='root')"),
    ]
    for value, expected in scalar_cases:
        self.assertEqual(token.string_value(value), expected)
    # A typed element is first decoded, then stringified.
    with patch.multiple(DummyXsdType, is_simple=lambda x: True):
        xsd_type = DummyXsdType()
        element.text = '10'
        typed_elem = ElementNode(elem=element, xsd_type=xsd_type)
        self.assertEqual(token.string_value(typed_elem), '10')
        self.assertEqual(token.data_value(typed_elem), 10)
def argenta_csv_to_ofx(csv_filename):
    """Convert an Argenta bank CSV export into an OFX file alongside it.

    Reads ``csv_filename`` (semicolon-separated Argenta layout: an account
    header row, a column-header row, then one row per transaction), builds
    the corresponding OFX XML document and writes it to the same path with
    the final extension replaced by ``.ofx``.

    :param csv_filename: path of the CSV file to convert.
    :raises AssertionError: if the header or a row does not match the
        expected Argenta layout (label, IBAN length/prefix, EUR currency).
    """
    with open(csv_filename) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=';', quoting=csv.QUOTE_NONE)
        account_label, account_number, account_descr = next(csv_reader)
        account_number = account_number.replace(' ', '')
        assert account_label == 'Nr v/d rekening :'
        assert len(account_number) == 16
        assert account_number[:4] == 'BE42'
        root = ET.Element('OFX')
        messages = ET.SubElement(root, 'BANKMSGSRSV1')
        statement_transaction = ET.SubElement(messages, 'STMTTRNRS')
        statement = ET.SubElement(statement_transaction, 'STMTRS')
        ET.SubElement(statement, 'CURDEF').text = DEFAULT_CURRENCY
        from_account = ET.SubElement(statement, 'BANKACCTFROM')
        convert_account(from_account, account_number[4:7],
                        account_number[7:14], account_number[14:])
        next(csv_reader)  # skip the column-header row
        transfers = ET.SubElement(statement, 'BANKTRANLIST')
        for row in csv_reader:
            (valuta_date, reference, description, amount, currency, date,
             to_account_number, to_name, comment1, comment2) = row
            assert currency == 'EUR'
            # Continental notation "1.234,56" -> "1234.56".
            amount = amount.replace('.', '').replace(',', '.')
            transaction = ET.SubElement(transfers, 'STMTTRN')
            transaction_type = ET.SubElement(transaction, 'TRNTYPE')
            transaction_type.text = 'DEBIT' if amount[0] == '-' else 'CREDIT'
            ET.SubElement(transaction, 'TRNUID').text = reference
            ET.SubElement(transaction, 'TRNAMT').text = amount
            if currency != DEFAULT_CURRENCY:
                ET.SubElement(transaction, 'CURRENCY').text = currency
            to_account = ET.SubElement(transaction, 'BANKACCTTO')
            convert_account(to_account, *to_account_number.split('-'))
            ET.SubElement(transaction, 'DTPOSTED').text = convert_date(date)
            ET.SubElement(transaction, 'NAME').text = to_name
            ET.SubElement(transaction, 'MEMO').text = comment1 + comment2
    # Strip only the FINAL extension: rsplit('.') without maxsplit would
    # truncate names with extra dots (e.g. "2020.01.csv" became "2020.ofx").
    ofx_filename = csv_filename.rsplit('.', 1)[0] + '.ofx'
    with open(ofx_filename, 'wb') as ofx_file:
        pi_text = ' '.join('{}="{}"'.format(key, value)
                           for key, value in OFX_DECLARATION)
        pi = ET.ProcessingInstruction('ofx', pi_text)
        pi_tree = ET.ElementTree(pi)
        # Write the XML declaration plus the OFX processing instruction first,
        # then the document itself without a second declaration.
        pi_tree.write(ofx_file, xml_declaration=True)
        tree = ET.ElementTree(root)
        tree.write(ofx_file, xml_declaration=False)
def format_request(request_type, request_items=None, qbxml_version='13.0',
                   on_error=STOP_ON_ERROR):
    """Format request as QBXML.

    :param request_type: tag name of the request section (e.g. 'CustomerQueryRq').
    :param request_items: mapping or iterable of ``(key, value)`` pairs making
        up the request body; ``None``/empty means an empty request section.
    :param qbxml_version: version written into the ``qbxml`` processing
        instruction.
    :param on_error: value of the ``onError`` attribute on ``QBXMLMsgsRq``.
    :return: pretty-printed QBXML document as a string.
    """
    if not request_items:
        request_items = dict()
    section = ET.Element(request_type)
    if hasattr(request_items, 'items'):
        request_items = request_items.items()
    for key, value in request_items:
        section.extend(format_request_part(key, value))
    body = ET.Element('QBXMLMsgsRq', onError=on_error)
    body.append(section)
    document = ET.Element('QBXML')
    document.append(body)
    elements = [
        ET.ProcessingInstruction('xml', 'version="1.0"'),
        ET.ProcessingInstruction('qbxml', 'version="{}"'.format(qbxml_version)),
        document,
    ]
    # encoding='unicode' makes tostring() return str rather than bytes;
    # without it ''.join() raises TypeError under Python 3.
    request = ''.join(ET.tostring(x, encoding='unicode') for x in elements)
    return minidom.parseString(request).toprettyxml(indent=" ")
def test_node_kind_function(self):
    """node_kind() classifies raw ElementTree objects and node tuples (legacy API)."""
    cases = [
        (ElementTree.parse(io.StringIO(u'<A/>')), 'document'),
        (ElementTree.Element('schema'), 'element'),
        (AttributeNode('id', '0212349350'), 'attribute'),
        (NamespaceNode('xs', 'http://www.w3.org/2001/XMLSchema'), 'namespace'),
        (ElementTree.Comment('nothing important'), 'comment'),
        (ElementTree.ProcessingInstruction('action', 'nothing to do'),
         'processing-instruction'),
        (u'betelgeuse', 'text'),
    ]
    for obj, expected in cases:
        self.assertEqual(node_kind(obj), expected)
    # Non-node values have no kind.
    self.assertIsNone(node_kind(None))
    self.assertIsNone(node_kind(10))
def test_data_value_function(self):
    """data_value() returns typed/untyped atomic values for every node kind."""
    token = self.parser.parse('true()')
    if self.parser.version != '1.0':
        # With a schema type attached, the element value is decoded.
        context = XPathContext(ElementTree.XML('<age>19</age>'))
        context.root.xsd_type = DummyXsdType()
        self.assertEqual(token.data_value(context.root), 19)
    context = XPathContext(ElementTree.XML('<dummy/>'))
    self.assertEqual(token.data_value(AttributeNode('age', '19')),
                     UntypedAtomic('19'))
    self.assertEqual(
        token.data_value(NamespaceNode('tns', 'http://xpath.test/ns')),
        'http://xpath.test/ns')
    self.assertEqual(token.data_value(TextNode('19')), UntypedAtomic('19'))
    element_node = ElementNode(
        ElementTree.XML('<root>a<e1>b</e1>c<e2>d</e2>e</root>'))
    self.assertEqual(token.data_value(element_node), UntypedAtomic('abcde'))
    document_node = DocumentNode(ElementTree.parse(
        io.StringIO('<root>a<e1>b</e1>c<e2>d</e2>e</root>')))
    self.assertEqual(token.data_value(document_node), UntypedAtomic('abcde'))
    comment_node = CommentNode(ElementTree.Comment("foo bar"))
    self.assertEqual(token.data_value(comment_node), 'foo bar')
    pi_node = ProcessingInstructionNode(
        ElementTree.ProcessingInstruction('action', 'nothing to do'))
    self.assertEqual(token.data_value(pi_node), 'action nothing to do')
    self.assertIsNone(token.data_value(None))
    self.assertEqual(token.data_value(19), 19)
    self.assertEqual(token.data_value('19'), '19')
    self.assertFalse(token.data_value(False))
    # Does not check type of non nodes, simply returns the object.
    tagged_object = Tagged()
    self.assertIs(token.data_value(tagged_object), tagged_object)
def test_string_value_function(self):
    """string_value() of raw ElementTree objects and simple values (legacy API)."""
    token = self.parser.parse('true()')
    cases = [
        (ElementTree.parse(io.StringIO(u'<A>123<B1>456</B1><B2>789</B2></A>')),
         '123456789'),
        (ElementTree.Element('schema'), ''),
        (AttributeNode('id', '0212349350'), '0212349350'),
        (NamespaceNode('xs', 'http://www.w3.org/2001/XMLSchema'),
         'http://www.w3.org/2001/XMLSchema'),
        (ElementTree.Comment('nothing important'), 'nothing important'),
        (ElementTree.ProcessingInstruction('action', 'nothing to do'),
         'action nothing to do'),
        (u'betelgeuse', 'betelgeuse'),
        (None, ''),
        (10, '10'),
    ]
    for obj, expected in cases:
        self.assertEqual(token.string_value(obj), expected)
def test_data_value_function(self):
    """data_value() on raw ElementTree objects and node tuples (legacy API)."""
    token = self.parser.parse('true()')
    if self.parser.version != '1.0':
        # A TypedElement carries its decoded value directly.
        with patch.multiple(DummyXsdType(), is_simple=lambda x: False,
                            has_simple_content=lambda x: True) as xsd_type:
            typed = TypedElement(ElementTree.XML('<age>19</age>'), xsd_type, 19)
            self.assertEqual(token.data_value(typed), 19)
    self.assertEqual(token.data_value(AttributeNode('age', '19')),
                     UntypedAtomic('19'))
    self.assertEqual(
        token.data_value(NamespaceNode('tns', 'http://xpath.test/ns')),
        'http://xpath.test/ns')
    self.assertEqual(token.data_value(TextNode('19')), UntypedAtomic('19'))
    self.assertEqual(
        token.data_value(ElementTree.XML('<root>a<e1>b</e1>c<e2>d</e2>e</root>')),
        UntypedAtomic('abcde'))
    self.assertEqual(
        token.data_value(ElementTree.parse(
            io.StringIO('<root>a<e1>b</e1>c<e2>d</e2>e</root>'))),
        UntypedAtomic('abcde'))
    self.assertEqual(token.data_value(ElementTree.Comment("foo bar")),
                     'foo bar')
    self.assertEqual(
        token.data_value(
            ElementTree.ProcessingInstruction('action', 'nothing to do')),
        'action nothing to do')
    self.assertIsNone(token.data_value(None))
    self.assertEqual(token.data_value(19), 19)
    self.assertEqual(token.data_value('19'), '19')
    self.assertFalse(token.data_value(False))
    # Objects that are not nodes yield no data value in this API.
    self.assertIsNone(token.data_value(Tagged()))
def show_xml(document: Dict[str, Any]) -> None:
    """Print *document* (a results mapping) as XML on standard output.

    Emits an ``<?xml version="1.0"?>`` processing instruction followed by a
    ``<results>`` tree containing the legs and, per team, the name and the
    position reached at each numbered leg.
    """
    from xml.etree import ElementTree as XML

    results = XML.Element("results")
    legs_parent = XML.SubElement(results, "legs")
    for leg_number, leg_text in enumerate(document["legs"], start=1):
        entry = XML.SubElement(legs_parent, "leg", n=str(leg_number))
        entry.text = leg_text
    teams_parent = XML.SubElement(results, "teams")
    for team in document["teams"]:
        team_node = XML.SubElement(teams_parent, "team")
        XML.SubElement(team_node, "name").text = team["name"]
        position_node = XML.SubElement(team_node, "position")
        for leg_number, position in enumerate(team["position"], start=1):
            entry = XML.SubElement(position_node, "leg", n=str(leg_number))
            entry.text = str(position)
    # The PI is dumped separately because ElementTree cannot attach it above
    # the document root.
    XML.dump(XML.ProcessingInstruction("xml", 'version="1.0"'))
    XML.dump(results)
def onInputFile(self, value: InputFile):
    """Serialize *value*'s XML root to ``self.path/<path>.xml`` with an XSL PI.

    The root element is augmented with ``base``/``path``/``id`` attributes
    before serialization. ElementTree cannot place an ``xml-stylesheet``
    processing instruction above the document root, so the XML declaration
    and the stylesheet PI are written by hand before the serialized tree.
    """
    output = self.path / value.path.with_suffix(".xml")
    # TODO: Should make it relative
    root = value.value
    # We augment the root with useful meta information
    root.attrib["base"] = str(self.path)
    root.attrib["path"] = str(value.path.with_suffix(".xml"))
    root.attrib["id"] = os.path.splitext(root.attrib["path"])[0]
    output.parent.mkdir(parents=True, exist_ok=True)
    res = ElementTree.tostring(root, method="xml")
    xml_header = b'<?xml version="1.0" encoding="utf8"?>\n'
    xsl_header = f'<?xml-stylesheet type="text/xsl" media="screen" href="{os.path.relpath(self.xsl,output.parent)}"?>\n'.encode(
        "utf8")
    with open(output, "wb") as f:
        f.write(xml_header)
        f.write(xsl_header)
        f.write(res)
class OpenPackagingConvention(FileInterface): # Some constants related to this format. _xml_header = ET.ProcessingInstruction("xml", "version=\"1.0\" encoding=\"UTF-8\"") # Header element being put atop every XML file. _content_types_file = "/[Content_Types].xml" # Where the content types file is. _global_metadata_file = "/Metadata/OPC_Global.json" # Where the global metadata file is. _opc_metadata_relationship_type = "http://schemas.ultimaker.org/package/2018/relationships/opc_metadata" # Unique identifier of the relationship type that relates OPC metadata to files. _metadata_prefix = "/metadata" _aliases = OrderedDict([]) # type: Dict[str, str] # A standard OPC file doest not have default aliases. These must be implemented in inherited classes. mime_type = "application/x-opc" ## Initialises the fields of this class. def __init__(self) -> None: self._mode = None # type: Optional[OpenMode] # Whether we're in read or write mode. self._stream = None # type: Optional[IO[bytes]] # The currently open stream. self._zipfile = None # type: Optional[zipfile.ZipFile] # The zip interface to the currently open stream. self._metadata = {} # type: Dict[str, Any] # The metadata in the currently open file. self._content_types_element = None # type: Optional[ET.Element] # An XML element holding all the content types. self._relations = {} # type: Dict[str, ET.Element] # For each virtual path, a relations XML element (which is left out of the file if empty). self._open_bytes_streams = {} # type: Dict[str, IO[bytes]] # With old Python versions, the currently open BytesIO streams that need to be flushed, by their virtual path. # The zipfile module may only have one write stream open at a time. So when you open a new stream, close the previous one. 
self._last_open_path = None # type: Optional[str] self._last_open_stream = None # type: Optional[IO[bytes]] def openStream(self, stream: IO[bytes], mime: str = "application/x-opc", mode: OpenMode = OpenMode.ReadOnly) -> None: self._mode = mode self._stream = stream # A copy in case we need to rewind for toByteArray. We should mostly be reading via self._zipfile. self._zipfile = zipfile.ZipFile(self._stream, self._mode.value, compression=zipfile.ZIP_DEFLATED) self._readContentTypes() # Load or create the content types element. self._readRels() # Load or create the relations. self._readMetadata() # Load the metadata, if any. def close(self) -> None: if not self._stream: raise ValueError("This file is already closed.") if self._zipfile is None: return self.flush() self._zipfile.close() def flush(self) -> None: if not self._stream: raise ValueError("Can't flush a closed file.") assert self._zipfile is not None if self._mode == OpenMode.ReadOnly: return # No need to flush reading of zip archives as they are blocking calls. if self._last_open_stream is not None and self._last_open_path not in self._open_bytes_streams: self._last_open_stream.close() # If using old Python versions (<= 3.5), the write streams were kept in memory to be written all at once when flushing. for virtual_path, stream in self._open_bytes_streams.items(): stream.seek(0) self._zipfile.writestr(virtual_path, stream.read()) stream.close() self._writeMetadata() # Metadata must be updated first, because that adds rels and a content type. 
self._writeContentTypes() self._writeRels() def listPaths(self) -> List[str]: if not self._stream: raise ValueError("Can't list the paths in a closed file.") paths = [self._zipNameToVirtualPath(zip_name) for zip_name in self._zipfile.namelist()] return list(self._metadata.keys()) + paths def getData(self, virtual_path: str) -> Dict[str, Any]: if not self._stream: raise ValueError("Can't get data from a closed file.") assert self._zipfile is not None if self._mode == OpenMode.WriteOnly: raise WriteOnlyError(virtual_path) result = {} # type: Dict[str, Any] if virtual_path.startswith(self._metadata_prefix): result = self.getMetadata(virtual_path[len(self._metadata_prefix):]) else: canonical_path = self._processAliases(virtual_path) if self._resourceExists(canonical_path): result[virtual_path] = self.getStream( canonical_path).read() # In case of a name clash, the file wins. But that shouldn't be possible. return result def setData(self, data: Dict[str, Any]) -> None: if not self._stream: raise ValueError("Can't change the data in a closed file.") if self._mode == OpenMode.ReadOnly: raise ReadOnlyError() for virtual_path, value in data.items(): if virtual_path.startswith( self._metadata_prefix): # Detect metadata by virtue of being in the Metadata folder. self.setMetadata({virtual_path: value[len(self._metadata_prefix):]}) else: # Virtual file resources. self.getStream(virtual_path).write(value) def getMetadata(self, virtual_path: str) -> Dict[str, Any]: if not self._stream: raise ValueError("Can't get metadata from a closed file.") assert self._zipfile is not None if self._mode == OpenMode.WriteOnly: raise WriteOnlyError(virtual_path) canonical_path = self._processAliases(virtual_path) # Find all metadata that begins with the specified virtual path! result = {} if canonical_path in self._metadata: # The exact match. 
result[self._metadata_prefix + virtual_path] = self._metadata[canonical_path] for entry_path, value in self._metadata.items(): # We only want to match subdirectories of the provided virtual paths. # So if you provide "/foo" then we don't want to match on "/foobar" # but we do want to match on "/foo/zoo". This is why we check if they # start with the provided virtual path plus a slash. if entry_path.startswith(canonical_path + "/"): # We need to return the originally requested alias, so replace the canonical path with the virtual path. result[self._metadata_prefix + virtual_path + "/" + entry_path[len(canonical_path) + 1:]] = value # If requesting the size of a file. if canonical_path.endswith("/size"): requested_resource = canonical_path[:-len("/size")] if self._resourceExists(requested_resource): result[self._metadata_prefix + virtual_path] = self._zipfile.getinfo( requested_resource.strip("/")).file_size return result def setMetadata(self, metadata: Dict[str, Any]) -> None: if not self._stream: raise ValueError("Can't change metadata in a closed file.") if self._mode == OpenMode.ReadOnly: raise ReadOnlyError() metadata = {self._processAliases(virtual_path): metadata[virtual_path] for virtual_path in metadata} self._metadata.update(metadata) def getStream(self, virtual_path: str) -> IO[bytes]: if not self._stream: raise ValueError("Can't get a stream from a closed file.") assert self._zipfile is not None assert self._mode is not None if virtual_path.startswith("/_rels"): raise OPCError("Writing directly to a relationship file is forbidden.") if virtual_path.startswith(self._metadata_prefix): return BytesIO(json.dumps(self.getMetadata(virtual_path[len(self._metadata_prefix):])).encode("UTF-8")) virtual_path = self._processAliases(virtual_path) if not self._resourceExists(virtual_path) and self._mode == OpenMode.ReadOnly: # In write-only mode, create a new file instead of reading metadata. 
raise FileNotFoundError(virtual_path) # The zipfile module may only have one write stream open at a time. So when you open a new stream, close the previous one. if self._last_open_stream is not None and self._last_open_path not in self._open_bytes_streams: # Don't close streams that we still need to flush. self._last_open_stream.close() # If we are requesting a stream of an image resized, resize the image and return that. if self._mode == OpenMode.ReadOnly and ".png/" in virtual_path: png_file = virtual_path[:virtual_path.find(".png/") + 4] size_spec = virtual_path[virtual_path.find(".png/") + 5:] if re.match(r"^\s*\d+\s*x\s*\d+\s*$", size_spec): dimensions = [] for dimension in re.finditer(r"\d+", size_spec): dimensions.append(int(dimension.group())) return self._resizeImage(png_file, dimensions[0], dimensions[1]) self._last_open_path = virtual_path try: # If it happens to match some existing PNG file, we have to rescale that file and return the result. self._last_open_stream = self._zipfile.open(virtual_path, self._mode.value) except RuntimeError: # Python 3.5 and before couldn't open resources in the archive in write mode. self._last_open_stream = BytesIO() self._open_bytes_streams[virtual_path] = self._last_open_stream # Save this for flushing later. return self._last_open_stream def toByteArray(self, offset: int = 0, count: int = -1) -> bytes: if not self._stream: raise ValueError("Can't get the bytes from a closed file.") if self._mode == OpenMode.WriteOnly: raise WriteOnlyError() assert self._zipfile is not None assert self._mode is not None self._zipfile.close() # Close the zipfile first so that we won't be messing with the stream without its consent. self._stream.seek(offset) result = self._stream.read(count) self._zipfile = zipfile.ZipFile(self._stream, self._mode.value, compression=zipfile.ZIP_DEFLATED) return result ## Adds a new content type to the archive. 
# \param extension The file extension of the type def addContentType(self, extension: str, mime_type: str) -> None: if not self._stream: raise ValueError("Can't add a content type to a closed file.") if self._mode == OpenMode.ReadOnly: raise ReadOnlyError() assert self._content_types_element is not None # First check if it already exists. for content_type in self._content_types_element.iterfind("Default"): if "Extension" in content_type.attrib and content_type.attrib["Extension"] == extension: raise OPCError("Content type for extension {extension} already exists.".format(extension=extension)) ET.SubElement(self._content_types_element, "Default", Extension=extension, ContentType=mime_type) ## Adds a relation concerning a file type. # \param virtual_path The target file that the relation is about. # \param relation_type The type of the relation. Any reader of OPC should # be able to understand all types that are added via relations. # \param origin The origin of the relation. If the relation concerns a # specific directory or specific file, then you should point to the # virtual path of that file here. def addRelation(self, virtual_path: str, relation_type: str, origin: str = "") -> None: if not self._stream: raise ValueError("Can't add a relation to a closed file.") if self._mode == OpenMode.ReadOnly: raise ReadOnlyError(virtual_path) virtual_path = self._processAliases(virtual_path) # First check if it already exists. if origin not in self._relations: self._relations[origin] = ET.Element("Relationships", xmlns="http://schemas.openxmlformats.org/package/2006/relationships") else: for relationship in self._relations[origin].iterfind("Relationship"): if "Target" in relationship.attrib and relationship.attrib["Target"] == virtual_path: raise OPCError("Relation for virtual path {target} already exists.".format(target=virtual_path)) # Find a unique name. 
unique_id = 0 while True: for relationship in self._relations[origin].iterfind("Relationship"): if "Id" in relationship.attrib and relationship.attrib["Id"] == "rel" + str(unique_id): break else: # Unique ID didn't exist yet! It's safe to use break unique_id += 1 unique_name = "rel" + str(unique_id) # Create the element itself. ET.SubElement(self._relations[origin], "Relationship", Target=virtual_path, Type=relation_type, Id=unique_name) ## Figures out if a resource exists in the archive. # # This will not match on metadata, only on normal resources. # \param virtual_path: The path to test for. # \return ``True`` if it exists as a normal resource, or ``False`` if it # doesn't. def _resourceExists(self, virtual_path: str) -> bool: assert self._zipfile is not None for zip_name in self._zipfile.namelist(): zip_virtual_path = self._zipNameToVirtualPath(zip_name) if virtual_path == zip_virtual_path: return True if zip_virtual_path.endswith(".png") and virtual_path.startswith( zip_virtual_path + "/"): # We can rescale PNG images if you want. if re.match(r"^\s*\d+\s*x\s*\d+\s*$", virtual_path[len( zip_virtual_path) + 1:]): # Matches the form "NxM" with optional whitespace. return True return False ## Dereference the aliases for OPC files. # # This also adds a slash in front of every virtual path if it has no slash # yet, to allow referencing virtual paths with or without the initial # slash. def _processAliases(self, virtual_path: str) -> str: if not virtual_path.startswith("/"): virtual_path = "/" + virtual_path # Replace all aliases. for regex, replacement in self._aliases.items(): if regex.startswith("/"): expression = r"^" + regex else: expression = regex virtual_path = re.sub(expression, replacement, virtual_path) return virtual_path ## Convert the resource name inside the zip to a virtual path as this # library specifies it should be. # \param zip_name The name in the zip file according to zipfile module. # \return The virtual path of that resource. 
def _zipNameToVirtualPath(self, zip_name: str) -> str:
    if not zip_name.startswith("/"):
        return "/" + zip_name
    return zip_name

## Resize an image to the specified dimensions.
#
#  For now you may assume that the input image is PNG formatted.
#  \param virtual_path The virtual path pointing to an image in the
#  zipfile.
#  \param width The desired width of the image.
#  \param height The desired height of the image.
#  \return A bytes stream representing a new PNG image with the desired
#  width and height.
def _resizeImage(self, virtual_path: str, width: int, height: int) -> IO[bytes]:
    # Renamed from "input" so the local doesn't shadow the built-in.
    image_stream = self.getStream(virtual_path)
    try:
        # Imported lazily so the class works without PyQt5 when no resizing
        # is requested.
        from PyQt5.QtGui import QImage
        from PyQt5.QtCore import Qt, QBuffer

        image = QImage()
        image.loadFromData(image_stream.read())
        image = image.scaled(width, height, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
        output_buffer = QBuffer()
        output_buffer.open(QBuffer.ReadWrite)
        image.save(output_buffer, "PNG")
        output_buffer.seek(0)  # Reset that buffer so that the next guy can request it.
        return BytesIO(output_buffer.readAll())
    except ImportError:
        # TODO: Try other image loaders.
        raise  # Raise import error again if we find no other image loaders.

#### Below follow some methods to read/write components of the archive. ####

## When loading a file, load the relations from the archive.
#
#  If the relations are missing, empty elements are created.
def _readRels(self) -> None:
    assert self._zipfile is not None

    self._relations[""] = ET.Element("Relationships", xmlns="http://schemas.openxmlformats.org/package/2006/relationships")  # There must always be a global relationships document.

    # Below is some parsing of paths and extensions.
    # Normally you'd use os.path for this. But this is platform-dependent.
    # For instance, the path separator in Windows is a backslash, but zipfile still uses a slash on Windows.
    # So instead we have custom implementations here. Sorry.
    for virtual_path in self._zipfile.namelist():
        virtual_path = self._zipNameToVirtualPath(virtual_path)
        if not virtual_path.endswith(".rels"):  # We only want to read rels files.
            continue
        directory = virtual_path[:virtual_path.rfind("/")]  # Before the last slash.
        if directory != "_rels" and not directory.endswith("/_rels"):  # Rels files must be in a directory _rels.
            continue

        document = ET.fromstring(self._zipfile.open(virtual_path).read())

        # Find out what file or directory this relation is about.
        origin_filename = virtual_path[virtual_path.rfind("/") + 1:-len(".rels")]  # Just the filename (no path) and without .rels extension.
        origin_directory = directory[:-len("/_rels")]  # The parent path. We already know it's in the _rels directory.
        origin = (origin_directory + "/" if (origin_directory != "") else "") + origin_filename

        self._relations[origin] = document

## At the end of writing a file, write the relations to the archive.
#
#  This should be written at the end of writing an archive, when all
#  relations are known.
def _writeRels(self) -> None:
    assert self._zipfile is not None

    # Below is some parsing of paths and extensions.
    # Normally you'd use os.path for this. But this is platform-dependent.
    # For instance, the path separator in Windows is a backslash, but zipfile still uses a slash on Windows.
    # So instead we have custom implementations here. Sorry.
    for origin, element in self._relations.items():
        # Find out where to store the rels file.
        if "/" not in origin:  # Is in root.
            origin_directory = ""
            origin_filename = origin
        else:
            origin_directory = origin[:origin.rfind("/")]
            origin_filename = origin[origin.rfind("/") + 1:]
        relations_file = origin_directory + "/_rels/" + origin_filename + ".rels"

        self._indent(element)
        # NOTE(review): _xml_header is declared elsewhere in this class; it is
        # assumed to be serialisable by ET.tostring — confirm.
        self._zipfile.writestr(relations_file, ET.tostring(self._xml_header) + b"\n" + ET.tostring(element))

## When loading a file, load the content types from the archive.
#
#  If the content types are missing, an empty element is created.
def _readContentTypes(self) -> None:
    assert self._zipfile is not None

    if self._content_types_file in self._zipfile.namelist():
        content_types_element = ET.fromstring(self._zipfile.open(self._content_types_file).read())
        # Explicit len() instead of testing the Element's truth value:
        # bool() on an Element (meaning "has child elements") is deprecated
        # in xml.etree.ElementTree. Behaviour is unchanged — the parsed
        # document is only adopted when it has at least one child.
        if len(content_types_element) > 0:
            self._content_types_element = content_types_element
    if self._content_types_element is None or len(self._content_types_element) == 0:
        self._content_types_element = ET.Element("Types", xmlns="http://schemas.openxmlformats.org/package/2006/content-types")

    # If there is no type for the .rels file, create it.
    if self._mode != OpenMode.ReadOnly:
        for type_element in self._content_types_element.iterfind("{http://schemas.openxmlformats.org/package/2006/content-types}Default"):
            if "Extension" in type_element.attrib and type_element.attrib["Extension"] == "rels":
                break
        else:  # No default content type for .rels yet, so add it.
            ET.SubElement(self._content_types_element, "Default", Extension="rels", ContentType="application/vnd.openxmlformats-package.relationships+xml")

## At the end of writing a file, write the content types to the archive.
#
#  This should be written at the end of writing an archive, when all
#  content types are known.
def _writeContentTypes(self) -> None:
    assert self._zipfile is not None
    assert self._content_types_element is not None

    self._indent(self._content_types_element)
    self._zipfile.writestr(self._content_types_file, ET.tostring(self._xml_header) + b"\n" + ET.tostring(self._content_types_element))

## When loading a file, read its metadata from the archive.
#
#  This depends on the relations! Read the relations first!
def _readMetadata(self) -> None:
    assert self._zipfile is not None

    for origin, relations_element in self._relations.items():
        for relationship in relations_element.iterfind("{http://schemas.openxmlformats.org/package/2006/relationships}Relationship"):
            if "Target" not in relationship.attrib or "Type" not in relationship.attrib:
                # These two are required, and we actually need them here. Better ignore this one.
                continue
            if relationship.attrib["Type"] != self._opc_metadata_relationship_type:
                # Not interested in this one. It's not metadata that we recognise.
                continue
            metadata_file = relationship.attrib["Target"]
            if metadata_file not in self._zipfile.namelist():
                # The metadata file is unknown to us.
                continue

            metadata = json.loads(self._zipfile.open(metadata_file).read().decode("utf-8"))
            if metadata_file == self._global_metadata_file:
                # Store globals as if coming from root.
                metadata_file = ""
            elif metadata_file.endswith(".json"):
                # Metadata files should be named <filename.ext>.json, meaning that they are metadata about <filename.ext>.
                metadata_file = metadata_file[:-len(".json")]
            self._readMetadataElement(metadata, metadata_file)

    if self._mode != OpenMode.WriteOnly and not self.getMetadata("/3D/model.gcode"):
        try:  # Check if the G-code file actually exists in the package.
            self._zipfile.getinfo("/3D/model.gcode")
        except KeyError:
            return

        # NOTE(review): zipfile entry names normally carry no leading slash;
        # confirm that entries are stored with one, otherwise this branch can
        # never be reached past the getinfo() above.
        gcode_stream = self._zipfile.open("/3D/model.gcode")
        header_data = GCodeFile.parseHeader(gcode_stream, prefix="/3D/model.gcode/")
        self._metadata.update(header_data)

## Reads a single node of metadata from a JSON document (recursively).
#  \param element The node in the JSON document to read.
#  \param current_path The path towards the current document.
def _readMetadataElement(self, element: Dict[str, Any], current_path: str) -> None:
    for key, value in element.items():
        if isinstance(value, dict):  # json structures stuff in dicts if it is a subtree.
            self._readMetadataElement(value, current_path + "/" + key)
        else:
            self._metadata[current_path + "/" + key] = value

## At the end of writing a file, write the metadata to the archive.
#
#  This should be written at the end of writing an archive, when all
#  metadata is known.
#
#  ALWAYS WRITE METADATA BEFORE UPDATING RELS AND CONTENT TYPES.
def _writeMetadata(self) -> None:
    assert self._zipfile is not None

    keys_left = set(self._metadata.keys())  # The keys that are not associated with a particular file (global metadata).
    metadata_per_file = {}  # type: Dict[str, Dict[str, Any]]
    for file_name in self._zipfile.namelist():
        metadata_per_file[file_name] = {}
        for metadata_key in self._metadata:
            if metadata_key.startswith(file_name + "/"):
                # Strip the prefix: "/a/b/c.stl/print_time" becomes just "print_time" about the file "/a/b/c.stl".
                metadata_per_file[file_name][metadata_key[len(file_name) + 1:]] = self._metadata[metadata_key]
                # discard() rather than remove(): when one archive name is a
                # path-prefix of another, the same key can match twice, and
                # remove() would raise KeyError on the second match.
                keys_left.discard(metadata_key)
    # keys_left now contains only global metadata keys.

    global_metadata = {key: self._metadata[key] for key in keys_left}
    if len(global_metadata) > 0:
        self._writeMetadataToFile(global_metadata, self._global_metadata_file)
        self.addRelation(self._global_metadata_file, self._opc_metadata_relationship_type)
    for file_name, metadata in metadata_per_file.items():
        if len(metadata) > 0:
            self._writeMetadataToFile(metadata, file_name + ".json")
            self.addRelation(file_name + ".json", self._opc_metadata_relationship_type)

    if len(self._metadata) > 0:  # If we've written any metadata at all, we must include the content type as well.
        try:
            self.addContentType(extension="json", mime_type="text/json")
        except OPCError:  # User may already have defined this content type himself.
            pass

## Writes one dictionary of metadata to a JSON file.
#  \param metadata The metadata dictionary to write.
#  \param file_name The virtual path of the JSON file to write to.
def _writeMetadataToFile(self, metadata: Dict[str, Any], file_name: str) -> None:
    assert self._zipfile is not None

    # Split the metadata into a hierarchical structure.
    document = {}  # type: Dict[str, Any]
    for key, value in metadata.items():
        key = key.strip("/")  # TODO: Should paths ending in a slash give an error?
        path = key.split("/")
        current_element = document
        for element in path:
            if element not in current_element:
                current_element[element] = {}
            current_element = current_element[element]
        current_element[""] = value

    # We've created some empty-string keys to allow values to occur next to subelements.
    # If this empty-string key is the only key inside a node, fold it in to be just the value.
    for key in metadata:
        key = key.strip("/")
        path = key.split("/")
        current_element = document
        parent = document
        for element in path:
            parent = current_element
            current_element = current_element[element]
        if len(current_element) == 1:  # The empty string is the only element.
            assert "" in current_element
            parent[path[-1]] = current_element[""]  # Fold down the singleton dictionary.

    self._zipfile.writestr(file_name, json.dumps(document, sort_keys=True, indent=4))

## Helper method to write data directly into an aliased path.
def _writeToAlias(self, path_alias: str, package_filename: str, file_data: bytes) -> None:
    stream = self.getStream("{}/{}".format(path_alias, package_filename))
    stream.write(file_data)

## Helper method to ensure a relationship exists.
#  Creates the relationship if it does not exists, ignores an OPC error if it already does.
def _ensureRelationExists(self, virtual_path: str, relation_type: str, origin: str) -> None:
    try:
        # We try to add the relation. If this throws an OPCError, we know the relation already exists and ignore it.
        self.addRelation(virtual_path, relation_type, origin)
    except OPCError:
        pass

## Helper function for pretty-printing XML because ETree is stupid.
# # Source: https://stackoverflow.com/questions/749796/pretty-printing-xml-in-python def _indent(self, elem: ET.Element, level: int = 0) -> None: i = "\n" + level * " " if len(elem): if not elem.text or not elem.text.strip(): elem.text = i + " " if not elem.tail or not elem.tail.strip(): elem.tail = i for elem in elem: self._indent(elem, level + 1) if not elem.tail or not elem.tail.strip(): elem.tail = i else: if level and (not elem.tail or not elem.tail.strip()): elem.tail = i
def test_is_processing_instruction_node_function(self): pi = ElementTree.ProcessingInstruction('action', 'nothing to do') self.assertTrue(is_processing_instruction_node(pi)) self.assertFalse(is_processing_instruction_node(self.elem))