def test_plain_parse(self):
    """Parse plain text

    Parses SOURCE1 without any namespace mapping, compares the
    re-serialized fragment against EXPECTED1, then checks that the single
    child has the element typecode, the qname 'p', and neither a
    namespace nor a prefix.
    """
    fragment = parse_fragment(inputsource(SOURCE1))
    self.assertEqual(EXPECTED1, fragment.xml_encode())

    # Minimal node testing
    children = fragment.xml_children
    self.assertEqual(len(children), 1)
    root_elem = children[0]
    self.assertEqual(root_elem.xml_typecode, tree.element.xml_typecode)
    self.assertEqual(root_elem.xml_qname, u'p')
    self.assertEqual(root_elem.xml_namespace, None)
    self.assertEqual(root_elem.xml_prefix, None)
def test_parse_overridden_default_namespace_reoverridden_child(self):
    """Parse with overridden default namespace and re-overridden child

    Parses SOURCE2 with a mapping that binds the default (None) prefix to
    the XHTML namespace, compares the re-serialized fragment against
    EXPECTED3, then checks that the single child is an unprefixed 'p'
    element in the XHTML namespace.
    """
    xhtml_ns = u'http://www.w3.org/1999/xhtml'
    namespace_map = {
        u'xml': u'http://www.w3.org/XML/1998/namespace',
        None: xhtml_ns,
    }
    fragment = parse_fragment(inputsource(SOURCE2), namespace_map)
    self.assertEqual(EXPECTED3, fragment.xml_encode())

    # Minimal node testing
    children = fragment.xml_children
    self.assertEqual(len(children), 1)
    root_elem = children[0]
    self.assertEqual(root_elem.xml_typecode, tree.element.xml_typecode)
    self.assertEqual(root_elem.xml_qname, u'p')
    self.assertEqual(root_elem.xml_namespace, xhtml_ns)
    self.assertEqual(root_elem.xml_prefix, None)
def test_parse_overridden_non_default_namespace(self):
    """Parse with overridden non-default namespace

    Parses SOURCE3 with a mapping that binds the 'h' prefix to the XHTML
    namespace, compares the re-serialized fragment against EXPECTED4,
    then checks that the single child is the element 'h:p' in the XHTML
    namespace with prefix 'h'.
    """
    xhtml_ns = u'http://www.w3.org/1999/xhtml'
    namespace_map = {
        u'xml': u'http://www.w3.org/XML/1998/namespace',
        u'h': xhtml_ns,
    }
    fragment = parse_fragment(inputsource(SOURCE3), namespace_map)
    self.assertEqual(EXPECTED4, fragment.xml_encode())

    # Minimal node testing
    children = fragment.xml_children
    self.assertEqual(len(children), 1)
    root_elem = children[0]
    self.assertEqual(root_elem.xml_typecode, tree.element.xml_typecode)
    self.assertEqual(root_elem.xml_qname, u'h:p')
    self.assertEqual(root_elem.xml_namespace, xhtml_ns)
    self.assertEqual(root_elem.xml_prefix, u'h')
def feed(self, obj, prefixes=None):
    """
    Feed a structure to the writer.  The structure is interpreted as XML and
    serialized through self.printer.

    obj - XML node proxy structure (or iterator thereof), such as
          amara.writers.struct.ROOT (proxy for a root (entity) node) or
          amara.writers.struct.E (proxy for an element).  Strings and
          tree.element nodes are written as text; any other iterable has
          each of its items fed in turn; a non-iterable callable is called
          and its result is fed; anything else is converted with unicode()
          and written as text.
          See documentation for other proxy node classes
    prefixes - optional dict mapping prefix to namespace for the
          declarations considered already in scope.  It is copied before
          being extended, never modified in place.

    Returns None; all output goes to self.printer.
    """
    prefixes = prefixes or {}
    if isinstance(obj, ROOT):
        self.printer.start_document()
        #Note: the children of a root are fed without the caller's prefixes
        for subobj in obj.content:
            self.feed(subobj)
        self.printer.end_document()
        return
    if isinstance(obj, NS):
        #NS proxies only matter as leading content of an E (handled in the
        #E branch below); fed on their own they produce no output
        return
    if isinstance(obj, RAW):
        #parse_frag returns an entity
        ent = parse_fragment(inputsource.text(obj.content))
        from amara.writers._treevisitor import visitor
        v = visitor(printer=self.printer)
        for child in ent.xml_children:
            v.visit(child)
        return
    if isinstance(obj, E):
        #First attempt used tee. Seems we ran into the warning at
        #http://www.python.org/doc/2.4.3/whatsnew/node13.html
        #"Note that tee() has to keep copies of the values returned by the
        #iterator; in the worst case, it may need to keep all of them.
        #This should therefore be used carefully if the leading iterator
        #can run far ahead of the trailing iterator in a long stream of
        #inputs.  If the separation is large, then you might as well use
        #list() instead."
        #So a single iterator is used, and the one non-NS item that gets
        #pulled off while scanning the leading NS declarations is kept in
        #`lead` to be fed first.
        new_prefixes = []
        lead = None
        content = iter(obj.content)
        for subobj in content:
            if isinstance(subobj, NS):
                new_prefixes.append((subobj.prefix, subobj.namespace))
            else:
                lead = subobj
                break
        prefix, _local = splitqname(obj.qname)
        prefix = prefix or u''
        if obj.ns == UNSPECIFIED_NAMESPACE:
            #Resolve from the in-scope declarations.  Note that the
            #resolved namespace is stored back on the proxy object.
            obj.ns = prefixes.get(prefix, u'')
        elif prefix not in prefixes or prefixes[prefix] != obj.ns:
            #The element's own namespace is not in scope yet: declare it
            new_prefixes.append((prefix, obj.ns or u''))
        attrs = list(obj.attributes.itervalues()) if obj.attributes else ()
        if new_prefixes:
            #Copy so the caller's (parent scope) mapping is left untouched
            prefixes = prefixes.copy()
            prefixes.update(dict(new_prefixes))
        self.printer.start_element(obj.ns, obj.qname, new_prefixes, attrs)
        #NOTE(review): truthiness test, so a falsy lead item (e.g. 0 or an
        #empty string) is skipped rather than fed
        if lead:
            self.feed(lead, prefixes)
        for subobj in content:
            self.feed(subobj, prefixes)
        self.printer.end_element(obj.ns, obj.qname)
        return
    if isinstance(obj, basestring):
        self.printer.text(U(obj))
        return
    if isinstance(obj, tree.element):
        #Be smart about bindery nodes
        self.printer.text(unicode(obj))
        return
    try:
        subobjs = iter(obj)
    except TypeError:
        if callable(obj):
            self.feed(obj(), prefixes)
        else:
            #Just try to make it text, i.e. punt
            self.feed(unicode(obj), prefixes)
    else:
        #An iterable of structures: feed each item in the current scope.
        #(Previously the iterator was created and then dropped, so
        #iterables produced no output at all.)
        for subobj in subobjs:
            self.feed(subobj, prefixes)
    return
def feed(self, obj, prefixes=None):
    """
    Feed a structure to the writer.  The structure is interpreted as XML and
    serialized through self.printer.

    obj - XML node proxy structure (or iterator thereof), such as
          amara.writers.struct.ROOT (proxy for a root (entity) node) or
          amara.writers.struct.E (proxy for an element).  Strings and
          tree.element nodes are written as text; any other iterable has
          each of its items fed in turn; a non-iterable callable is called
          and its result is fed; anything else is converted with unicode()
          and written as text.
          See documentation for other proxy node classes
    prefixes - optional dict mapping prefix to namespace for the
          declarations considered already in scope.  It is copied before
          being extended, never modified in place.

    Returns None; all output goes to self.printer.
    """
    prefixes = prefixes or {}

    if isinstance(obj, ROOT):
        self.printer.start_document()
        #Note: the children of a root are fed without the caller's prefixes
        for subobj in obj.content:
            self.feed(subobj)
        self.printer.end_document()
        return

    if isinstance(obj, NS):
        #NS proxies only matter as leading content of an E (handled in the
        #E branch below); fed on their own they produce no output
        return

    if isinstance(obj, RAW):
        #parse_frag returns an entity
        ent = parse_fragment(inputsource.text(obj.content))
        from amara.writers._treevisitor import visitor
        v = visitor(printer=self.printer)
        for child in ent.xml_children:
            v.visit(child)
        return

    if isinstance(obj, E):
        #First attempt used tee. Seems we ran into the warning at
        #http://www.python.org/doc/2.4.3/whatsnew/node13.html
        #"Note that tee() has to keep copies of the values returned by the
        #iterator; in the worst case, it may need to keep all of them.
        #This should therefore be used carefully if the leading iterator
        #can run far ahead of the trailing iterator in a long stream of
        #inputs.  If the separation is large, then you might as well use
        #list() instead."
        #So a single iterator is used, and the one non-NS item that gets
        #pulled off while scanning the leading NS declarations is kept in
        #`lead` to be fed first.
        new_prefixes = []
        lead = None
        content = iter(obj.content)
        for subobj in content:
            if isinstance(subobj, NS):
                new_prefixes.append((subobj.prefix, subobj.namespace))
            else:
                lead = subobj
                break

        prefix, _local = splitqname(obj.qname)
        prefix = prefix or u''
        if obj.ns == UNSPECIFIED_NAMESPACE:
            #Resolve from the in-scope declarations.  Note that the
            #resolved namespace is stored back on the proxy object.
            obj.ns = prefixes.get(prefix, u'')
        elif prefix not in prefixes or prefixes[prefix] != obj.ns:
            #The element's own namespace is not in scope yet: declare it
            new_prefixes.append((prefix, obj.ns or u''))

        attrs = list(obj.attributes.itervalues()) if obj.attributes else ()
        if new_prefixes:
            #Copy so the caller's (parent scope) mapping is left untouched
            prefixes = prefixes.copy()
            prefixes.update(dict(new_prefixes))

        self.printer.start_element(obj.ns, obj.qname, new_prefixes, attrs)
        #NOTE(review): truthiness test, so a falsy lead item (e.g. 0 or an
        #empty string) is skipped rather than fed
        if lead:
            self.feed(lead, prefixes)
        for subobj in content:
            self.feed(subobj, prefixes)
        self.printer.end_element(obj.ns, obj.qname)
        return

    if isinstance(obj, basestring):
        self.printer.text(U(obj))
        return

    if isinstance(obj, tree.element):
        #Be smart about bindery nodes
        self.printer.text(unicode(obj))
        return

    try:
        subobjs = iter(obj)
    except TypeError:
        if callable(obj):
            self.feed(obj(), prefixes)
        else:
            #Just try to make it text, i.e. punt
            self.feed(unicode(obj), prefixes)
    else:
        #An iterable of structures: feed each item in the current scope.
        #(Previously the iterator was created and then dropped, so
        #iterables produced no output at all.)
        for subobj in subobjs:
            self.feed(subobj, prefixes)
    return