def test_serialize_roundtrip(self):
    # Round-trips an elements tree through serialize()/deserialize(),
    # first with the default XML format and then with JSON.

    # Create an elements object tree, one section at a time
    text_section = Section([Paragraph(["Hello"]), Paragraph(["World"])],
                           ordinal="1", title="Main section")
    native_section = Section([42,
                              date(2013, 11, 27),
                              datetime(2013, 11, 27, 12, 0, 0),
                              b'bytestring',
                              {'foo': 'bar', 'x': 'y'}],
                             ordinal=2, title="Native types")
    tree = Body([text_section, native_section])

    # roundtrip using the default XML format
    as_xml = serialize(tree)
    self.assertIsInstance(as_xml, str)
    from_xml = deserialize(as_xml, caller_globals=globals())
    self.assertEqual(tree, from_xml)

    # make another section with special (but commonly used) types
    # and try to roundtrip them. The XML serialization format does
    # not support this.
    turtle = """@prefix dcterms: <http://purl.org/dc/terms/> . <http://example.org/1> dcterms:title "Hello world"@en . """
    graph = Graph().parse(data=turtle, format="turtle")
    parseresult = urlparser.parseString("http://example.org/1")
    tree.append(Section([parseresult, graph], meta=graph))

    # roundtrip using JSON (which uses fully qualified classnames,
    # so we don't need to pass globals() into deserialize())
    as_json = serialize(tree, format="json")
    self.assertIsInstance(as_json, str)
    from_json = deserialize(as_json, format="json")

    # two pyparsing.ParseResult objects cannot be directly
    # compared (they don't implement __eq__), therefore we compare
    # their XML representations
    for candidate in (tree, from_json):
        candidate[2][0] = util.parseresults_as_xml(candidate[2][0])
    self.assertEqual(tree, from_json)
def test_serialize_roundtrip(self):
    # Round-trips an elements tree through serialize()/deserialize(),
    # first with the default XML format and then with JSON.

    # Create an elements object tree, one section at a time
    text_section = Section([Paragraph(["Hello"]), Paragraph(["World"])],
                           ordinal="1", title="Main section")
    native_section = Section([42,
                              date(2013, 11, 27),
                              datetime(2013, 11, 27, 12, 0, 0),
                              b'bytestring',
                              {'foo': 'bar', 'x': 'y'}],
                             ordinal=2, title="Native types")
    tree = Body([text_section, native_section])

    # roundtrip using the default XML format
    as_xml = serialize(tree)
    self.assertIsInstance(as_xml, str)
    from_xml = deserialize(as_xml, caller_globals=globals())
    self.assertEqual(tree, from_xml)

    # make another section with special (but commonly used) types
    # and try to roundtrip them. The XML serialization format does
    # not support this.
    turtle = """@prefix dcterms: <http://purl.org/dc/terms/> . <http://example.org/1> dcterms:title "Hello world"@en . """
    graph = Graph().parse(data=turtle, format="turtle")
    parseresult = urlparser.parseString("http://example.org/1")
    tree.append(Section([parseresult, graph], meta=graph))

    # roundtrip using JSON (which uses fully qualified classnames,
    # so we don't need to pass globals() into deserialize())
    as_json = serialize(tree, format="json")
    self.assertIsInstance(as_json, str)
    from_json = deserialize(as_json, format="json")

    # two pyparsing.ParseResult objects cannot be directly
    # compared (they don't implement __eq__), therefore we compare
    # their XML representations
    for candidate in (tree, from_json):
        candidate[2][0] = util.parseresults_as_xml(candidate[2][0])
    self.assertEqual(tree, from_json)
def __serialize_xml(node, serialize_hidden_attrs=False):
    """Recursively convert ``node`` into an ElementTree element.

    The element is named after the node's class (``"str"`` / ``"bytes"``
    for the plain text types). Entries of the node's ``__dict__`` become
    XML attributes, and the node's own value becomes either the element
    text (str, bytes, int, or ``repr()`` for anything else) or child
    elements (list).

    :param node: the object to serialize
    :param serialize_hidden_attrs: if true, ``__dict__`` entries whose
        names start with an underscore are emitted as well. The flag is
        passed on to every child of a list node.
    :returns: the element representing ``node``
    """
    # Special handling of pyparsing.ParseResults -- deserializing of
    # these won't work (easily)
    if isinstance(node, pyparsing.ParseResults):
        return ET.XML(util.parseresults_as_xml(node))

    # We use type() instead of isinstance() because we want to
    # serialize str derived types using their correct class
    # names. This is now more involved since under py2, str is now
    # really future.types.newstr.newstr.
    if type(node) == str or (hasattr(builtins, 'unicode') and  # means py2 + future
                             type(node) == builtins.unicode):
        nodename = "str"
    elif type(node) == bytes:
        nodename = "bytes"
    else:
        nodename = node.__class__.__name__
    e = ET.Element(nodename)

    if hasattr(node, '__dict__'):
        # iterate over a snapshot, as the original code did
        for key, val in list(node.__dict__.items()):
            if key.startswith('_') and not serialize_hidden_attrs:
                continue
            if val is None:
                continue
            if isinstance(val, (str, bytes)):
                e.set(key, native(val))
            elif isinstance(val, LayeredConfig):
                # FIXME: this is an ugly hack to avoid problems with
                # pdfreader.TextBox.font
                continue
            else:
                e.set(key, repr(val))

    if isinstance(node, str):
        if node:
            e.text = str(node)
    elif isinstance(node, bytes):
        if node:
            e.text = node.decode()
    elif isinstance(node, int):
        e.text = str(node)
    elif isinstance(node, list):
        for child in node:
            # Pass the flag down so hidden attributes are handled the
            # same way at every level (previously it was dropped here
            # and only applied to the root node).
            e.append(__serialize_xml(child, serialize_hidden_attrs))
    else:
        # Fallback for any other type: store its repr() rather than
        # refusing to serialize.
        e.text = repr(node)
    return e
def __serialize_xml(node, serialize_hidden_attrs=False):
    """Recursively convert ``node`` into an ElementTree element.

    The element is named after the node's class (``"str"`` / ``"bytes"``
    for the plain text types). Entries of the node's ``__dict__`` become
    XML attributes, and the node's own value becomes either the element
    text (str, bytes, int, or ``repr()`` for anything else) or child
    elements (list).

    :param node: the object to serialize
    :param serialize_hidden_attrs: if true, ``__dict__`` entries whose
        names start with an underscore are emitted as well. The flag is
        passed on to every child of a list node.
    :returns: the element representing ``node``
    """
    # Special handling of pyparsing.ParseResults -- deserializing of
    # these won't work (easily)
    if isinstance(node, pyparsing.ParseResults):
        return ET.XML(util.parseresults_as_xml(node))

    # We use type() instead of isinstance() because we want to
    # serialize str derived types using their correct class
    # names. This is now more involved since under py2, str is now
    # really future.types.newstr.newstr.
    if type(node) == str or (hasattr(builtins, 'unicode') and  # means py2 + future
                             type(node) == builtins.unicode):
        nodename = "str"
    elif type(node) == bytes:
        nodename = "bytes"
    else:
        nodename = node.__class__.__name__
    e = ET.Element(nodename)

    if hasattr(node, '__dict__'):
        # iterate over a snapshot, as the original code did
        for key, val in list(node.__dict__.items()):
            if key.startswith('_') and not serialize_hidden_attrs:
                continue
            if val is None:
                continue
            if isinstance(val, (str, bytes)):
                e.set(key, native(val))
            elif isinstance(val, LayeredConfig):
                # FIXME: this is an ugly hack to avoid problems with
                # pdfreader.TextBox.font
                continue
            else:
                e.set(key, repr(val))

    if isinstance(node, str):
        if node:
            e.text = str(node)
    elif isinstance(node, bytes):
        if node:
            e.text = node.decode()
    elif isinstance(node, int):
        e.text = str(node)
    elif isinstance(node, list):
        for child in node:
            # Pass the flag down so hidden attributes are handled the
            # same way at every level (previously it was dropped here
            # and only applied to the root node).
            e.append(__serialize_xml(child, serialize_hidden_attrs))
    else:
        # Fallback for any other type: store its repr() rather than
        # refusing to serialize.
        e.text = repr(node)
    return e
def __serializeNode(node, serialize_hidden_attrs=False):
    """Recursively convert ``node`` into an ElementTree element.

    The element is named after the node's class (``"str"`` / ``"bytes"``
    for the plain text types). Entries of the node's ``__dict__`` become
    XML attributes, and the node's own value becomes either the element
    text (str, bytes, int, or ``repr()`` for anything else) or child
    elements (list).

    :param node: the object to serialize
    :param serialize_hidden_attrs: if true, ``__dict__`` entries whose
        names start with an underscore are emitted as well. The flag is
        passed on to every child of a list node.
    :returns: the element representing ``node``
    """
    # Special handling of pyparsing.ParseResults -- deserializing of
    # these won't work (easily)
    if isinstance(node, pyparsing.ParseResults):
        return ET.XML(util.parseresults_as_xml(node))

    # We use type() instead of isinstance() because we want to
    # serialize str derived types using their correct class names
    if type(node) == str:
        nodename = "str"
    elif type(node) == bytes:
        nodename = "bytes"
    else:
        nodename = node.__class__.__name__
    e = ET.Element(nodename)

    if hasattr(node, '__dict__'):
        # iterate over a snapshot, as the original code did
        for key, val in list(node.__dict__.items()):
            if key.startswith('_') and not serialize_hidden_attrs:
                continue
            if val is None:
                continue
            if isinstance(val, (str, bytes)):
                e.set(key, val)
            else:
                e.set(key, repr(val))

    if isinstance(node, str):
        if node:
            e.text = str(node)
    elif isinstance(node, bytes):
        if node:
            e.text = node.decode()
    elif isinstance(node, int):
        e.text = str(node)
    elif isinstance(node, list):
        for child in node:
            # Pass the flag down so hidden attributes are handled the
            # same way at every level (previously it was dropped here
            # and only applied to the root node).
            e.append(__serializeNode(child, serialize_hidden_attrs))
    else:
        # Fallback for any other type: store its repr() rather than
        # refusing to serialize.
        e.text = repr(node)
    return e
def parametric_test(self, filename):
    # Parses the text in `filename` with a CitationParser built from
    # self.parser and compares the resulting nodes with the expected
    # output stored next to it in "<basename>.result" (entries
    # separated by blank lines).
    with codecs.open(filename, encoding="utf-8") as fp:
        testdata = fp.read()

    cp = CitationParser(self.parser)
    nodes = cp.parse_string(testdata)
    got = []
    for node in nodes:
        if isinstance(node, str):
            got.append(node.strip())
        else:
            # non-string nodes are unpacked as (text, parse result);
            # only the parse result is compared, as XML
            (text, result) = node
            got.append(util.parseresults_as_xml(result).strip())

    wantfile = os.path.splitext(filename)[0] + ".result"
    if not os.path.exists(wantfile):
        # Show what the parser produced so that a .result file can be
        # created from it. (This used to reference an undefined name,
        # `compare`, and died with NameError before reaching fail().)
        print("\nparse_string() returns:")
        print("\n\n".join(got))
        self.fail("%s not found" % wantfile)

    # NOTE(review): the input is read as UTF-8 but this file is opened
    # with the platform default encoding -- confirm that is intended.
    with open(wantfile) as fp:
        want = [x.strip() for x in fp.read().split("\n\n")]
    self.maxDiff = 4096
    self.assertListEqual(want, got)
def parametric_test(self, filename):
    # Parses the text in `filename` with a CitationParser built from
    # self.parser and compares the resulting nodes with the expected
    # output stored next to it in "<basename>.result" (entries
    # separated by blank lines).
    with codecs.open(filename, encoding="utf-8") as fp:
        testdata = fp.read()

    cp = CitationParser(self.parser)
    nodes = cp.parse_string(testdata)
    got = []
    for node in nodes:
        if isinstance(node, str):
            got.append(node.strip())
        else:
            # non-string nodes are unpacked as (text, parse result);
            # only the parse result is compared, as XML
            (text, result) = node
            got.append(util.parseresults_as_xml(result).strip())

    wantfile = os.path.splitext(filename)[0] + ".result"
    if not os.path.exists(wantfile):
        # Show what the parser produced so that a .result file can be
        # created from it. (This used to reference an undefined name,
        # `compare`, and died with NameError before reaching fail().)
        print("\nparse_string() returns:")
        print("\n\n".join(got))
        self.fail("%s not found" % wantfile)

    # NOTE(review): the input is read as UTF-8 but this file is opened
    # with the platform default encoding -- confirm that is intended.
    with open(wantfile) as fp:
        want = [x.strip() for x in fp.read().split("\n\n")]
    self.maxDiff = 4096
    self.assertListEqual(want, got)