Ejemplo n.º 1
0
    def test_serialize_roundtrip(self):
        # Create a elements object tree
        tree = Body([
            Section([Paragraph(["Hello"]),
                     Paragraph(["World"])],
                    ordinal="1",
                    title="Main section"),
            Section([
                42,
                date(2013, 11, 27),
                datetime(2013, 11, 27, 12, 0, 0), b'bytestring', {
                    'foo': 'bar',
                    'x': 'y'
                }
            ],
                    ordinal=2,
                    title="Native types")
        ])
        # roundtrip using the default XML format
        serialized = serialize(tree)
        self.assertIsInstance(serialized, str)
        newtree = deserialize(serialized, caller_globals=globals())
        self.assertEqual(tree, newtree)

        # make another section with special (but commonly used) types
        # and try to roundtrip them. The XML serialization format does
        # not support this.
        graph = Graph().parse(
            data="""@prefix dcterms: <http://purl.org/dc/terms/> .

<http://example.org/1> dcterms:title "Hello world"@en .
""",
            format="turtle")
        parseresult = urlparser.parseString("http://example.org/1")
        tree.append(Section([parseresult, graph], meta=graph))

        # roundtrip using JSON (which uses fully qualified classnames,
        # so we don't need to pass globals() into deserialize()
        serialized = serialize(tree, format="json")
        self.assertIsInstance(serialized, str)
        newtree = deserialize(serialized, format="json")

        # two pyparsing.ParseResult objects cannot be directly
        # compared (they don't implement __eq__), therefore we compare
        # their XML representations
        tree[2][0] = util.parseresults_as_xml(tree[2][0])
        newtree[2][0] = util.parseresults_as_xml(newtree[2][0])
        self.assertEqual(tree, newtree)
Ejemplo n.º 2
0
 def timetest(self, basefile, basedir):
     # Loads the serialized, not-yet-parsed document for ``basefile``
     # from below ``basedir``, runs the repo's reference parser over it
     # and returns a tuple of (elapsed wall-clock seconds, extracted
     # references).
     stored_path = self.repo.store.serialized_path(basefile) + ".unparsed"
     # Point the path at the copy kept under basedir/serialized/<alias>/
     # instead of the store's own datadir.
     store_prefix = self.repo.store.datadir + "/serialized/"
     fixture_prefix = basedir + "/serialized/" + self.alias + "/"
     fixture_path = stored_path.replace(store_prefix, fixture_prefix)
     with codecs.open(fixture_path, "r", encoding="utf-8") as fp:
         raw_json = fp.read()
     doc = deserialize(raw_json, format="json")

     refparser = self.repo.refparser
     # FIXME: These config values must be picked up by createtest,
     # serialized into json, then restored here from json rather
     # than be hardcoded.
     refparser._legalrefparser.kommittensbetankande = "1997:39"
     named_laws = {
         'personuppgiftslagen': '0000:0000',
         'persondatalagen': '1998:000',
         'utlänningslagen': '1989:529',
     }
     refparser._legalrefparser.currentlynamedlaws = named_laws
     current_attribs = {
         'type': RPUBL.Proposition,
         'year': '1997/98',
         'no': '44',
     }
     refparser._currentattribs = current_attribs

     # FIXME: we should use timeit here as well to get more stable
     # timings. Problem is that these tests take a long time as it
     # is...
     started = time.time()
     refparser.parse_recursive(doc)
     finished = time.time()
     return finished - started, extractrefs(doc)
Ejemplo n.º 3
0
    def test_serialize_roundtrip(self):
        # Create a elements object tree
        tree = Body([Section([Paragraph(["Hello"]),
                              Paragraph(["World"])],
                             ordinal="1",
                             title="Main section"),
                     Section([42,
                              date(2013,11,27),
                              datetime(2013,11,27,12,0,0),
                              b'bytestring',
                              {'foo': 'bar',
                               'x': 'y'}],
                             ordinal=2,
                             title="Native types")
                 ])
        # roundtrip using the default XML format
        serialized = serialize(tree)
        self.assertIsInstance(serialized, str)
        newtree = deserialize(serialized, caller_globals=globals())
        self.assertEqual(tree, newtree)

        # make another section with special (but commonly used) types
        # and try to roundtrip them. The XML serialization format does
        # not support this.
        graph = Graph().parse(data="""@prefix dcterms: <http://purl.org/dc/terms/> .

<http://example.org/1> dcterms:title "Hello world"@en .
""", format="turtle")
        parseresult = urlparser.parseString("http://example.org/1")
        tree.append(Section([parseresult,
                             graph],
                            meta=graph))
        
        # roundtrip using JSON (which uses fully qualified classnames,
        # so we don't need to pass globals() into deserialize()
        serialized = serialize(tree, format="json")
        self.assertIsInstance(serialized, str)
        newtree = deserialize(serialized, format="json")

        # two pyparsing.ParseResult objects cannot be directly
        # compared (they don't implement __eq__), therefore we compare
        # their XML representations
        tree[2][0] = util.parseresults_as_xml(tree[2][0])
        newtree[2][0] = util.parseresults_as_xml(newtree[2][0])
        self.assertEqual(tree, newtree)
Ejemplo n.º 4
0
 def test_json_roundtrip(self):
     # A more realistic round-trip example with some hairy parts: a
     # document parsed from a PDF is serialized to JSON, deserialized
     # again and compared with the original.
     from ferenda import PDFDocumentRepository, PDFReader
     repo = PDFDocumentRepository()
     document = repo.make_document("sample")
     # Update the timestamps of the intermediate file to make SURE it
     # is newer than the pdf.
     intermediate_xml = "test/files/pdfreader/intermediate/sample.xml"
     os.utime(intermediate_xml, None)
     pdf_reader = PDFReader(
         filename="test/files/pdfreader/sample.pdf",
         workdir="test/files/pdfreader/intermediate")
     repo.parse_from_pdfreader(pdf_reader, document)
     as_json = serialize(document, format="json")
     restored = deserialize(as_json, format="json")
     self.assertEqual(document, restored)
Ejemplo n.º 5
0
 def test_json_roundtrip(self):
     # Serializes a PDF-derived document to JSON and back, then checks
     # that the result compares equal to the original (a more
     # realistic round trip with some hairy parts).
     from ferenda import PDFDocumentRepository, PDFReader
     pdf_path = "test/files/pdfreader/sample.pdf"
     work_dir = "test/files/pdfreader/intermediate"
     docrepo = PDFDocumentRepository()
     doc = docrepo.make_document("sample")
     # Make SURE that the intermediate files are newer than the pdf by
     # setting their access/modification times to now.
     os.utime("test/files/pdfreader/intermediate/sample.xml", None)
     reader = PDFReader(filename=pdf_path, workdir=work_dir)
     docrepo.parse_from_pdfreader(reader, doc)
     roundtripped = deserialize(serialize(doc, format="json"),
                                format="json")
     self.assertEqual(doc, roundtripped)
Ejemplo n.º 6
0
 def test_serialize_roundtrip(self):
     # Serializes a small elements tree with the default format,
     # deserializes it again and checks that both trees compare equal.

     # Build an elements object tree: a section of paragraphs followed
     # by a section of native Python values.
     paragraphs = [Paragraph(["Hello"]), Paragraph(["World"])]
     main_section = Section(paragraphs, ordinal="1", title="Main section")
     native_values = [
         42,
         date(2013, 11, 27),
         b'bytestring',
         {'foo': 'bar', 'x': 'y'},
     ]
     native_section = Section(native_values,
                              ordinal=2,
                              title="Native types")
     original = Body([main_section, native_section])

     dumped = serialize(original)
     self.assertIsInstance(dumped, str)
     # This module's globals are passed along as the second argument.
     restored = deserialize(dumped, globals())
     self.assertEqual(original, restored)
Ejemplo n.º 7
0
 def timetest(self, basefile, basedir):
     # Times one run of the repo's reference parser over a previously
     # serialized document and returns (elapsed seconds, references
     # extracted from the parsed document).
     unparsed_path = self.repo.store.serialized_path(basefile) + ".unparsed"
     # Swap the store's "<datadir>/serialized/" prefix for the
     # corresponding location under basedir, namespaced by self.alias.
     unparsed_path = unparsed_path.replace(
         self.repo.store.datadir + "/serialized/",
         "/".join([basedir, "serialized", self.alias, ""]))
     with codecs.open(unparsed_path, "r", encoding="utf-8") as stream:
         doc = deserialize(stream.read(), format="json")

     parser = self.repo.refparser
     # FIXME: These config values must be picked up by createtest,
     # serialized into json, then restored here from json rather
     # than be hardcoded.
     parser._legalrefparser.kommittensbetankande = "1997:39"
     parser._legalrefparser.currentlynamedlaws = {
         'personuppgiftslagen': '0000:0000',
         'persondatalagen': '1998:000',
         'utlänningslagen': '1989:529',
     }
     parser._currentattribs = {
         'type': RPUBL.Proposition,
         'year': '1997/98',
         'no': '44',
     }

     # FIXME: we should use timeit here as well to get more stable
     # timings. Problem is that these tests take a long time as it
     # is...
     t0 = time.time()
     parser.parse_recursive(doc)
     duration = time.time() - t0
     references = extractrefs(doc)
     return duration, references