Example #1
    def test_facet_query(self):
        results1 = json.load(open("test/files/datasets/results1.json"))
        results2 = json.load(open("test/files/datasets/results2.json"))

        self.loader.add_serialized(
            util.readfile("test/files/datasets/books.ttl"),
            format="turtle", context="http://example.org/ctx/base")
        self.loader.add_serialized(
            util.readfile("test/files/datasets/articles.ttl"),
            format="turtle", context="http://example.org/ctx/other")

        # Since the query is partially constructed by DocumentRepository, we
        # need to run that code.
        import rdflib
        from ferenda import DocumentRepository
        repo = DocumentRepository()
        repo.config.storetype = self.storetype
        repo.rdf_type = rdflib.URIRef("http://purl.org/ontology/bibo/Book")

        # test 1
        sq = repo.facet_query("http://example.org/ctx/base")
        got = self.store.select(sq, format="python")
        self.assertEqual(len(got), len(results1))
        for row in results1:
            self.assertIn(row, got)

        # test 2
        sq = repo.facet_query("http://example.org/ctx/other")
        got = self.store.select(sq, format="python")
        self.assertEqual(len(got), len(results2))
        for row in results2:
            self.assertIn(row, got)

        if self.storetype == "SLEEPYCAT":
            self.store.graph.close()
Example #2
    def test_parse(self):
        self.repo.download()
        self.assertTrue(self.repo.parse("1")) # both A and B can
                                              # handle this but B
                                              # should win
        self.assertEqual("basefile 1, parsed by b",
                         util.readfile(self.repo.store.parsed_path("1")))
        self.assertEqual("basefile 1, metadata from b",
                         util.readfile(self.repo.store.distilled_path("1")))
        self.assertEqual(["attach.txt"],
                         list(self.repo.store.list_attachments("1", "parsed")))
        with self.assertRaises(errors.ParseError):
            self.repo.parse("2")              # none can handle this
        self.assertTrue(self.repo.parse("3")) # only A can handle this
        self.assertEqual("basefile 3, parsed by a",
                         util.readfile(self.repo.store.parsed_path("3")))
        self.assertEqual("basefile 3, metadata from a",
                         util.readfile(self.repo.store.distilled_path("3")))
        self.assertEqual([], # this repo supports attachment, but
                             # underlying repo A did not
                         list(self.repo.store.list_attachments("3", "parsed")))

        # in this case, all files should be up-to-date, so no copying
        # should occur (triggering the "Attachments are (likely)
        # up-to-date branch")
        self.assertTrue(self.repo.parse("1")) 

        # and finally, list_basefiles_for("generate") should delegate
        # to DocumentStore.list_basefiles_for
        self.assertEqual(set(["1", "3"]),
                         set(self.repo.store.list_basefiles_for("generate")))
Example #3
    def test_open(self):
        wanted_filename = self.store.path("basefile", "maindir", ".suffix")
        with self.store.open("basefile", "maindir", ".suffix", "w") as fp:
            self.assertNotEqual(fp.name, wanted_filename)
            self.assertEqual(fp.realname, wanted_filename)
            fp.write("This is the data")
        self.assertEqual(util.readfile(wanted_filename),
                         "This is the data")
        mtime = os.stat(wanted_filename).st_mtime

        # make sure that the open method can also be used for reading
        with self.store.open("basefile", "maindir", ".suffix") as fp:
            self.assertEqual("This is the data",
                             fp.read())

        # make sure writing identical content does not actually write
        # a new file
        time.sleep(.1) # just to get a different mtime
        with self.store.open("basefile", "maindir", ".suffix", "w") as fp:
            fp.write("This is the data")
        self.assertEqual(os.stat(wanted_filename).st_mtime,
                         mtime)

        # make sure normal (non-context-manager) usage also works
        fp = self.store.open("basefile", "maindir", ".suffix", "w")
        fp.write("This is the new data")
        fp.close()
        self.assertEqual(util.readfile(wanted_filename),
                         "This is the new data")
Example #4
    def assertEqualDirs(self, want, got, suffix=None, subset=False, filterdir="entries"):
        """Assert that two directory trees contains identical files

        :param want: The expected directory tree
        :type  want: str
        :param got: The actual directory tree
        :type  got: str
        :param suffix: If given, only check files ending in suffix (otherwise check all the files
        :type  suffix: str
        :param subset: If True, require only that files in want is a subset of files in got (otherwise require that the sets are identical)
        :type subset: bool
        :param filterdir: If given, don't compare the parts of the tree that starts with filterdir
        :type  suffix: str
        """
        wantfiles = [x[len(want) + 1:]
                     for x in util.list_dirs(want, suffix) if not x.startswith(want + os.sep + filterdir)]
        gotfiles = [x[len(got) + 1:]
                    for x in util.list_dirs(got, suffix) if not x.startswith(got + os.sep + filterdir)]
        self.maxDiff = None
        if subset:
            self.assertTrue(set(wantfiles).issubset(set(gotfiles)))
        else:
            self.assertEqual(wantfiles, gotfiles)  # or assertIn?
        for f in wantfiles:
            if not filecmp.cmp(os.path.join(want, f),
                               os.path.join(got, f),
                               shallow=False):
                self.assertEqual(util.readfile(os.path.join(want, f), mode="rb"),
                                 util.readfile(os.path.join(got, f), mode="rb"))
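For reference, a hedged usage sketch of the helper above (the directory paths and the test class are hypothetical; assertEqualDirs is assumed to be mixed into the test case as defined in Example #4):

import unittest

class ExampleDirTest(unittest.TestCase):
    # Hypothetical test case; assertEqualDirs is the mixin method above.
    def test_output_tree(self):
        # Compare only .nt files and tolerate extra files in the actual tree.
        self.assertEqualDirs("test/files/expected",   # want: reference tree
                             "test/files/generated",  # got: actual tree
                             suffix=".nt",
                             subset=True)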
Example #5
File: testUtil.py Project: zigit/ferenda
    def test_replace_if_different(self):
        # test 1: dst does not exist
        util.writefile(self.fname, "Hello")
        self.assertTrue(util.replace_if_different(self.fname, self.fname2))
        self.assertFalse(os.path.exists(self.fname))
        self.assertTrue(os.path.exists(self.fname2))

        # test 2: dst exists, but is different (gets overwritten)
        util.writefile(self.fname, "Hello (different)")
        self.assertTrue(util.replace_if_different(self.fname, self.fname2))
        self.assertFalse(os.path.exists(self.fname))
        self.assertEqual("Hello (different)",
                         util.readfile(self.fname2))

        # test 3: src and dst are identical (src gets removed)
        util.writefile(self.fname, "Hello (different)")
        self.assertFalse(util.replace_if_different(self.fname, self.fname2))
        self.assertFalse(os.path.exists(self.fname))

        # test 4: dst exists, is different, gets archived
        newfile = self.dname+"/new.txt"
        archivefile = self.dname+"/archive.txt"
        util.writefile(newfile, "Hello (archiving)")
        self.assertTrue(util.replace_if_different(newfile, self.fname2, archivefile))
        self.assertFalse(os.path.exists(newfile))
        self.assertEqual("Hello (archiving)",
                         util.readfile(self.fname2))
        self.assertEqual("Hello (different)",
                         util.readfile(archivefile))
Example #6
 def test_get_serialized_file(self):
     want = tempfile.mktemp(suffix=".nt")
     util.writefile(want, util.readfile("test/files/datasets/dataset.nt"))
     got = tempfile.mktemp(suffix=".nt")
     self.loader.add_serialized(
         util.readfile("test/files/datasets/dataset.nt"),format="nt")
     del self.loader
     self.store.get_serialized_file(got, format="nt")
     self.assertEqualGraphs(want, got)
Example #7
    def parse_test(self, downloaded_file, xhtml_file, docroot):
        """This test is run once for each basefile found in
        docroot/downloaded. It performs a full parse, and verifies that
        the resulting XHTML document is equal to the XHTML file placed in
        docroot/parsed/.

        """
        basefile = self.filename_to_basefile(downloaded_file)

        def runtest():
            if "FERENDA_LOG_TEST" in os.environ:
                loglevel = {
                    "DEBUG": logging.DEBUG,
                    "INFO": logging.INFO,
                    "WARNING": logging.WARNING,
                    "ERROR": logging.ERROR,
                    "CRITICAL": logging.CRITICAL
                }.get(os.environ["FERENDA_LOG_TEST"], logging.INFO)
                logformat = "%(asctime)s %(name)s %(levelname)s %(message)s"
                datefmt = "%H:%M:%S"
                handler = logging.StreamHandler()
                handler.setLevel(loglevel)
                handler.setFormatter(
                    logging.Formatter(logformat, datefmt=datefmt))
                logger = logging.getLogger()
                logger.setLevel(loglevel)
                # shut some non-core loggers up
                for logname in [
                        'requests.packages.urllib3.connectionpool',
                        'rdflib.plugins.sleepycat',
                        'rdflib.plugins.parsers.pyRdfa',
                        'ferenda.thirdparty.patch'
                ]:
                    log = logging.getLogger(logname)
                    log.propagate = False
                logger.addHandler(handler)
            self.repo.parse(basefile)
            if "FERENDA_LOG_TEST" in os.environ:
                logger.removeHandler(handler)

        if "FERENDA_PROFILE_TEST" in os.environ:
            print("Profiling test")
            import cProfile
            cProfile.runctx("runtest()", globals(), locals(), sort="cumtime")
        else:
            runtest()
        print = builtins.print
        if 'FERENDA_SET_TESTFILE' in os.environ:
            print("Overwriting '%s' with result of parse ('%s')" %
                  (xhtml_file, basefile))
            util.robust_rename(xhtml_file, xhtml_file + "~")
            shutil.copy2(self.repo.store.parsed_path(basefile), xhtml_file)
            return
        self.assertEqualXML(util.readfile(xhtml_file),
                            util.readfile(
                                self.repo.store.parsed_path(basefile)),
                            tidy_xhtml=True)
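The environment switches above are easiest to see from the calling side. A hedged driver sketch (the test module name is hypothetical; FERENDA_LOG_TEST and FERENDA_PROFILE_TEST are the variables parse_test reads, and FERENDA_SET_TESTFILE would instead overwrite the expected XHTML file with the parse result):

import os
import subprocess

# Run one test module with DEBUG-level logging and profiling enabled.
env = dict(os.environ, FERENDA_LOG_TEST="DEBUG", FERENDA_PROFILE_TEST="1")
subprocess.run(["python", "-m", "unittest", "integrationTestParse"],
               env=env, check=True)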
Example #8
 def test_download(self):
     self.repo.download()
     self.assertEqual("basefile 1, repo a",
                      util.readfile(self.datadir+"/a/downloaded/1.html"))
     self.assertEqual("basefile 1, repo b",
                      util.readfile(self.datadir+"/b/downloaded/1/index.html"))
     self.assertEqual("basefile 2, repo b",
                      util.readfile(self.datadir+"/b/downloaded/2/index.html"))
     self.assertEqual("basefile 3, repo a",
                      util.readfile(self.datadir+"/a/downloaded/3.html"))
Example #9
    def test_sesame_get_serialized(self, mock_get):
        store = TripleStore.connect("SESAME", "", "")
        want = util.readfile("test/files/triplestore/combinedgraph.nt", "rb")
        got = store.get_serialized() 
        self.assertEqual(want, got)
        self.assertEqual(mock_get.call_count, 1)

        want = util.readfile("test/files/triplestore/namedgraph.nt", "rb")
        got = store.get_serialized(context="namedgraph") # results in single get
        self.assertEqual(want, got)
        self.assertEqual(mock_get.call_count, 2)
Example #10
        def _loadgraph(filename):
            g = rdflib.Graph()
            # we must read the data ourselves; providing a non-ASCII
            # filename to Graph.parse fails deep in rdflib's internals
            format = guess_format(filename)
            if format == "nt":
                data = util.readfile(filename, "r", encoding="utf-8")
            else:
                data = util.readfile(filename, "rb")

            g.parse(data=data, format=format)
            return g
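The workaround in the comment above, in isolation: read the file contents yourself and hand them to Graph.parse via the data argument, so rdflib never opens the non-ASCII path. A minimal sketch (the filename is hypothetical):

import rdflib
from rdflib.util import guess_format

filename = "test/files/rättsfall.ttl"  # hypothetical non-ASCII path
with open(filename, "rb") as fp:
    data = fp.read()
g = rdflib.Graph()
g.parse(data=data, format=guess_format(filename))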
Example #11
    def test_construct_annotations_rfc(self):
        # print("Not loading, re-using data")
        self.loader.add_serialized(
             util.readfile("test/files/datasets/rfc.nt"), format="nt",
            context="http://localhost:8000/dataset/rfc"
        )

        uri = "http://localhost:8000/res/rfc/7066"
        sq = util.readfile("ferenda/sources/tech/res/sparql/rfc-annotations.rq") % {'uri': uri}
        got = self.store.construct(sq)
        want = Graph()
        want.parse(data=util.readfile("test/files/datasets/annotations-rfc.nt"),
                   format="nt")
        self.assertEqualGraphs(want, got, exact=True)
Example #12
    def test_construct_annotations_rfc(self):
        # print("Not loading, re-using data")
        self.loader.add_serialized(
             util.readfile("test/files/datasets/rfc.nt"), format="nt",
            context="http://localhost:8000/dataset/rfc"
        )

        uri = "http://localhost:8000/res/rfc/7066"
        sq = util.readfile("ferenda/res/sparql/rfc-annotations.rq") % {'uri': uri}
        got = self.store.construct(sq)
        want = Graph()
        want.parse(data=util.readfile("test/files/datasets/annotations-rfc.nt"),
                   format="nt")
        self.assertEqualGraphs(want, got, exact=True)
Example #13
 def makeresponse(*args, **kwargs):
     if len(returned) >= len(responses):
         raise IndexError("Ran out of canned responses after %s calls" % len(returned))
     resp = Mock()
     resp.status_code = responses[len(returned)][0]
     responsefile = responses[len(returned)][1]
     if responsefile:
         responsefile = "test/files/triplestore/" + responsefile
         resp.content = util.readfile(responsefile, "rb")
         resp.text = util.readfile(responsefile)
         if responsefile.endswith(".json"):
             data = json.loads(util.readfile(responsefile))
             resp.json = Mock(return_value=data)
     returned.append(True)
     return resp
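A hedged sketch of how such a factory is typically wired up (the patch target is an assumption; responses and returned are the names the closure above depends on):

from unittest.mock import patch

# Canned (status_code, filename) pairs; a None filename means an empty body.
responses = [(200, "combinedgraph.nt"), (204, None)]
returned = []  # makeresponse appends one entry per call, so len() is the call count

# Assumption: the code under test issues its HTTP GETs through requests.get.
with patch("requests.get", side_effect=makeresponse):
    pass  # exercise the code under test; each GET consumes one canned response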
Example #14
    def test_construct(self):
        self.loader.add_serialized(
            util.readfile("test/files/datasets/addressbook.ttl"),
            format="turtle")
        del self.loader

        sq = """PREFIX ab: <http://learningsparql.com/ns/addressbook#>
                PREFIX d: <http://learningsparql.com/ns/data#>

                CONSTRUCT { ?person ?p ?o . }
                WHERE {
                    ?person ab:firstName "Craig" ; ab:lastName "Ellis" ;
                ?p ?o . }"""
        want = Graph()
        want.parse(data="""
@prefix d:<http://learningsparql.com/ns/data#> . 
@prefix ab:<http://learningsparql.com/ns/addressbook#> .

d:i8301
    ab:email "*****@*****.**",
             "*****@*****.**" ;
    ab:firstName "Craig" ;
    ab:lastName "Ellis" .
""", format="turtle")
        if self.store.__class__ == FusekiStore:
            got = self.store.construct(sq, uniongraph=False)
        else:
            got = self.store.construct(sq)

        # self.assertTrue(isomorphic(want,got))
        self.assertEqualGraphs(want, got, exact=True)
        if self.store.__class__ == SleepycatStore:
            self.store.graph.close()
Example #15
File: wsgiapp.py Project: zigit/ferenda
 def __init__(self, repos, inifile=None, **kwargs):
     super(WSGIApp, self).__init__(repos, inifile, **kwargs)
     sfsrepo = [repo for repo in repos if repo.alias == "sfs"][0]
     self.parser = SwedishCitationParser(
         LegalRef(LegalRef.RATTSFALL, LegalRef.LAGRUM, LegalRef.KORTLAGRUM,
                  LegalRef.FORARBETEN, LegalRef.MYNDIGHETSBESLUT),
         sfsrepo.minter,
         sfsrepo.commondata,
         allow_relative=True)
     graph = Graph().parse(sfsrepo.resourceloader.filename("extra/sfs.ttl"),
                           format="turtle")
     self.lagforkortningar = [
         str(o) for s, o in graph.subject_objects(DCTERMS.alternate)
     ]
     self.paragraflag = []
     for s, o in graph.subject_objects(DCTERMS.alternate):
         basefile = sfsrepo.basefile_from_uri(str(s))
         distilledpath = sfsrepo.store.distilled_path(basefile)
         firstpara_uri = str(s) + "#P1"
         needle = '<rpubl:Paragraf rdf:about="%s">' % firstpara_uri
         if os.path.exists(distilledpath) and needle in util.readfile(
                 distilledpath):
             self.paragraflag.append(str(o).lower())
     self.lagnamn = [str(o) for s, o in graph.subject_objects(RDFS.label)]
     self.lagforkortningar_regex = "|".join(
         sorted(self.lagforkortningar, key=len, reverse=True))
Example #16
File: pbr.py Project: staffanm/ferenda
    def download_single(self, basefile, url):
        updated = False
        created = False
        filename = self.store.downloaded_path(basefile)
        created = not os.path.exists(filename)
        # util.print_open_fds()
        if self.download_if_needed(url, basefile):
            if created:
                self.log.info("%s: downloaded from %s" % (basefile, url))
            else:
                self.log.info(
                    "%s: downloaded new version from %s" % (basefile, url))
            updated = True
        else:
            self.log.debug("%s: exists and is unchanged" % basefile)
        soup = BeautifulSoup(util.readfile(filename), "lxml")
        for pdflink in soup.find_all("a", href=re.compile(r"\.pdf$")):
            slug = "-".join(pdflink["href"].rsplit("/")[-2:])
            attachment_path = self.store.downloaded_path(basefile, attachment=slug)
            self.download_if_needed(urljoin(url, pdflink["href"]), basefile, filename=attachment_path)
        vm = soup.find("a", text="Visa Varumärke")
        if vm:
            attachment_path = self.store.downloaded_path(basefile, attachment="varumarke.jpg")
            attachment_url = re.search("http[^'\"]*", vm["href"]).group(0)
            self.download_if_needed(attachment_url, basefile, filename=attachment_path)

        entry = DocumentEntry(self.store.documententry_path(basefile))
        now = datetime.now()
        entry.orig_url = url
        if created:
            entry.orig_created = now
        if updated:
            entry.orig_updated = now
        entry.orig_checked = now
        entry.save()
Example #17
 def test_fuseki_get_serialized(self, mock_get):
     store = TripleStore.connect("FUSEKI", "", "", curl=False)
     # test 1: a namedgraph (cases with no context are already run by
     # test_fuseki_get_serialized_file)
     want = util.readfile("test/files/triplestore/namedgraph.nt", "rb")
     got = store.get_serialized(context="namedgraph") # results in single get
     self.assertEqual(want, got)
Example #18
 def test_add_serialized(self):
     # test adding to default graph
     self.assertEqual(0, self.store.triple_count())
     self.store.add_serialized(
         util.readfile("test/files/datasets/dataset.nt"),
         format="nt")
     self.assertEqual(7, self.store.triple_count())
Example #19
File: static.py Project: staffanm/ferenda
    def parse(self, doc):
        source = util.readfile(self.store.downloaded_path(doc.basefile))
        html = publish_string(source, writer_name="html")
        soup = BeautifulSoup(html, "lxml")
        docinfo = soup.find("table", "docinfo")
        docuri = URIRef(doc.uri)
        if docinfo:
            # this is where our custom metadata goes
            for row in docinfo.find_all("tr", "field"):
                key, val = row.th.text.strip(), row.td.text.strip()
                if key == 'footer-order:':
                    doc.meta.add((docuri, OLO['index'], Literal(int(val))))
                else:
                    self.log.warning("%s: Unknown metadata directive %s (%s)" %
                                     (doc.basefile, key, val))

            # we don't need these in the final result
            docinfo.decompose()
        soup.find("h1", "title").decompose()

        doc.body = elements_from_soup(soup.body)
        doc.meta.add((docuri, DCTERMS.title,
                      Literal(soup.title.text, doc.lang)))
        doc.meta.add((docuri, PROV.wasGeneratedBy, Literal(self.qualified_class_name())))
        doc.meta.add((docuri, RDF.type, self.rdf_type))
        self.parse_entry_update(doc)
        return True
Example #20
    def test_download_setfile(self):
        # create an empty.json
        os.mkdir(self.datadir+"/source")
        with open(self.datadir+"/source/empty.json", "w") as fp:
            s = json.dumps({'@settings': {'config': {'refresh': True}}},
                           separators=(', ', ': '))
            fp.write(s)

        os.environ["FERENDA_SET_TESTFILE"] = "true"
        self._runtest()
        del os.environ["FERENDA_SET_TESTFILE"]

        # make sure downloaded files have been placed where they
        # should + empty.json has correct content.
        self.assertTrue(os.path.exists(self.datadir+"/source/empty-0.html"))
        self.assertEqual("<p>This is doc A</p>",
                         util.readfile(self.datadir+"/source/empty-1.html"))
                         
        with open(self.datadir+"/source/empty.json") as fp:
            gotjson = json.load(fp)
        wantjson = copy(self.basicjson)
        wantjson['http://example.org/']['file'] = "empty-0.html"
        wantjson['http://example.org/doc/a_.html']['file'] = "empty-1.html"
        self.assertEqual(wantjson, gotjson)
Example #21
    def parse(self, doc):
        head, body = util.readfile(self.store.downloaded_path(doc.basefile)).split("\n\n", 1)
        datestr, timestr, title = head.split(" ", 2)
        published = datetime.strptime("%s %s" % (datestr, timestr), "%Y-%m-%d %H:%M:%S")

        doc.meta.add((URIRef(doc.uri), RDF.type, self.rdf_type))
        doc.meta.add((URIRef(doc.uri), DCTERMS.issued, Literal(published)))
        doc.meta.add((URIRef(doc.uri), DCTERMS.title, Literal(title, lang=doc.lang)))
        soup = bs4.BeautifulSoup("<div class='sitenews-item'>"+body+"</div>", "lxml")
        doc.body = elements_from_soup(soup.body)
        # move timestamp into dcterms:issued, title into dcterms:title
        # parse body with elements_from_soup
        # set first real para as dcterms:abstract (XMLLiteral)
        doc.body[0][0] = Div([doc.body[0][0]],
                             datatype="rdf:XMLLiteral",
                             property="dcterms:abstract")

        # but we need to add it to doc.meta RIGHT AWAY because of reasons...
        doc.meta.add((URIRef(doc.uri), DCTERMS.abstract,
                      Literal(body.split("\n\n")[0], datatype=RDF.XMLLiteral)))
        self.parse_entry_update(doc) # need to set published and possibly updated
        entry = DocumentEntry(self.store.documententry_path(doc.basefile))
        entry.published = published
        entry.save()
        return True
Example #22
 def parametric_test(self, filename):
     # these options adjust the constructed URIs. by default, the
     # official rpubl URIs are minted.
     #
     # self.repo.config.localizeuri = True
     # self.repo.config.url = "http://example.org/"
     # self.repo.config.urlpath = ''
     # a few of the subclasses have specialized rules. make sure we
     # instantiate the correct class
     repo = os.path.basename(filename).split("-")[0]
     basefile = os.path.splitext(os.path.basename(filename))[0].replace(
         "-", "/", 1).replace("-", ":")
     repoclass = self.aliases[repo]
     self.repo = repoclass(
         datadir=self.datadir,
         storelocation=self.datadir + "/ferenda.sqlite",
         indexlocation=self.datadir + "/whoosh",
     )
     doc = self.repo.make_document(basefile)
     text = self.repo.sanitize_text(util.readfile(filename), basefile)
     reader = TextReader(string=text, encoding='utf-8')
     self.repo.parse_metadata_from_textreader(reader, doc)
     wantfile = filename.replace(".txt", ".n3")
     if os.path.exists(wantfile):
         self.assertEqualGraphs(wantfile, doc.meta, exact=False)
     else:
         self.fail(
             "Expected a %s with the following content:\n\n%s" %
             (wantfile, doc.meta.serialize(format="n3").decode("utf-8")))
Example #23
    def test_transform_html(self):
        base = self.datadir+os.sep
        with open(base+"teststyle.xslt","w") as fp:
            fp.write("""<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:param name="value"/>
    <xsl:param name="file"/>
    <xsl:variable name="content" select="document($file)/root/*"/>
    <xsl:template match="/">
        <output>
            <paramvalue><xsl:value-of select="$value"/></paramvalue>
            <paramfile><xsl:copy-of select="$content"/></paramfile>
            <infile><xsl:value-of select="/doc/title"/></infile>
        </output>
    </xsl:template>
</xsl:stylesheet>
""")
        with open(base+"paramfile.xml","w") as fp:
            fp.write("""<root><node key='value'><subnode>textnode</subnode></node></root>""")

        with open(base+"infile.xml","w") as fp:
            fp.write("""<doc><title>Document title</title></doc>""")
        t = Transformer("XSLT", base+"teststyle.xslt", ["res/xsl"], "")
        t.transform_file(base+"infile.xml", base+"outfile.xml",
                         {'value':'blahonga',
                          'file':base+'paramfile.xml'})
        self.assertEqualXML(util.readfile(base+"outfile.xml"),"""
        <output>
            <paramvalue>blahonga</paramvalue>
            <paramfile><node key='value'><subnode>textnode</subnode></node></paramfile>
            <infile>Document title</infile>
        </output>""")
Example #24
    def parametric_test(self, filename):
        # these options adjust the constructed URIs. by default, the
        # official rpubl URIs are minted.
        #
        # self.repo.config.localizeuri = True
        # self.repo.config.url = "http://example.org/"
        # self.repo.config.urlpath = ''
        # a few of the subclasses have specialized rules. make sure we
        # instantiate the correct class
        repo, basefile = self.parse_filename(filename)
        doc = repo.make_document(basefile)
        text = repo.sanitize_text(util.readfile(filename), basefile)
        reader = TextReader(string=text, encoding='utf-8')
        props = repo.extract_metadata(reader, basefile)
        props = repo.sanitize_metadata(props, basefile)
        resource = repo.polish_metadata(props, basefile)
        repo.infer_metadata(resource, basefile)

        wantfile = filename.replace(".txt", ".n3")
        if os.path.exists(wantfile):
            self.assertEqualGraphs(wantfile, resource.graph, exact=False)
        else:
            self.fail(
                "Expected a %s with the following content:\n\n%s" %
                (wantfile, resource.graph.serialize(format="n3").decode("utf-8")))
Example #25
File: sou.py Project: staffanm/ferenda
 def extract_metadata(self, rawhead, basefile):
     metadata = util.readfile(self.store.downloaded_path(
         basefile, attachment="index.rdf"))
     # For some reason these RDF files might use canonical
     # decomposition form (NFD) which is less optimal. Fix this.
     metadata = unicodedata.normalize("NFC", metadata)
     sourcegraph = Graph().parse(data=metadata)
     rooturi = sourcegraph.value(predicate=RDF.type, object=BIBO.Book)
     if rooturi is None:
         # then just try to identify the main uri and use that
         subjects = set(sourcegraph.subjects())
         if len(subjects) == 1:
             rooturi = next(iter(subjects))
     title = sourcegraph.value(subject=rooturi, predicate=DC.title)
     issued = sourcegraph.value(subject=rooturi, predicate=DC.date)
     if isinstance(issued, str):
         # sometimes dc:date is weird like "1976[1974]" (SOU 1974:42)
         if len(issued) != 4:
             self.log.warning("expected issued date as single 4-digit year, got %s" % issued)
             # fall back on an approximation based on the basefile
             issued = basefile.split(":")[0]
         issued = Literal(util.gYear(int(issued)), datatype=XSD.gYear)

     attribs = self.metadata_from_basefile(basefile)
     attribs["dcterms:title"] = title
     if issued:
         attribs["dcterms:issued"] = issued
     return attribs
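The normalization step above matters because NFD and NFC strings compare unequal even though they render identically. A minimal, self-contained illustration:

import unicodedata

decomposed = "a\u030ang"  # "ång" with the ring as a combining character (NFD)
composed = "\u00e5ng"     # "ång" with a precomposed first letter (NFC)
assert decomposed != composed
assert unicodedata.normalize("NFC", decomposed) == composed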
Example #26
 def parse_test(self, downloaded_file, xhtml_file, docroot):
     # patch method so we control where the downloaded doc is
     # loaded from.
     basefile = self.filename_to_basefile(downloaded_file)
     # with patch('ferenda.DocumentStore.downloaded_path',
     #           return_value=downloaded_file):
     with patch.object(self.repo.documentstore_class, 'downloaded_path',
                       return_value=downloaded_file):
         self.repo.parse(basefile)
     if 'FERENDA_SET_TESTFILES' in os.environ:
         print("Overwriting %r with result of parse (%r)" % (xhtml_file, basefile))
         util.robust_rename(xhtml_file, xhtml_file + "~")
         shutil.copy2(self.repo.store.parsed_path(basefile), xhtml_file)
         return
     self.assertEqualXML(util.readfile(xhtml_file),
                         util.readfile(self.repo.store.parsed_path(basefile)))
Example #27
    def test_doctype(self):
        base = self.datadir+os.sep
        util.ensure_dir(base+"teststyle-doctype.xslt")
        with open(base+"teststyle-doctype.xslt","w") as fp:
            fp.write("""<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="html"
	        doctype-system="about:legacy-compat"
	        omit-xml-declaration="yes"
	        encoding='utf-8'
	        indent="yes"/>
    <xsl:template match="/">
      <html>
        <head>
          <title><xsl:value-of select="/doc/title"/></title>
        </head>
        <body>
          <h1>hello world</h1>
        </body>
      </html>
    </xsl:template>
</xsl:stylesheet>
""")
        with open(base+"infile.xml","w") as fp:
            fp.write("""<doc><title>Document title</title></doc>""")
        t = Transformer("XSLT", base+"teststyle-doctype.xslt", "xsl", None, "")
        t.transform_file(base+"infile.xml", base+"outfile.xml")
        self.assertTrue(util.readfile(base+"outfile.xml").startswith('<!DOCTYPE html SYSTEM "about:legacy-compat">'))
Example #28
 def download_is_different(self, existing, new):
     if existing.endswith(".html"):
         # load both existing and new into a BeautifulSoup object, then
         # compare the search-results-content divs
         existing_soup = BeautifulSoup(
             util.readfile(existing, encoding=self.source_encoding), "lxml")
         new_soup = BeautifulSoup(
             util.readfile(new, encoding=self.source_encoding), "lxml")
         existing = existing_soup.find("div", "search-results-content")
         new = new_soup.find("div", "search-results-content")
         try:
             return existing != new
         except RuntimeError:  # can happen with at least v4.4.1 of beautifulsoup
             return True
     else:
         return super(Trips, self).download_is_different(existing, new)
Example #29
 def _loadgraph(filename):
     g = rdflib.Graph()
     # we must read the data ourselves; providing a non-ASCII
     # filename to Graph.parse fails deep in rdflib's internals
     g.parse(data=util.readfile(filename, "rb"),
             format=guess_format(filename))
     return g
Example #30
 def distill_test(self, downloaded_file, rdf_file, docroot):
     try:
         prefixlen = len(docroot + "/downloaded/")
         if self.repo.storage_policy == "dir":
             suffixlen = len(downloaded_file.split(os.sep)[-1]) + 1
         else:
             suffixlen = len(os.path.splitext(downloaded_file)[1])
         pathfrag = downloaded_file[prefixlen:-suffixlen]
         basefile = self.repo.store.pathfrag_to_basefile(pathfrag)
     except Exception:
         basefile = self.filename_to_basefile(downloaded_file)
     with patch.object(self.repo.documentstore_class, 'downloaded_path',
                       return_value=downloaded_file):
         # self.repo.config.fsmdebug = True
         self.repo.parse(basefile)
     if 'FERENDA_SET_TESTFILES' in os.environ:
         print("Overwriting %r with result of parse (%r)" % (rdf_file, basefile))
         g = rdflib.Graph()
         g.parse(data=util.readfile(self.repo.store.distilled_path(basefile)))
         util.robust_rename(rdf_file, rdf_file + "~")
         with open(rdf_file, "wb") as fp:
             fp.write(g.serialize(format="turtle"))
         return
     self.assertEqualGraphs(rdf_file,
                            self.repo.store.distilled_path(basefile),
                            exact=False)
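The path arithmetic at the top of the test, in isolation (the docroot and file are hypothetical, and a POSIX os.sep is assumed; storage_policy == "dir" means each document is stored as .../basefile/index.html):

import os

docroot = "test/files/repo"  # hypothetical
downloaded_file = docroot + "/downloaded/123/a/index.html"
prefixlen = len(docroot + "/downloaded/")
suffixlen = len(downloaded_file.split(os.sep)[-1]) + 1  # len("index.html") + 1
pathfrag = downloaded_file[prefixlen:-suffixlen]
assert pathfrag == "123/a"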
Example #31
 def test_rdf(self):
     # basic test 3: accept: application/rdf+xml -> RDF statements (in XML)
     self.env['HTTP_ACCEPT'] = 'application/rdf+xml'
     status, headers, content = self.call_wsgi(self.env)
     self.assertResponse("200 OK",
                         {'Content-Type': 'application/rdf+xml'},
                         util.readfile(self.repo.store.distilled_path("123/a"), "rb"),
                         status, headers, content)
Example #32
 def ttl_to_rdf_xml(self, inpath, outpath, store=None):
     if not store:
         store = self.repo.store
     g = Graph()
     g.parse(data=util.readfile(inpath, encoding="utf-8"), format="turtle")
     with _open(outpath, "wb") as fp:
         fp.write(g.serialize(format="pretty-xml"))
     return g
Example #33
 def _test_pyfile(self, pyfile, want=True, comparator=None):
     pycode = compile(util.readfile(pyfile), pyfile, "exec")
     result = six.exec_(pycode, globals(), locals())
     # the exec:ed code is expected to set return_value
     got = locals()["return_value"]
     if not comparator:
         comparator = self.assertEqual
     comparator(want, got)
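For reference, a hedged sketch of what such a pyfile might contain (the file path and value are hypothetical; the only contract is that exec()ing the code leaves a return_value binding behind for the harness to read):

# Contents of a hypothetical test/files/api/example.py:
return_value = "http://example.org/doc/123"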
Example #34
 def test_add_serialized_named_graph(self):
     self.test_add_serialized() # set up environment for this case
     self.store.add_serialized(
         util.readfile("test/files/datasets/dataset2.nt"),
         format="nt", context="http://example.org/ctx1")
     self.assertEqual(3, self.store.triple_count(
         context="http://example.org/ctx1"))
     self.assertEqual(10, self.store.triple_count())
Example #35
 def test_xhtml(self):
     # basic test 2: accept: application/xhtml+xml -> parsed file
     self.env['HTTP_ACCEPT'] = 'application/xhtml+xml'
     status, headers, content = self.call_wsgi(self.env)
     self.assertResponse("200 OK",
                         {'Content-Type': 'application/xhtml+xml'},
                         util.readfile(self.repo.store.parsed_path("123/a"), "rb"),
                         status, headers, content)
Example #36
 def transform(self, indata, config=None, parameters={}):
     strparams = {}
     if config:
         # paths to be used with the document() function
         # must use unix path separators
         if os.sep == "\\":
             config = config.replace(os.sep, "/")
         # print("Tranform: Using config %s. Contents:" % config)
         # print(util.readfile(config))
         config_fullpath = os.path.abspath(config)
         strparams['configurationfile'] = XSLT.strparam(config_fullpath)
     removefiles = []
     for key, value in parameters.items():
         if key.endswith("file") and value:
             if all(ord(c) < 128 and c != " " for c in value):
                 # IF the file name contains ONLY ascii chars and
                 # no spaces, we can use it directly. However, we
                 # need to relativize path of file relative to the
                 # XSL file we'll be using. The mechanism could be
                 # clearer...
                 value = os.path.relpath(value, self.templdir)
             else:
                 # If the filename contains non-ascii characters or
                 # space, any attempt to eg
                 # "document($annotationfile)" in the XSLT document
                 # will silently fail. Seriously, f**k lxml's error
                 # handling. In this case, copy it to a temp file
                 # (in the temporary templdir, with ascii filename)
                 # and use that.
                 contents = util.readfile(value)
                 value = os.path.basename(value)
                 value = "".join(c for c in value
                                 if ord(c) < 128 and c != " ")
                 removefiles.append(self.templdir + os.sep + value)
                 util.writefile(self.templdir + os.sep + value, contents)
             if os.sep == "\\":
                 value = value.replace(os.sep, "/")
         strparams[key] = XSLT.strparam(value)
     try:
         return self._transformer(indata, **strparams)
     except etree.XSLTApplyError as e:
         # the exception will only contain the last error. Errors
         # emanating from the xhtml file will not have file/line
         # number information. Errors emanating from the xslt file
         # do have file/line number info, and are probably more
         # useful to deal with.
         for error in self._transformer.error_log:
             if error.line:
                 log.error("%s: %s (line %s)" %
                           (error.filename, error.message, error.line))
         raise errors.TransformError(str(e))
     finally:
         for f in removefiles:
             util.robust_remove(f)
     # FIXME: This can never be reached, if _transformer() does not
     # raise an error, the above returns immediately.
     if len(self._transformer.error_log) > 0:
         raise errors.TransformError(str(self._transformer.error_log))
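The quoting helper used throughout the method above, XSLT.strparam, in a minimal self-contained sketch: it wraps an arbitrary string so it is passed as a literal stylesheet parameter (an unquoted string would be evaluated as an XPath expression):

from lxml import etree

stylesheet = etree.XML("""\
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:param name="value"/>
  <xsl:template match="/">
    <out><xsl:value-of select="$value"/></out>
  </xsl:template>
</xsl:stylesheet>""")
transform = etree.XSLT(stylesheet)
# strparam quotes the value so apostrophes and spaces survive intact
result = transform(etree.XML("<doc/>"), value=etree.XSLT.strparam("it's a string"))
assert "<out>it's a string</out>" in str(result)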
Example #37
File: sfslegacy.py Project: zigit/ferenda
 def _checksum(self, filename):
     """MD5-checksumman för den angivna filen"""
     import hashlib
     c = hashlib.md5()
     try:
         # hashlib needs bytes; readfile returns str when an encoding is given
         c.update(util.readfile(filename, encoding=self.source_encoding).encode("utf-8"))
     except Exception:
         self.log.warning("Could not extract plaintext from %s" % filename)
     return c.hexdigest()
Example #38
    def test_save(self):
        path = self.repo.store.documententry_path("123/a")
        d = DocumentEntry()
        d.orig_checked = datetime(2013, 3, 27, 20, 46, 37)
        d.orig_url = 'http://source.example.org/doc/123/a'
        d.save(path=path)

        self.maxDiff = None
        self.assertEqual(self.d2u(util.readfile(path)), self.basic_json)
Example #39
 def test_basic(self):
     # basic test 1: accept: text/html -> generated file
     # Note that our Accept header has a more complicated value
     # typical of a real-life browser
     status, headers, content = self.call_wsgi(self.env)
     self.assertResponse(
         "200 OK", {'Content-Type': 'text/html; charset=utf-8'},
         util.readfile(self.repo.store.generated_path("123/a"), "rb"),
         status, headers, content)
Example #40
    def test_longdesc(self):
        # test 2: Same, but with a multi-line desc
        dconf = self.globalconf.base
        dconf.download_text = b"This is a file.\nIt has been downloaded.\n"

        repo = MockRepo(datadir=self.datadir)
        with repo.store.open_downloaded(self.basefile, "wb") as fp:
            fp.write(b"This is a file.\nIt has been patched.\n")
        longdesc = "A longer comment\nspanning\nseveral lines"

        patchpath = self.d.mkpatch("base", self.basefile, longdesc)
        self.assertTrue(patchpath)
        patchcontent = util.readfile(patchpath)
        desccontent = util.readfile(patchpath.replace(".patch", ".desc"))
        self.assertEqual(longdesc, desccontent)
        self.assertFalse("A longer comment" in patchcontent)
        self.assertIn("@@ -1,2 +1,2 @@", patchcontent)
        self.assertIn("-It has been downloaded.", patchcontent)
        self.assertIn("+It has been patched.", patchcontent)