Example #1
 def downloaded_to_intermediate(self, basefile):
     # Check to see if this might not be a proper SFS at all
     # (from time to time, other agencies publish their stuff
     # in SFS - this seems to be handled by giving those
     # documents an SFS number of the form "N1992:31"). Filter
     # these out.
     if basefile.startswith('N'):
         raise IckeSFS("%s is not a regular SFS" % basefile)
     filename = self.store.downloaded_path(basefile)
     try:
         t = TextReader(filename, encoding=self.source_encoding)
     except IOError:
         self.log.warning("%s: Fulltext is missing" % basefile)
         # FIXME: This code needs to be rewritten
         baseuri = self.canonical_uri(basefile)
         if baseuri in registry:
             title = registry[baseuri].value(URIRef(baseuri),
                                             self.ns['dcterms'].title)
             desc.value(self.ns['dcterms'].title, title)
         desc.rel(self.ns['dcterms'].publisher,
                  self.lookup_resource("Regeringskansliet"))
         desc.value(self.ns['dcterms'].identifier, "SFS " + basefile)
         doc.body = Forfattning([Stycke(['Lagtext saknas'], id='S1')])
     # Check to see if the Författning has been revoked (using
     # plain fast string searching, no fancy HTML parsing and
     # traversing)
     if not self.config.keepexpired:
         try:
             t.cuepast('<i>Författningen är upphävd/skall upphävas: ')
             datestr = t.readto('</i></b>')
             if datetime.strptime(datestr, '%Y-%m-%d') < datetime.today():
                 self.log.debug('%s: Expired' % basefile)
                 raise UpphavdForfattning(
                     "%s is an expired SFS" % basefile,
                     dummyfile=self.store.parsed_path(basefile))
             t.seek(0)
         except IOError:
             t.seek(0)
     t.cuepast('<pre>')
     # remove &auml; et al
     try:
         # this is the preferred way from py34 onwards. FIXME: Move
         # this to ferenda.compat
         import html
         txt = html.unescape(t.readto('</pre>'))
     except ImportError:
         # this is the old way.
         hp = HTMLParser()
         txt = hp.unescape(t.readto('</pre>'))
     if '\r\n' not in txt:
         txt = txt.replace('\n', '\r\n')
     re_tags = re.compile(r"</?\w{1,3}>")
     txt = re_tags.sub('', txt)
     # adding an ending CRLF aids in producing better diffs
     txt += "\r\n"
     util.writefile(self.store.intermediate_path(basefile),
                    txt,
                    encoding=self.source_encoding)
     return codecs.open(self.store.intermediate_path(basefile),
                        encoding=self.source_encoding)
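The FIXME in the unescape fallback above asks for the shim to live in ferenda.compat. A minimal sketch of what such a centralized helper could look like (assumed names and module location, not the actual ferenda.compat contents):

 # Pick the best available unescape implementation once, at import time,
 # so callers can simply do: from ferenda.compat import unescape
 try:
     from html import unescape          # Python 3.4+
 except ImportError:
     try:
         from html.parser import HTMLParser  # Python 3.0-3.3
     except ImportError:
         from HTMLParser import HTMLParser   # Python 2
     unescape = HTMLParser().unescape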
Example #2
 def test_depth(self):
     xsltfile = self.datadir+os.sep+"notused.xslt"
     util.writefile(xsltfile, '<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"/>')
     t = Transformer("XSLT", xsltfile, "xsl", None, "data")
     self.assertEqual(0, t._depth("data", "data/index.html"))
     self.assertEqual(1, t._depth("data/repo", "data/index.html"))
     self.assertEqual(3, t._depth("data/repo/toc/title", "data/index.html"))
Example #3
 def test_depth(self):
     xsltfile = self.datadir+os.sep+"notused.xslt"
     util.writefile(xsltfile, '<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"/>')
     t = Transformer("XSLT", xsltfile, ["res/xsl"], "data")
     self.assertEqual(0, t._depth("data", "data/index.html"))
     self.assertEqual(1, t._depth("data/repo", "data/index.html"))
     self.assertEqual(3, t._depth("data/repo/toc/title", "data/index.html"))
Example #4
 def _make_files(self, option, filedir, combinefile=None, combinefunc=None):
     urls = []
     buf = BytesIO()
     processed = set()
     # eg. self.config.cssfiles
     if getattr(self.config, option):  # it's possible to set eg
                                       # cssfiles=None when
                                       # creating the Resources
                                       # object
         for f in getattr(self.config, option):
             urls.append(self._process_file(f, buf, filedir, "ferenda.ini"))
             processed.add(f)
     for repo in self.repos:
         # FIXME: create a more generic way of optionally
         # signalling to a repo that "Hey, now it's time to create
         # your resources if you can"
         if repo.__class__.__name__ == "SFS" and option == "imgfiles":
             self.log.info("calling into SFS._makeimages()")
             LayeredConfig.set(repo.config, 'imgfiles', repo._makeimages())
         for f in getattr(repo.config, option):
             if f in processed:
                 continue
             urls.append(self._process_file(f, buf, filedir, repo.alias))
             processed.add(f)
     urls = list(filter(None, urls))
     if combinefile:
         txt = buf.getvalue().decode('utf-8')
         util.writefile(combinefile, combinefunc(txt))
         return [self._filepath_to_urlpath(combinefile, 2)]
     else:
         return urls
Example #5
    def make_resources_xml(self, cssfiles, jsfiles):
        E = ElementMaker()  # namespace = None, nsmap={None: ...}
        root = E.configuration(
            E.sitename(self.config.sitename),
            E.sitedescription(self.config.sitedescription),
            E.url(self.config.url),
            E.tabs(*self._links('tabs')),
            E.footerlinks(*self._links('footer')),
            E.stylesheets(*self._li_wrap(cssfiles, 'link', 'href', rel="stylesheet")),
            E.javascripts(*self._li_wrap(jsfiles, 'script', 'src', text=" "))
        )

        if not self.config.staticsite:
            root.append(
                E.search(
                    E.endpoint(self.config.searchendpoint)
                )
            )

        outfile = self.resourcedir + os.sep + "resources.xml"
        util.writefile(
            outfile,
            etree.tostring(
                root,
                encoding="utf-8",
                pretty_print=True).decode("utf-8"))
        self.log.info("Wrote %s" % outfile)
        return [self._filepath_to_urlpath(outfile, 1)]
Example #6
    def download_single(self, basefile, url=None):
        if url is None:
            result = self.query_webservice("DN = %s" % basefile, page=1)
            result.raise_for_status()
            tree = etree.parse(BytesIO(result.content))
            results = tree.findall(".//{http://eur-lex.europa.eu/search}result")
            assert len(results) == 1
            result = results[0]
            cellarid = result.find(".//{http://eur-lex.europa.eu/search}reference").text
            cellarid = re.split("[:_]", cellarid)[2]

            celex = result.find(".//{http://eur-lex.europa.eu/search}ID_CELEX")[0].text
            match = self.celexfilter(celex)
            assert match
            celex = match.group(1)
            assert celex == basefile
            lang, filetype, mimetype, url = self.find_manifestation(cellarid, celex)
            # FIXME: This is an ugly way of making sure the downloaded
            # file gets the right suffix (due to
            # DocumentStore.downloaded_path choosing a filename from among
            # several possible suffixes based on what file already exists)
            downloaded_path = self.store.path(basefile, 'downloaded', '.'+filetype)
            if not os.path.exists(downloaded_path):
                util.writefile(downloaded_path, "")
        return super(EURLex, self).download_single(basefile, url)
Example #7
 def test_write_atom_inline(self):
     self.repo.faceted_data = Mock(return_value=self.faceted_data)
     for basefile in range(25):
         de = DocumentEntry(
             self.repo.store.documententry_path(str(basefile)))
         util.writefile(self.repo.store.parsed_path(str(basefile)),
                        "<html><p>Document #%s</p></html>" % basefile)
         de.set_content(self.repo.store.parsed_path(str(basefile)),
                        self.repo.canonical_uri(str(basefile)),
                        inline=True)
         de.save()
     unsorted_entries = self.repo.news_facet_entries()
     entries = sorted(list(unsorted_entries),
                      key=itemgetter('updated'),
                      reverse=True)
     self.repo.news_write_atom(entries,
                               'New and updated documents',
                               'main',
                               archivesize=6)
     tree = etree.parse('%s/base/feed/main.atom' % self.datadir)
     NS = "{http://www.w3.org/2005/Atom}"
     content = tree.find(".//" + NS + "content")
     self.assertIsNotNone(content)
     self.assertIsNone(content.get("src"))
     self.assertIsNone(content.get("hash"))
     self.assertEqual(content.get("type"), "xhtml")
     self.assertEqualXML(
         etree.tostring(content[0]),
         '<html xmlns="http://www.w3.org/2005/Atom" xmlns:le="http://purl.org/atompub/link-extensions/1.0"><p>Document #24</p></html>'
     )
Example #8
 def test_write_atom_inline(self):
     self.repo.faceted_data = Mock(return_value=self.faceted_data)
     for basefile in range(25):
         de = DocumentEntry(self.repo.store.documententry_path(str(basefile)))
         util.writefile(self.repo.store.parsed_path(str(basefile)),
                        "<html><p>Document #%s</p></html>" % basefile)
         de.set_content(self.repo.store.parsed_path(str(basefile)),
                        self.repo.canonical_uri(str(basefile)),
                        inline=True)
         de.save()
     unsorted_entries = self.repo.news_facet_entries()
     entries = sorted(list(unsorted_entries),
                      key=itemgetter('updated'), reverse=True)
     self.repo.news_write_atom(entries,
                               'New and updated documents',
                               'main',
                               archivesize=6)
     tree = etree.parse('%s/base/feed/main.atom' % self.datadir)
     NS = "{http://www.w3.org/2005/Atom}"
     content = tree.find(".//"+NS+"content")
     self.assertIsNotNone(content)
     self.assertIsNone(content.get("src"))
     self.assertIsNone(content.get("hash"))
     self.assertEqual(content.get("type"), "xhtml")
     self.assertEqualXML(etree.tostring(content[0]),
                           '<html xmlns="http://www.w3.org/2005/Atom" xmlns:le="http://purl.org/atompub/link-extensions/1.0"><p>Document #24</p></html>')
Example #9
 def download_single(self, basefile, url=None):
     if not url:
         entry = DocumentEntry(self.store.documententry_path(basefile))
         url = entry.orig_url
     xml_downloaded_path = self.store.downloaded_path(basefile).replace(".pdf", ".xml")
     if self.get_parse_options(basefile) == "metadataonly":
         # in these cases, to save space, get
         # the smaller XML OCR data, not the
         # actual scanned images-in-PDF
         url = url.replace(".pdf", ".xml").replace("pdf/web", "xml")
         # make store.downloaded_path return .xml suffixes (and set
         # the timestamp to the beginning of epoch so that the
         # resulting if-modified-since header doesn't contain the
         # current date/time)
         if not os.path.exists(xml_downloaded_path):
             util.writefile(xml_downloaded_path, "")
             os.utime(xml_downloaded_path, (0,0))
     else:
         # if parse options have changed from metadataonly to
         # default, there will be an xml file lying around which will
         # make downloaded_path return its name. Remove it so that
         # we don't end up with pdf files that have a .xml
         # extension.
         if os.path.exists(xml_downloaded_path):
             os.unlink(xml_downloaded_path)
     return super(PropKB, self).download_single(basefile, url)
Example #10
    def test_run_makeresources(self):
        # 1. setup test_run_enable
        # 2. run('all', 'makeresources')
        # 3. verify that all css/js files specified by default and in Testrepo get copied
        #    (remove rsrc)
        # 4. run('all', 'makeresources', '--combine')
        # 5. verify that single css and js file is created
        self._enable_repos()
        s = os.sep
        want = {'css':[s.join(['rsrc', 'css','test.css']),
                       s.join(['rsrc', 'css','other.css'])],
                'js':[s.join(['rsrc', 'js','test.js'])],
                'xml':[s.join(['rsrc', 'resources.xml'])]
        }
        got = manager.run(['all', 'makeresources'])
        self.assertEqual(want,got)

        # 6. alter the ferenda.ini so that it doesn't specify any css/js files
        util.writefile("ferenda.ini", """[__root__]
loglevel=WARNING
datadir = %s
url = http://localhost:8000
searchendpoint = /search/
apiendpoint = /api/
        """ % self.tempdir)
        want = {'css':[],
                'js':[],
                'xml':[s.join(['rsrc', 'resources.xml'])]
        }
        got = manager.run(['all', 'makeresources'])
        self.assertEqual(want,got)
Example #11
 def _make_files(self, option, filedir, combinefile=None, combinefunc=None):
     urls = []
     buf = BytesIO()
     processed = set()
     # eg. self.config.cssfiles
     if getattr(self.config, option):  # it's possible to set eg
                                       # cssfiles=None when
                                       # creating the Resources
                                       # object
         for f in getattr(self.config, option):
             urls.append(self._process_file(f, buf, filedir, "ferenda.ini"))
             processed.add(f)
     for repo in self.repos:
         # FIXME: create a more generic way of optionally
         # signalling to a repo that "Hey, now it's time to create
         # your resources if you can"
         if repo.__class__.__name__ == "SFS" and option == "imgfiles":
             self.log.info("calling into SFS._makeimages()")
             LayeredConfig.set(repo.config, 'imgfiles', repo._makeimages())
         if hasattr(repo.config, option):
             for f in getattr(repo.config, option):
                 if f in processed:
                     continue
                 urls.append(self._process_file(f, buf, filedir,
                                                repo.alias))
                 processed.add(f)
     urls = list(filter(None, urls))
     if combinefile:
         txt = buf.getvalue().decode('utf-8')
         util.writefile(combinefile, combinefunc(txt))
         return [self._filepath_to_urlpath(combinefile, 2)]
     else:
         return urls
Example #12
    def setUp(self):
        self.maxDiff = None
        self.tempdir = tempfile.mkdtemp()
        # FIXME: this creates (and tearDown deletes) a file in
        # cwd. Should be placed in self.tempdir, but tests need to be
        # adjusted to find it there.

        # NB: The section keys are different from the specified
        # classes' alias properties. This is intended.
        staticmockclass.resourcebase = self.tempdir
        util.writefile(
            "ferenda.ini", """[__root__]
datadir = %s
loglevel = CRITICAL
[test]
class=testManager.staticmockclass
[test2]
class=testManager.staticmockclass2
""" % self.tempdir)
        util.writefile(self.tempdir + "/test.js", "// test.js code goes here")
        util.writefile(self.tempdir + "/test.css",
                       "/* test.css code goes here */")
        util.writefile(self.tempdir + "/test.png",
                       "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a PNG data goes here")
        util.writefile(self.tempdir + "/transformed.scss",
                       "a { color: red + green; }")
Example #13
 def downloaded_to_intermediate(self, basefile, attachment=None):
     # Check to see if this might not be a proper SFS at all
     # (from time to time, other agencies publish their stuff
     # in SFS - this seems to be handled by giving those
     # documents an SFS number of the form "N1992:31"). Filter
     # these out.
     if basefile.startswith('N'):
         raise IckeSFS("%s is not a regular SFS" % basefile)
     filename = self.store.downloaded_path(basefile)
     try:
         t = TextReader(filename, encoding=self.source_encoding)
     except IOError:
         self.log.warning("%s: Fulltext is missing" % basefile)
         # FIXME: This code needs to be rewritten
         baseuri = self.canonical_uri(basefile)
         if baseuri in registry:
             title = registry[baseuri].value(URIRef(baseuri),
                                             self.ns['dcterms'].title)
             desc.value(self.ns['dcterms'].title, title)
         desc.rel(self.ns['dcterms'].publisher,
                  self.lookup_resource("Regeringskansliet"))
         desc.value(self.ns['dcterms'].identifier, "SFS " + basefile)
         doc.body = Forfattning([Stycke(['Lagtext saknas'],
                                        id='S1')])
     # Check to see if the Författning has been revoked (using
     # plain fast string searching, no fancy HTML parsing and
     # traversing)
     if not self.config.keepexpired:
         try:
             t.cuepast('<i>Författningen är upphävd/skall upphävas: ')
             datestr = t.readto('</i></b>')
             if datetime.strptime(datestr, '%Y-%m-%d') < datetime.today():
                 self.log.debug('%s: Expired' % basefile)
                 raise UpphavdForfattning("%s is an expired SFS" % basefile,
                                          dummyfile=self.store.parsed_path(basefile))
             t.seek(0)
         except IOError:
             t.seek(0)
     t.cuepast('<pre>')
     # remove &auml; et al
     try:
         # this is the preferred way from py34 onwards. FIXME: Move
         # this to ferenda.compat
         import html
         txt = html.unescape(t.readto('</pre>'))
     except ImportError:
         # this is the old way.
         hp = HTMLParser()
         txt = hp.unescape(t.readto('</pre>'))
     if '\r\n' not in txt:
         txt = txt.replace('\n', '\r\n')
     re_tags = re.compile(r"</?\w{1,3}>")
     txt = re_tags.sub('', txt)
     # adding an ending CRLF aids in producing better diffs
     txt += "\r\n"
     util.writefile(self.store.intermediate_path(basefile), txt,
                    encoding=self.source_encoding)
     return codecs.open(self.store.intermediate_path(basefile),
                        encoding=self.source_encoding)
Example #14
    def parse(self, doc):
        doc.uri = self.canonical_uri(doc.basefile)
        d = Describer(doc.meta, doc.uri)
        d.rdftype(self.rdf_type)
        d.value(self.ns['prov'].wasGeneratedBy, self.qualified_class_name())
        self.infer_triples(d, doc.basefile)

        # prefer PDF or Word files over the plaintext-containing HTML files
        # FIXME: PDF or Word files are now stored as attachments

        pdffile = self.generic_path(doc.basefile, 'downloaded', '.pdf')

        wordfiles = (self.generic_path(doc.basefile, 'downloaded', '.doc'),
                     self.generic_path(doc.basefile, 'downloaded', '.docx'),
                     self.generic_path(doc.basefile, 'downloaded', '.wpd'),
                     self.generic_path(doc.basefile, 'downloaded', '.rtf'))
        wordfile = None
        for f in wordfiles:
            if os.path.exists(f):
                wordfile = f

        # if we lack a .pdf file, use Open/LibreOffice to convert any
        # .wpd or .doc file to .pdf first
        if (wordfile
                and not os.path.exists(pdffile)):
            intermediate_pdf = self.generic_path(
                doc.basefile, "intermediate", ".pdf")
            if not os.path.exists(intermediate_pdf):
                cmdline = "%s --headless --convert-to pdf --outdir '%s' %s" % (self.config.get('soffice', 'soffice'),
                                                                             os.path.dirname(
                                                                                 intermediate_pdf),
                                                                             wordfile)
                self.log.debug(
                    "%s: Converting to PDF: %s" % (doc.basefile, cmdline))
                (ret, stdout, stderr) = util.runcmd(
                    cmdline, require_success=True)
            pdffile = intermediate_pdf

        if os.path.exists(pdffile):
            self.log.debug("%s: Using %s" % (doc.basefile, pdffile))
            intermediate_dir = os.path.dirname(
                self.generic_path(doc.basefile, 'intermediate', '.foo'))
            self.setup_logger('pdfreader', self.config.get('log', 'INFO'))
            pdfreader = PDFReader()
            pdfreader.read(pdffile, intermediate_dir)
            self.parse_from_pdfreader(pdfreader, doc)
        else:
            downloaded_path = self.downloaded_path(doc.basefile)
            intermediate_path = self.generic_path(
                doc.basefile, 'intermediate', '.txt')
            self.log.debug("%s: Using %s (%s)" % (doc.basefile,
                           downloaded_path, intermediate_path))
            if not os.path.exists(intermediate_path):
                html = codecs.open(
                    downloaded_path, encoding="iso-8859-1").read()
                util.writefile(intermediate_path, util.extract_text(
                    html, '<pre>', '</pre>'), encoding="utf-8")
            textreader = TextReader(intermediate_path, encoding="utf-8")
            self.parse_from_textreader(textreader, doc)
Example #15
 def transform(self, indata, config=None, parameters={}):
     strparams = {}
     if config:
         # paths to be used with the document() function
         # must use unix path separators
         if os.sep == "\\":
             config = config.replace(os.sep, "/")
         # print("Tranform: Using config %s. Contents:" % config)
         # print(util.readfile(config))
         config_fullpath = os.path.abspath(config)
         strparams['configurationfile'] = XSLT.strparam(config_fullpath)
     removefiles = []
     for key, value in parameters.items():
         if key.endswith("file") and value:
             if all(ord(c) < 128 and c != " " for c in value):
                 # IF the file name contains ONLY ascii chars and
                 # no spaces, we can use it directly. However, we
                 # need to relativize the file's path relative to the
                 # XSL file we'll be using. The mechanism could be
                 # clearer...
                 value = os.path.relpath(value, self.templdir)
             else:
                 # If the filename contains non-ascii characters or
                 # space, any attempt to eg
                 # "document($annotationfile)" in the XSLT document
                 # will silently fail. Seriously, f**k lxml's error
                 # handling. In this case, copy it to a temp file
                 # (in the temporary templdir, with ascii filename)
                 # and use that.
                 contents = util.readfile(value)
                 value = os.path.basename(value)
                 value = "".join(c for c in value
                                 if ord(c) < 128 and c != " ")
                 removefiles.append(self.templdir + os.sep + value)
                 util.writefile(self.templdir + os.sep + value, contents)
             if os.sep == "\\":
                 value = value.replace(os.sep, "/")
         strparams[key] = XSLT.strparam(value)
     try:
         return self._transformer(indata, **strparams)
     except etree.XSLTApplyError as e:
         # the exception will only contain the last error. Errors
         # emanating from the xhtml file will not have file/line
         # number information. Errors emanating from the xslt file
         # do have file/line number info, and are probably more
         # useful to deal with.
         for error in self._transformer.error_log:
             if error.line:
                 log.error("%s: %s (line %s)" %
                           (error.filename, error.message, error.line))
         raise errors.TransformError(str(e))
     finally:
         for f in removefiles:
             util.robust_remove(f)
     # FIXME: This can never be reached, if _transformer() does not
     # raise an error, the above returns immediately.
     if len(self._transformer.error_log) > 0:
         raise errors.TransformError(str(self._transformer.error_log))
Example #16
 def parse(self, basefile):
     if basefile in ("1", "3"):
         util.writefile(self.store.parsed_path(basefile),
                        "basefile %s, parsed by a" % basefile)
         util.writefile(self.store.distilled_path(basefile),
                        "basefile %s, metadata from a" % basefile)
         return True
     else:
         return False  # we don't even have this basefile
Example #17
 def parse(self, basefile):
     if basefile in ("1", "3"):
         util.writefile(self.store.parsed_path(basefile),
                        "basefile %s, parsed by a" % basefile)
         util.writefile(self.store.distilled_path(basefile),
                        "basefile %s, metadata from a" % basefile)
         return True
     else:
         return False # we don't even have this basefile
Example #18
 def test_get_serialized_file(self):
     want = tempfile.mktemp(suffix=".nt")
     util.writefile(want, util.readfile("test/files/datasets/dataset.nt"))
     got = tempfile.mktemp(suffix=".nt")
     self.loader.add_serialized(
         util.readfile("test/files/datasets/dataset.nt"),format="nt")
     del self.loader
     self.store.get_serialized_file(got, format="nt")
     self.assertEqualGraphs(want,got)
Example #19
 def test_get_serialized_file(self):
     want = tempfile.mktemp(suffix=".nt")
     util.writefile(want, util.readfile("test/files/datasets/dataset.nt"))
     got = tempfile.mktemp(suffix=".nt")
     self.loader.add_serialized(
         util.readfile("test/files/datasets/dataset.nt"),format="nt")
     del self.loader
     self.store.get_serialized_file(got, format="nt")
     self.assertEqualGraphs(want,got)
Example #20
    def test_combining(self):
        # Test2: combining, resources specified by global config
        # (maybe we should use smaller CSS+JS files? Test takes 2+ seconds...)
        s = os.sep
        want = {'css':[s.join(['rsrc', 'css','combined.css'])],
                'js':[s.join(['rsrc', 'js','combined.js'])],
                'img': [],
                'xml':[s.join(['rsrc', 'resources.xml'])]
        }
        testcss = ["css/ferenda.css",
                   "res/css/fake1.css",
                   "res/css/fake2.css"]
        testjs = ["js/ferenda.js",
                  "res/js/fake1.js",
                  "res/js/fake2.js"]
        resources = Resources([staticmockclass(),staticmockclass2()],self.tempdir+os.sep+'rsrc',
                              combineresources=True,
                              cssfiles=testcss,
                              jsfiles=testjs,
                              sitename="Blahonga",
                              sitedescription="A non-default value")
        rl = resources.resourceloader
        testcssfiles = []
        testjsfiles = []
        for cssfile in testcss:
            try:
                testcssfiles.append(rl.filename(cssfile))
            except errors.ResourceNotFound:
                util.writefile(cssfile, "/* this is a faked css file: %s */" % cssfile*1000)
                testcssfiles.append(cssfile)

        for jsfile in testjs:
            try:
                testjsfiles.append(rl.filename(jsfile))
            except errors.ResourceNotFound:
                util.writefile(jsfile, "/* this is a faked js file: %s */" % jsfile*1000)
                testjsfiles.append(jsfile)

        got = resources.make(api=False)
        self.assertEqual(want,got)
        tree = ET.parse(self.tempdir+'/'+got['xml'][0])
        stylesheets=tree.find("stylesheets").getchildren()
        self.assertEqual(len(stylesheets),1)
        self.assertEqual(stylesheets[0].attrib['href'],'rsrc/css/combined.css')
        javascripts=tree.find("javascripts").getchildren()
        self.assertEqual(len(javascripts),1)
        self.assertEqual(javascripts[0].attrib['src'],'rsrc/js/combined.js')
        self.assertEqual(tree.find("sitename").text,"Blahonga")
        self.assertEqual(tree.find("sitedescription").text,"A non-default value")
        self.assertTrue(os.path.exists(self.tempdir+'/rsrc/css/combined.css'))
        self.assertTrue(os.path.exists(self.tempdir+'/rsrc/js/combined.js'))
        # check that the combining/minifying indeed saved us some space
        self.assertLess(os.path.getsize(self.tempdir+'/rsrc/css/combined.css'),
                        sum([os.path.getsize(x) for x in testcssfiles]))
        self.assertLess(os.path.getsize(self.tempdir+'/rsrc/js/combined.js'),
                        sum([os.path.getsize(x) for x in testjsfiles]))
Example #21
 def transform(self, indata, config=None, parameters={}):
     strparams = {}
     if config:
         # paths to be used with the document() function
         # must use unix path separators
         if os.sep == "\\":
             config = config.replace(os.sep, "/")
         # print("Tranform: Using config %s. Contents:" % config)
         # print(util.readfile(config))
         config_fullpath = os.path.abspath(config)
         strparams['configurationfile'] = XSLT.strparam(config_fullpath)
     removefiles = []
     for key, value in parameters.items():
         if key.endswith("file") and value:
             if all(ord(c) < 128 and c != " " for c in value):
                 # IF the file name contains ONLY ascii chars and
                 # no spaces, we can use it directly. However, we
                 # need to relativize the file's path relative to the
                 # XSL file we'll be using. The mechanism could be
                 # clearer...
                 value = os.path.relpath(value, self.templdir)
             else:
                 # If the filename contains non-ascii characters or
                 # space, any attempt to eg
                 # "document($annotationfile)" in the XSLT document
                 # will silently fail. Seriously, f**k lxml's error
                 # handling. In this case, copy it to a temp file
                 # (in the temporary templdir, with ascii filename)
                 # and use that.
                 contents = util.readfile(value)
                 value = os.path.basename(value)
                 value = "".join(c for c in value if ord(c) < 128 and c != " ")
                 removefiles.append(self.templdir+os.sep+value)
                 util.writefile(self.templdir+os.sep+value, contents)
             if os.sep == "\\":
                 value = value.replace(os.sep, "/")
         strparams[key] = XSLT.strparam(value)
     try:
         return self._transformer(indata, **strparams)
     except etree.XSLTApplyError as e:
         # the exception will only contain the last error. Errors
         # emanating from the xhtml file will not have file/line
         # number information. Errors emanating from the xslt file
         # do have file/line number info, and are probably more
         # useful to deal with.
         for error in self._transformer.error_log:
             if error.line:
                 log.error("%s: %s (line %s)" % (error.filename, error.message, error.line))
         raise errors.TransformError(str(e))
     finally:
         for f in removefiles:
             util.robust_remove(f)
     # FIXME: This can never be reached, if _transformer() does not
     # raise an error, the above returns immediately.
     if len(self._transformer.error_log) > 0:
         raise errors.TransformError(str(self._transformer.error_log))
Example #22
    def test_combining(self):
        # Test2: combining, resources specified by global config
        # (maybe we should use smaller CSS+JS files? Test takes 2+ seconds...)
        s = os.sep
        want = {'css':[s.join(['rsrc', 'css','combined.css'])],
                'js':[s.join(['rsrc', 'js','combined.js'])],
                'img': [],
                'xml':[s.join(['rsrc', 'resources.xml'])]
        }
        testcss = ["css/ferenda.css",
                   "res/css/fake1.css",
                   "res/css/fake2.css"]
        testjs = ["js/ferenda.js",
                  "res/js/fake1.js",
                  "res/js/fake2.js"]
        resources = Resources([staticmockclass(),staticmockclass2()],self.tempdir+os.sep+'rsrc',
                              combineresources=True,
                              cssfiles=testcss,
                              jsfiles=testjs,
                              sitename="Blahonga",
                              sitedescription="A non-default value")
        rl = resources.resourceloader
        testcssfiles = []
        testjsfiles = []
        for cssfile in testcss:
            try:
                testcssfiles.append(rl.filename(cssfile))
            except errors.ResourceNotFound:
                util.writefile(cssfile, "/* this is a faked css file: %s */" % cssfile*1000)
                testcssfiles.append(cssfile)

        for jsfile in testjs:
            try:
                testjsfiles.append(rl.filename(jsfile))
            except errors.ResourceNotFound:
                util.writefile(jsfile, "/* this is a faked js file: %s */" % jsfile*1000)
                testjsfiles.append(jsfile)

        got = resources.make(api=False)
        self.assertEqual(want,got)
        tree = ET.parse(self.tempdir+'/'+got['xml'][0])
        stylesheets=tree.find("stylesheets").getchildren()
        self.assertEqual(len(stylesheets),1)
        self.assertEqual(stylesheets[0].attrib['href'],'rsrc/css/combined.css')
        javascripts=tree.find("javascripts").getchildren()
        self.assertEqual(len(javascripts),1)
        self.assertEqual(javascripts[0].attrib['src'],'rsrc/js/combined.js')
        self.assertEqual(tree.find("sitename").text,"Blahonga")
        self.assertEqual(tree.find("sitedescription").text,"A non-default value")
        self.assertTrue(os.path.exists(self.tempdir+'/rsrc/css/combined.css'))
        self.assertTrue(os.path.exists(self.tempdir+'/rsrc/js/combined.js'))
        # check that the combining/minifying indeed saved us some space
        self.assertLess(os.path.getsize(self.tempdir+'/rsrc/css/combined.css'),
                        sum([os.path.getsize(x) for x in testcssfiles]))
        self.assertLess(os.path.getsize(self.tempdir+'/rsrc/js/combined.js'),
                        sum([os.path.getsize(x) for x in testjsfiles]))
Example #23
 def parse(self, doc):
     # create an intermediate file before we know the correct
     # path for it. Later steps should move this file to the
     # correct place.
     util.writefile(self.store.intermediate_path(doc.basefile), "dummy")
     doc.meta.add((rdflib.URIRef(doc.uri), DCTERMS.title,
                   rdflib.Literal("Hello World", lang="en")))
     doc.body = Body([H1(["Hello world"])])
     doc.basefile = doc.basefile.replace("a/", "b/")
     return True
Example #24
    def test_republishsource(self):
        self.repo.config.republishsource = True
        for basefile in range(25):
            util.writefile(self.repo.store.downloaded_path(str(basefile)),
                           "Source content")

        entries = sorted(list(self.repo.news_entries()),
                         key=attrgetter('updated'), reverse=True)
        self.assertEqual(entries[0].content['src'],
                         self.repo.downloaded_url("24"))
Example #25
 def test_list_basefiles_file(self):
     files = ["downloaded/123/a.html",
              "downloaded/123/b.html",
              "downloaded/124/a.html",
              "downloaded/124/b.html"]
     basefiles = ["124/b", "124/a", "123/b", "123/a"]
     for f in files:
         util.writefile(self.p(f),"Nonempty")
     self.assertEqual(list(self.store.list_basefiles_for("parse")),
                      basefiles)
Example #26
 def test_list_basefiles_postgenerate_file(self):
     files = ["generated/123/a.html",
              "generated/123/b.html",
              "generated/124/a.html",
              "generated/124/b.html"]
     basefiles = ["124/b", "124/a", "123/b", "123/a"]
     for f in files:
         util.writefile(self.p(f),"nonempty")
     self.assertEqual(list(self.store.list_basefiles_for("_postgenerate")),
                      basefiles)
Example #27
    def test_republishsource(self):
        self.repo.config.republishsource = True
        for basefile in range(25):
            util.writefile(self.repo.store.downloaded_path(str(basefile)),
                           "Source content")

        entries = sorted(list(self.repo.news_entries()),
                         key=attrgetter('updated'), reverse=True)
        self.assertEqual(entries[0].content['src'],
                         self.repo.downloaded_url("24"))
Example #28
 def parse(self, doc):
     # create an intermediate file before we know the correct
     # path for it. Later steps should move this file to the
     # correct place.
     util.writefile(self.store.intermediate_path(doc.basefile), "dummy")
     doc.meta.add((rdflib.URIRef(doc.uri), DCTERMS.title,
                   rdflib.Literal("Hello World", lang="en")))
     doc.body = Body([H1(["Hello world"])])
     doc.basefile = doc.basefile.replace("a/", "b/")
     return True
Example #29
 def test_list_basefiles_postgenerate_file(self):
     files = ["generated/123/a.html",
              "generated/123/b.html",
              "generated/124/a.html",
              "generated/124/b.html"]
     basefiles = ["124/b", "124/a", "123/b", "123/a"]
     for f in files:
         util.writefile(self.p(f),"nonempty")
     self.assertEqual(list(self.store.list_basefiles_for("_postgenerate")),
                      basefiles)
Example #30
 def test_list_basefiles_file(self):
     files = ["downloaded/123/a.html",
              "downloaded/123/b.html",
              "downloaded/124/a.html",
              "downloaded/124/b.html"]
     basefiles = ["124/b", "124/a", "123/b", "123/a"]
     for f in files:
         util.writefile(self.p(f),"Nonempty")
     self.assertEqual(list(self.store.list_basefiles_for("parse")),
                      basefiles)
Example #31
    def test_ifneeded_relate(self):
        @ifneeded("relate")
        def testfunc(repo, basefile, needed):
            repo.called = True
            repo.needed = needed

        try:
            datadir = tempfile.mkdtemp()
            mockbasefile = "1234"
            mockrepo = Mock()
            mockrepo.store = DocumentStore(datadir=datadir)
            mockrepo.called = False
            mockrepo.config.force = False

            # create some docentry file in a good place
            de = DocumentEntry(mockrepo.store.documententry_path("1234"))
            now = datetime.datetime.now()
            de.indexed_ts = now + datetime.timedelta(seconds=3600)
            de.indexed_ft = now + datetime.timedelta(seconds=-3600)
            de.indexed_dep = now + datetime.timedelta(seconds=-3600)
            de.save()

            # test 1: Outfile is newer - the ifneeded decorator should
            # make sure the actual testfunc code is never reached

            # NOTE: the "relate" branch of DocumentStore.needed
            # doesn't use outfile_is_newer, so we can't patch that, we
            # have to create actual files
            parsedpath = mockrepo.store.parsed_path("1234")
            util.writefile(parsedpath, "dummy")
            os.utime(parsedpath, (now.timestamp(), now.timestamp() - 7200))
            testfunc(mockrepo, mockbasefile)
            self.assertFalse(mockrepo.called)
            mockrepo.called = False

            # test 2: Outfile is older than the information in the documententry file
            os.utime(parsedpath, (now.timestamp(), now.timestamp()))
            testfunc(mockrepo, mockbasefile)
            self.assertTrue(mockrepo.called)
            self.assertTrue(mockrepo.needed)
            self.assertFalse(mockrepo.needed.triples)
            self.assertFalse(mockrepo.needed.dependencies)
            self.assertTrue(mockrepo.needed.fulltext)

            mockrepo.called = False
            # test 3: Outfile is newer, but the global force option was set
            os.utime(parsedpath, (now.timestamp(), now.timestamp() - 7200))
            mockrepo.config.force = True
            testfunc(mockrepo, mockbasefile)
            self.assertTrue(mockrepo.called)
            mockrepo.config.force = None
            mockrepo.called = False
        finally:
            if os.path.exists(datadir):
                shutil.rmtree(datadir)
Example #32
    def test_ifneeded_relate(self):
        @ifneeded("relate")
        def testfunc(repo, basefile, needed):
            repo.called = True
            repo.needed = needed

        try:
            datadir = tempfile.mkdtemp()
            mockbasefile = "1234"
            mockrepo = Mock()
            mockrepo.store = DocumentStore(datadir=datadir)
            mockrepo.called = False
            mockrepo.config.force = False

            # create some docentry file in a good place
            de = DocumentEntry(mockrepo.store.documententry_path("1234"))
            now = datetime.datetime.now()
            de.indexed_ts = now + datetime.timedelta(seconds=3600)
            de.indexed_ft = now + datetime.timedelta(seconds=-3600)
            de.indexed_dep = now + datetime.timedelta(seconds=-3600)
            de.save()

            # test 1: Outfile is newer - the ifneeded decorator should
            # make sure the actual testfunc code is never reached

            # NOTE: the "relate" branch of DocumentStore.needed
            # doesn't use outfile_is_newer, so we can't patch that, we
            # have to create actual files
            parsedpath = mockrepo.store.parsed_path("1234")
            util.writefile(parsedpath, "dummy")
            os.utime(parsedpath, (now.timestamp(), now.timestamp() - 7200))
            testfunc(mockrepo, mockbasefile)
            self.assertFalse(mockrepo.called)
            mockrepo.called = False

            # test 2: Outfile is older than the information in the documententry file
            os.utime(parsedpath, (now.timestamp(), now.timestamp()))
            testfunc(mockrepo, mockbasefile)
            self.assertTrue(mockrepo.called)
            self.assertTrue(mockrepo.needed)
            self.assertFalse(mockrepo.needed.triples)
            self.assertFalse(mockrepo.needed.dependencies)
            self.assertTrue(mockrepo.needed.fulltext)
            
            mockrepo.called = False
            # test 3: Outfile is newer, but the global force option was set
            os.utime(parsedpath, (now.timestamp(), now.timestamp() - 7200))
            mockrepo.config.force = True
            testfunc(mockrepo, mockbasefile)
            self.assertTrue(mockrepo.called)
            mockrepo.config.force = None
            mockrepo.called = False
        finally:
            if os.path.exists(datadir):
                shutil.rmtree(datadir)
Example #33
    def download(self, basefile=None):
        # Get all "term sets" (dcterms:subject objects in use, wiki pages
        # describing legal concepts, Swedish Wikipedia pages...)
        terms = defaultdict(dict)

        # 1) Query the triplestore for all dcterms:subject triples (is this
        # semantically sensible for a "download" action -- the content
        # isn't really external?) -- term set "subjects" (these come
        # from both court cases and legal definitions in law text)
        sq = """
        PREFIX dcterms:<http://purl.org/dc/terms/>
        PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>

        SELECT ?uri ?subject ?label
        WHERE { {?uri dcterms:subject ?subject . }
                OPTIONAL {?subject rdfs:label ?label . } }
        """
        store = TripleStore.connect(self.config.storetype,
                                    self.config.storelocation,
                                    self.config.storerepository)
        results = store.select(sq, "python")
        for row in results:
            if 'label' in row:
                label = row['label']
            else:
                label = self.basefile_from_uri(row['subject'])
                if label is None:
                    self.log.warning("could not determine keyword from %s" % row['subject'])
                    continue
            
            sanitized = self.sanitize_term(label)
            if sanitized:
                if sanitized not in terms:
                    terms[sanitized]['subjects'] = []
                terms[sanitized]['subjects'].append(row['uri'])

        self.log.debug("Retrieved %s subject terms from triplestore" % len(terms))

        for termset_func in self.termset_funcs:
            termset_func(terms)

        for term in terms:
            term = self.sanitize_term(term)
            if not term:
                continue
            oldterms = ""
            termpath = self.store.downloaded_path(term)
            if os.path.exists(termpath):
                oldterms = yaml.safe_load(util.readfile(termpath))
            if terms[term] != oldterms:
                util.ensure_dir(termpath)
                util.writefile(termpath, yaml.dump(terms[term], default_flow_style=False))
                self.log.info("%s: in %s termsets" % (term, len(terms[term])))
            else:
                self.log.debug("%s: skipped" % term)
Example #34
 def test_listdirs(self):
     util.writefile(self.p("foo.txt"), "Hello")
     util.writefile(self.p("bar.txt"), "Hello")
     util.writefile(self.p("foo/2.txt"), "Hello")
     util.writefile(self.p("foo/10.txt"), "Hello")
     util.writefile(self.datadir+"/foo/baz.text", "Hello")
     generator = util.list_dirs(self.datadir, ".txt")
     self.assertEqual(self.p("bar.txt"), next(generator))
     self.assertEqual([self.p("foo.txt"),
                       self.p("foo/2.txt"),
                       self.p("foo/10.txt")], list(generator))
Example #35
 def test_list_versions_file(self):
     files = ["archive/downloaded/123/a/1.html",
              "archive/downloaded/123/a/2.html",
              "archive/downloaded/123/a/2bis.html",
              "archive/downloaded/123/a/10.html"]
     versions = ["1","2", "2bis", "10"]
     for f in files:
         util.writefile(self.p(f),"nonempty")
         # list_versions(action, basefile)
     self.assertEqual(list(self.store.list_versions("123/a","downloaded")),
                      versions)
Example #36
 def test_list_versions_file(self):
     files = ["archive/downloaded/123/a/.versions/1.html",
              "archive/downloaded/123/a/.versions/2.html",
              "archive/downloaded/123/a/.versions/2bis.html",
              "archive/downloaded/123/a/.versions/10.html"]
     versions = ["1","2", "2bis", "10"]
     for f in files:
         util.writefile(self.p(f),"nonempty")
         # list_versions(action, basefile)
     self.assertEqual(list(self.store.list_versions("123/a","downloaded")),
                      versions)
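The expected ordering in these two tests ("1", "2", "2bis", "10", with "10" after "2") implies that list_versions sorts version identifiers naturally rather than lexicographically. A sketch of a sort key with that behavior (illustrative only, not DocumentStore's actual implementation):

 import re

 def natural_key(s):
     # split into digit and non-digit runs so numeric parts compare as ints
     return [int(part) if part.isdigit() else part
             for part in re.split(r"(\d+)", s)]

 sorted(["10", "2bis", "1", "2"], key=natural_key)
 # -> ['1', '2', '2bis', '10']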
Example #37
    def test_list_basefiles_generate_dir(self):
        files = ["parsed/123/a/index.xhtml",
                 "parsed/123/b/index.xhtml",
                 "parsed/124/a/index.xhtml",
                 "parsed/124/b/index.xhtml"]
        basefiles = ["124/b", "124/a", "123/b", "123/a"]

        self.store.storage_policy = "dir"
        for f in files:
            util.writefile(self.p(f),"nonempty")
        self.assertEqual(list(self.store.list_basefiles_for("generate")),
                         basefiles)
Example #38
 def test_list_versions_dir(self):
     files = ["archive/downloaded/123/a/1/index.html",
              "archive/downloaded/123/a/2/index.html",
              "archive/downloaded/123/a/2bis/index.html",
              "archive/downloaded/123/a/10/index.html"]
     basefiles = ['123/a']
     versions = ["1","2", "2bis", "10"]
     for f in files:
         util.writefile(self.p(f),"nonempty")
     self.store.storage_policy = "dir"
     self.assertEqual(list(self.store.list_versions("123/a", "downloaded")),
                      versions)
Example #39
    def test_download(self):
        # create a basic.json + 1-2 resources
        os.mkdir(self.datadir + "/source")
        with open(self.datadir + "/source/basic.json", "w") as fp:
            s = json.dumps(self.basicjson, separators=(', ', ': '))
            fp.write(s)

        util.writefile(self.datadir + "/source/index.html",
                       "<p><a href='doc/a_.html'>ID: a</a></p>")
        util.writefile(self.datadir + "/source/a_.html",
                       "<p>This is doc A</p>")
        self._runtest()
Example #40
 def test_distill_setfile(self):
     os.mkdir(self.datadir+"/downloaded")
     util.writefile(self.datadir+"/downloaded/a.html",
                    "<p>This is doc A</p>")
     util.writefile(self.datadir+"/distilled/a.ttl",  "")
     os.environ["FERENDA_SET_TESTFILE"] = "1"
     with patch("builtins.print") as printmock:
         self._runtest()
     del os.environ["FERENDA_SET_TESTFILE"]
     self.assertEqual(self.expected_ttl,
                      util.readfile(self.datadir+"/distilled/a.ttl"))
Example #41
 def test_download(self):
     # create a basic.json + 1-2 resources
     os.mkdir(self.datadir+"/source")
     with open(self.datadir+"/source/basic.json", "w") as fp:
         s = json.dumps(self.basicjson, separators=(', ', ': '))
         fp.write(s)
         
     util.writefile(self.datadir+"/source/index.html",
                    "<p><a href='doc/a_.html'>ID: a</a></p>")
     util.writefile(self.datadir+"/source/a_.html",
                    "<p>This is doc A</p>")
     self._runtest()
Example #42
 def test_distill_setfile(self):
     os.mkdir(self.datadir + "/downloaded")
     util.writefile(self.datadir + "/downloaded/a.html",
                    "<p>This is doc A</p>")
     util.writefile(self.datadir + "/distilled/a.ttl", "")
     os.environ["FERENDA_SET_TESTFILE"] = "1"
     with patch("builtins.print") as printmock:
         self._runtest()
     del os.environ["FERENDA_SET_TESTFILE"]
     self.assertEqual(self.expected_ttl,
                      util.readfile(self.datadir + "/distilled/a.ttl"))
Example #43
    def test_list_basefiles_generate_dir(self):
        files = ["parsed/123/a/index.xhtml",
                 "parsed/123/b/index.xhtml",
                 "parsed/124/a/index.xhtml",
                 "parsed/124/b/index.xhtml"]
        basefiles = ["124/b", "124/a", "123/b", "123/a"]

        self.store.storage_policy = "dir"
        for f in files:
            util.writefile(self.p(f),"nonempty")
        self.assertEqual(list(self.store.list_basefiles_for("generate")),
                         basefiles)
Example #44
 def test_list_versions_dir(self):
     files = ["archive/downloaded/123/a/.versions/1/index.html",
              "archive/downloaded/123/a/.versions/2/index.html",
              "archive/downloaded/123/a/.versions/2bis/index.html",
              "archive/downloaded/123/a/.versions/10/index.html"]
     basefiles = ['123/a']
     versions = ["1","2", "2bis", "10"]
     for f in files:
         util.writefile(self.p(f),"nonempty")
     self.store.storage_policy = "dir"
     self.assertEqual(list(self.store.list_versions("123/a", "downloaded")),
                      versions)
Example #45
 def test_list_attachments(self):
     self.store.storage_policy = "dir" # attachments require this
     files = ["downloaded/123/a/index.html",
              "downloaded/123/a/attachment.html",
              "downloaded/123/a/appendix.pdf",
              "downloaded/123/a/other.txt"]
     basefiles = ['123/a']
     attachments = ['appendix.pdf', 'attachment.html', 'other.txt']
     for f in files:
         util.writefile(self.p(f),"nonempty")
         # list_attachments(action, basefile, version=None)
     self.assertEqual(list(self.store.list_attachments("123/a", "downloaded")),
                      attachments)
Example #46
 def test_list_attachments(self):
     self.store.storage_policy = "dir" # attachments require this
     files = ["downloaded/123/a/index.html",
              "downloaded/123/a/attachment.html",
              "downloaded/123/a/appendix.pdf",
              "downloaded/123/a/other.txt"]
     basefiles = ['123/a']
     attachments = ['appendix.pdf', 'attachment.html', 'other.txt']
     for f in files:
         util.writefile(self.p(f),"nonempty")
         # list_attachments(action, basefile, version=None)
     self.assertEqual(list(self.store.list_attachments("123/a", "downloaded")),
                      attachments)
Example #47
    def test_list_basefiles_parse_dir(self):
        files = ["downloaded/123/a/index.html",
                 "downloaded/123/b/index.html",
                 "downloaded/124/a/index.html",
                 "downloaded/124/b/index.html"]
        basefiles = ["124/b", "124/a", "123/b", "123/a"]

        self.store.storage_policy = "dir"
        for f in files:
            p = self.p(f)
            util.writefile(p,"nonempty")
        self.assertEqual(list(self.store.list_basefiles_for("parse")),
                         basefiles)
Example #48
    def test_list_basefiles_parse_dir(self):
        files = ["downloaded/123/a/index.html",
                 "downloaded/123/b/index.html",
                 "downloaded/124/a/index.html",
                 "downloaded/124/b/index.html"]
        basefiles = ["124/b", "124/a", "123/b", "123/a"]

        self.store.storage_policy = "dir"
        for f in files:
            p = self.p(f)
            util.writefile(p,"nonempty")
        self.assertEqual(list(self.store.list_basefiles_for("parse")),
                         basefiles)
Example #49
 def test_list_attachments(self):
     files = [
         "downloaded/123/a/index.html",
         "downloaded/123/a/attachment.html",
         "downloaded/123/a/appendix.pdf",
         "downloaded/123/a/other.txt",
     ]
     basefiles = ["123/a"]
     attachments = ["appendix.pdf", "attachment.html", "other.txt"]
     for f in files:
         util.writefile(self.p(f), "nonempty")
         # list_attachments(action, basefile, version=None)
     self.assertEqual(list(self.store.list_attachments("123/a", "downloaded")), attachments)
Example #50
    def test_run_makeresources(self):
        # 1. setup test_run_enable
        # 2. run('all', 'makeresources')
        # 3. verify that all css/js files specified by default and in
        #    Testrepo get copied (remove rsrc)
        # 4. run('all', 'makeresources', '--combine')
        # 5. verify that single css and js file is created
        self._enable_repos()
        s = os.sep
        want = {
            'css': [
                s.join(['rsrc', 'css', 'test.css']),
                s.join(['rsrc', 'css', 'other.css'])
            ],
            'js': [s.join(['rsrc', 'js', 'test.js'])],
            'img': [s.join(['rsrc', 'img', 'test.png'])],
            'json': [
                s.join(['rsrc', 'api', 'context.json']),
                s.join(['rsrc', 'api', 'common.json']),
                s.join(['rsrc', 'api', 'terms.json'])
            ],
            'xml': [s.join(['rsrc', 'resources.xml'])]
        }
        got = manager.run(['all', 'makeresources'])
        self.assertEqual(want, got)

        # 6. alter the ferenda.ini so that it doesn't specify any css/js files
        util.writefile(
            "ferenda.ini", """[__root__]
loglevel=WARNING
datadir = %s
url = http://localhost:8000/
searchendpoint = /search/
apiendpoint = /api/
cssfiles = []
jsfiles = []
imgfiles = []
        """ % self.tempdir)
        want = {
            'css': [],
            'js': [],
            'img': [],
            'json': [
                s.join(['rsrc', 'api', 'context.json']),
                s.join(['rsrc', 'api', 'common.json']),
                s.join(['rsrc', 'api', 'terms.json'])
            ],
            'xml': [s.join(['rsrc', 'resources.xml'])]
        }
        got = manager.run(['all', 'makeresources'])
        self.assertEqual(want, got)
Example #51
 def setUp(self):
     super(EqualDirs, self).setUp()
     self.datadir = tempfile.mkdtemp()
     util.writefile(self.datadir + "/want/one.txt", "Contents of one")
     util.writefile(self.datadir + "/got/one.txt", "Contents of one")
     util.writefile(self.datadir + "/want/sub/two.text", "Contents of two")
     util.writefile(self.datadir + "/got/sub/two.text", "Contents of two")
Example #52
    def test_replace_if_different(self):
        # test 1: dst does not exist
        util.writefile(self.fname, "Hello")
        self.assertTrue(util.replace_if_different(self.fname, self.fname2))
        self.assertFalse(os.path.exists(self.fname))
        self.assertTrue(os.path.exists(self.fname2))

        # test 2: dst exists, but is different (gets overwritten)
        util.writefile(self.fname, "Hello (different)")
        self.assertTrue(util.replace_if_different(self.fname, self.fname2))
        self.assertFalse(os.path.exists(self.fname))
        self.assertEqual("Hello (different)",
                         util.readfile(self.fname2))

        # test 3: src and dst are identical (src gets removed)
        util.writefile(self.fname, "Hello (different)")
        self.assertFalse(util.replace_if_different(self.fname, self.fname2))
        self.assertFalse(os.path.exists(self.fname))

        # test 4: dst exists, is different, gets archived
        newfile = self.dname+"/new.txt"
        archivefile = self.dname+"/archive.txt"
        util.writefile(newfile, "Hello (archiving)")
        self.assertTrue(util.replace_if_different(newfile, self.fname2, archivefile))
        self.assertFalse(os.path.exists(newfile))
        self.assertEqual("Hello (archiving)",
                         util.readfile(self.fname2))
        self.assertEqual("Hello (different)",
                         util.readfile(archivefile))
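Taken together, the four tests above pin down the contract of util.replace_if_different: replace dst when src differs or dst is missing, optionally archiving the old dst, and always consume src. A sketch of those semantics (assumed behavior inferred from the tests, not the actual ferenda.util code):

 import filecmp
 import os
 import shutil

 def replace_if_different(src, dst, archivefile=None):
     # returns True if dst was created or overwritten, False if identical
     if os.path.exists(dst) and filecmp.cmp(src, dst, shallow=False):
         os.unlink(src)  # identical content: just drop src
         return False
     if archivefile and os.path.exists(dst):
         shutil.move(dst, archivefile)  # keep the old copy around
     shutil.move(src, dst)
     return True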
Example #53
 def setUp(self):
     super(EqualDirs, self).setUp()
     self.datadir = tempfile.mkdtemp()
     util.writefile(self.datadir + "/want/one.txt", "Contents of one")
     util.writefile(self.datadir + "/got/one.txt", "Contents of one")
     util.writefile(self.datadir + "/want/sub/two.text", "Contents of two")
     util.writefile(self.datadir + "/got/sub/two.text", "Contents of two")
Example #54
 def test_list_invalid_attachments(self):
     # test that files with an invalid suffix (in
     # store.invalid_suffixes) are not listed
     self.store.storage_policy = "dir" # attachments require this
     files = ["downloaded/123/a/index.html",
              "downloaded/123/a/index.invalid",
              "downloaded/123/a/other.invalid",
              "downloaded/123/a/other.txt"]
     basefiles = ['123/a']
     attachments = ['other.txt']
     for f in files:
         util.writefile(self.p(f),"nonempty")
         # list_attachments(action, basefile, version=None)
     self.assertEqual(list(self.store.list_attachments("123/a", "downloaded")),
                      attachments)
Example #55
 def test_parse_setfile(self):
     os.mkdir(self.datadir + "/downloaded")
     util.writefile(self.datadir + "/downloaded/a.html",
                    "<p>This is doc A</p>")
     util.writefile(self.datadir + "/parsed/a.xhtml", "")
     os.environ["FERENDA_SET_TESTFILE"] = "1"
     with patch("builtins.print") as printmock:
         self._runtest()
     output = printmock.mock_calls[0][1][0]
     output = re.sub("'[^']*'", "''", output, 1)
     self.assertEqual("Overwriting '' with result of parse ('a')", output)
     del os.environ["FERENDA_SET_TESTFILE"]
     self.assertEqualXML(self.expected_xhtml,
                         util.readfile(self.datadir + "/parsed/a.xhtml"))