def sameas_minter(self):
    # make a resourceloader that only loads resources from
    # superclasses, not this actual class. This'll make it look in
    # ferenda/sources/legal/se/res, not lagen/nu/res.
    loadpath = ResourceLoader.make_loadpath(self)
    if "lagen/nu/" in loadpath[0]:
        loadpath = loadpath[1:]
    rl = ResourceLoader(*loadpath)
    spacefile = rl.filename("uri/swedishlegalsource.space.ttl")
    # print("sameas: Loading URISpace from %s" % spacefile)
    self.log.debug("Loading URISpace from %s" % spacefile)
    with open(spacefile) as space:
        cfg = Graph().parse(space, format="turtle")
    # slugs contains space:abbrSlug, but space contains
    # urispace:abbrSlug... We do a little translation
    src = URIRef("http://rinfo.lagrummet.se/sys/uri/space#abbrSlug")
    dst = URIRef("https://lagen.nu/sys/uri/space#abbrSlug")
    for (s, p, o) in cfg:
        if o == src:
            # print("Translating %s %s :abbrSlug" % (s.n3(), p.n3()))
            cfg.remove((s, p, o))
            cfg.add((s, p, dst))
        elif s == src:
            # translate triples whose subject is the old slug URI
            # print("Translating :abbrSlug %s %s" % (p.n3(), o.n3()))
            cfg.remove((s, p, o))
            cfg.add((dst, p, o))
    slugsfile = self.resourceloader.filename(
        "uri/swedishlegalsource.slugs.ttl")
    # self.log.debug("sameas: Loading slugs from %s" % slugsfile)
    with open(slugsfile) as slugs:
        cfg.parse(slugs, format="turtle")
    COIN = Namespace("http://purl.org/court/def/2009/coin#")
    # select the correct URI for the URISpace definition by finding a
    # single coin:URISpace object
    spaceuri = cfg.value(predicate=RDF.type, object=COIN.URISpace)
    return URIMinter(cfg, spaceuri)
def __init__(self, repos, inifile=None, **kwargs):
    self.repos = repos
    self.log = logging.getLogger("wsgi")
    # FIXME: Cut-n-paste of the method in Resources.__init__
    loadpaths = [ResourceLoader.make_loadpath(repo) for repo in repos]
    loadpath = ["."]  # cwd always has priority -- makes sense?
    for subpath in loadpaths:
        for p in subpath:
            if p not in loadpath:
                loadpath.append(p)
    self.resourceloader = ResourceLoader(*loadpath)
    # FIXME: need to specify documentroot?
    defaults = DocumentRepository.get_default_options()
    if inifile:
        assert os.path.exists(inifile), \
            "INI file %s doesn't exist (relative to %s)" % (inifile,
                                                            os.getcwd())
    # NB: If both inifile and kwargs are specified, the latter will
    # take precedence. I think this is the expected behaviour.
    self.config = LayeredConfig(Defaults(defaults),
                                INIFile(inifile),
                                Defaults(kwargs),
                                cascade=True)
def test_run_makeresources_defaultconfig(self):
    util.resource_extract(
        ResourceLoader(), 'scripts/ferenda.template.ini', "ferenda.ini",
        {'storetype': 'SQLITE',
         'storelocation': 'data/ferenda.sqlite',
         'storerepository': 'ferenda',
         'indextype': 'WHOOSH',
         'indexlocation': 'data/whooshindex',
         'sitename': 'Test'})
    self._enable_repos()
    s = os.sep
    got = manager.run(['all', 'makeresources', '--loglevel=CRITICAL'])
    want = {'xml': [s.join(['rsrc', 'resources.xml'])],
            'json': [s.join(['rsrc', 'api', 'context.json']),
                     s.join(['rsrc', 'api', 'common.json']),
                     s.join(['rsrc', 'api', 'terms.json'])],
            'img': [s.join(['rsrc', 'img', 'atom.png']),
                    s.join(['rsrc', 'img', 'test.png'])],
            'css': [s.join(['rsrc', 'css', 'ferenda.css']),
                    s.join(['rsrc', 'css', 'test.css'])],
            'js': [s.join(['rsrc', 'js', 'ferenda.js']),
                   s.join(['rsrc', 'js', 'test.js'])]}
    self.assertEqual(want, got)
def _simplify_ooxml(self, data, pretty_print=True):
    # simplify the horrendous mess that is OOXML through
    # simplify-ooxml.xsl. Returns a formatted XML stream as a
    # bytestring.
    #
    # In some rare cases, the value \xc2\x81 (UTF-8 for a control
    # char) is used where "Å" (\xc3\x85) should be used.
    if b"\xc2\x81" in data:
        self.log.warning("Working around control char x81 in text data")
        data = data.replace(b"\xc2\x81", b"\xc3\x85")
    intree = etree.parse(BytesIO(data))
    if not hasattr(self, 'ooxml_transform'):
        # compile the transform once and cache it on the instance
        fp = ResourceLoader().openfp("xsl/simplify-ooxml.xsl")
        self.ooxml_transform = etree.XSLT(etree.parse(fp))
        fp.close()
    resulttree = self.ooxml_transform(intree)
    return etree.tostring(resulttree, pretty_print=pretty_print,
                          encoding="utf-8")
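
# The same compile-once-and-cache XSLT pattern in isolation, using only
# the lxml API. This is an illustrative sketch, not ferenda code: the
# trivial identity stylesheet below merely stands in for the real
# simplify-ooxml.xsl resource.
from io import BytesIO

from lxml import etree

# compile the stylesheet once; the resulting transform object is
# reusable and cheap to apply repeatedly
identity = etree.XSLT(etree.fromstring(b"""
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:template match="@*|node()">
    <xsl:copy><xsl:apply-templates select="@*|node()"/></xsl:copy>
  </xsl:template>
</xsl:stylesheet>
"""))

doc = etree.parse(BytesIO(b"<root><child/></root>"))
result = identity(doc)  # apply the cached transform
print(etree.tostring(result, pretty_print=True,
                     encoding="utf-8").decode("utf-8"))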
def __init__(self,
             transformertype,
             template,
             templatedir,  # within the resourceloader
             resourceloader=None,
             documentroot=None,
             config=None):
    cls = {'XSLT': XSLTTransform,
           'JINJA': JinjaTransform}[transformertype]
    if not resourceloader:
        resourceloader = ResourceLoader()
    self.resourceloader = resourceloader
    self.t = cls(template, templatedir, self.resourceloader)
    self.documentroot = documentroot
    self.config = config
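
# A minimal usage sketch (not taken from the source): constructing a
# Transformer for XSLT output. The stylesheet path "res/xsl/generic.xsl"
# and the templatedir "xsl" are assumed example values; leaving
# resourceloader unset makes the constructor above create a default
# ResourceLoader.
from ferenda import Transformer

t = Transformer("XSLT", "res/xsl/generic.xsl", "xsl", documentroot="data")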
class Resources(object):
    """Creates and manages various assets/resources needed for web serving."""

    def __init__(self, repos, resourcedir, **kwargs):
        # FIXME: document what kwargs could be (particularly
        # 'combineresources')
        self.repos = repos
        self.resourcedir = resourcedir
        from ferenda.manager import DEFAULT_CONFIG
        defaults = dict(DEFAULT_CONFIG)
        defaults.update(DocumentRepository.get_default_options())
        defaults.update(kwargs)
        self.config = LayeredConfig(Defaults(defaults))
        # the below call to setup_logger alters the logging level of
        # the root logger, which can't be good practice. Also, we
        # should probably not log to the root logger, but rather to
        # ferenda.resources.
        #
        # from ferenda.manager import setup_logger
        # self.log = setup_logger()
        self.log = logging.getLogger("ferenda.resources")
        # FIXME: How should we set up a global loadpath from the
        # individual repos?
        loadpaths = [ResourceLoader.make_loadpath(repo) for repo in repos]
        loadpath = ["."]  # cwd always has priority -- makes sense?
        for subpath in loadpaths:
            for p in subpath:
                if p not in loadpath:
                    loadpath.append(p)
        self.resourceloader = ResourceLoader(*loadpath)

    def make(self, css=True, js=True, img=True, xml=True, api=None):
        res = {}
        if api is None:
            api = not self.config.staticsite
        if css:
            res['css'] = self.make_css()
        if js:
            res['js'] = self.make_js()
        if img:
            res['img'] = self.make_img()
        if xml:
            res['xml'] = self.make_resources_xml(res.get('css', []),
                                                 res.get('js', []))
        if api:
            res['json'] = self.make_api_files()
        # finally, normalize paths according to os.path.sep conventions
        if os.sep == "\\":
            for part in res:
                result = []
                for x in res[part]:
                    if x.startswith("http://") or x.startswith("https://"):
                        result.append(x)
                    else:
                        result.append(x.replace('/', os.sep))
                res[part] = result
        return res

    def make_css(self):
        import cssmin
        combinefile = None
        if self.config.combineresources:
            combinefile = os.sep.join(
                [self.resourcedir, 'css', 'combined.css'])
        return self._make_files('cssfiles',
                                self.resourcedir + os.sep + 'css',
                                combinefile,
                                cssmin.cssmin)

    def make_js(self):
        # slimit provides better perf, but isn't py3 compatible
        # import slimit
        # js = slimit.minify(
        #     jsbuffer.getvalue(), mangle=True, mangle_toplevel=True)
        import jsmin
        combinefile = None
        if self.config.combineresources:
            combinefile = os.sep.join([self.resourcedir, 'js', 'combined.js'])
        return self._make_files('jsfiles',
                                self.resourcedir + os.sep + 'js',
                                combinefile,
                                jsmin.jsmin)

    def make_img(self):
        return self._make_files('imgfiles',
                                self.resourcedir + os.sep + 'img')

    def make_resources_xml(self, cssfiles, jsfiles):
        E = ElementMaker()  # namespace=None, nsmap={None: ...}
        root = E.configuration(
            E.sitename(self.config.sitename),
            E.sitedescription(self.config.sitedescription),
            E.url(self.config.url),
            E.tabs(*self._links('tabs')),
            E.footerlinks(*self._links('footer')),
            E.stylesheets(
                *self._li_wrap(cssfiles, 'link', 'href', rel="stylesheet")),
            E.javascripts(*self._li_wrap(jsfiles, 'script', 'src', text=" ")))
        if not self.config.staticsite:
            root.append(E.search(E.endpoint(self.config.searchendpoint)))
        outfile = self.resourcedir + os.sep + "resources.xml"
        util.writefile(
            outfile,
            etree.tostring(root,
                           encoding="utf-8",
                           pretty_print=True).decode("utf-8"))
        self.log.info("Wrote %s" % outfile)
        return [self._filepath_to_urlpath(outfile, 1)]

    # FIXME: When creating <script> elements, must take care not to
    # create self-closing tags (like by creating a single space text
    # node)
    def _li_wrap(self, items, container, attribute, text=None, **kwargs):
        elements = []
        for item in items:
            kwargs[attribute] = item
            e = etree.Element(container, **kwargs)
            e.text = text
            elements.append(e)
        return elements

    def _links(self, methodname):
        E = ElementMaker()
        elements = []
        for repo in self.repos:
            alias = repo.alias
            items = getattr(repo, methodname)()
            self.log.debug("Adding %(methodname)s from docrepo %(alias)s" %
                           locals())
            elements.extend(self._links_listitems(items))
        return elements

    def _links_listitems(self, listitems):
        E = ElementMaker()
        elements = []
        for item in listitems:
            if len(item) == 2:
                (label, url) = item
                sublists = None
            else:
                (label, url, sublists) = item
            self.log.debug(" - %(label)s (%(url)s)" % locals())
            if url:
                li = E.li(E.a({'href': url}, label))
            else:
                li = E.li(label)
            if sublists:
                subelements = []
                for sublist in sublists:
                    subelements.extend(self._links_listitems(sublist))
                li.append(E.ul(*subelements))
            elements.append(li)
        return elements

    def _make_files(self, option, filedir, combinefile=None, combinefunc=None):
        urls = []
        buf = BytesIO()
        processed = set()
        # eg. self.config.cssfiles; it's possible to set eg
        # cssfiles=None when creating the Resources object
        if getattr(self.config, option):
            for f in getattr(self.config, option):
                urls.append(self._process_file(f, buf, filedir, "ferenda.ini"))
                processed.add(f)
        for repo in self.repos:
            # FIXME: create a more generic way of optionally
            # signalling to a repo that "Hey, now it's time to create
            # your resources if you can"
            if repo.__class__.__name__ == "SFS" and option == "imgfiles":
                self.log.info("calling into SFS._makeimages()")
                LayeredConfig.set(repo.config, 'imgfiles', repo._makeimages())
            if hasattr(repo.config, option):
                for f in getattr(repo.config, option):
                    if f in processed:
                        continue
                    urls.append(self._process_file(f, buf, filedir,
                                                   repo.alias))
                    processed.add(f)
        urls = list(filter(None, urls))
        if combinefile:
            txt = buf.getvalue().decode('utf-8')
            util.writefile(combinefile, combinefunc(txt))
            return [self._filepath_to_urlpath(combinefile, 2)]
        else:
            return urls

    def _process_file(self, filename, buf, destdir, origin=""):
        """Helper function to concatenate or copy CSS/JS (optionally
        processing them with e.g. Scss) or other files to the correct
        place under the web root directory.

        :param filename: The name (relative to the ferenda package) of
                         the file
        :param buf: A buffer into which the contents of the file is
                    written (if combineresources == True)
        :param destdir: The directory into which the file will be
                        copied (unless combineresources == True)
        :param origin: The source of the configuration that specifies
                       this file
        :returns: The URL path of the resulting file, relative to the
                  web root (or None if combineresources == True)
        :rtype: str
        """
        if filename.startswith("http://") or filename.startswith("https://"):
            if self.config.combineresources:
                raise errors.ConfigurationError(
                    "makeresources: Can't use combineresources=True in "
                    "combination with external js/css URLs (%s)" % filename)
            self.log.debug("Using external url %s" % filename)
            return filename
        try:
            fp = self.resourceloader.openfp(filename, binary=True)
        except errors.ResourceNotFound:
            self.log.warning("file %(filename)s (specified in %(origin)s)"
                             " doesn't exist" % locals())
            return None
        (base, ext) = os.path.splitext(filename)
        if self.config.combineresources:
            self.log.debug("combining %s into buffer" % filename)
            d = fp.read()
            buf.write(d)
            fp.close()
            return None
        else:
            # FIXME: don't copy (or at least don't log) if the outfile
            # already exists.
            # self.log.debug("writing %s out to %s" % (filename, destdir))
            outfile = destdir + os.sep + os.path.basename(filename)
            if (os.path.islink(outfile) and
                os.path.relpath(
                    os.path.join(os.path.dirname(outfile),
                                 os.readlink(outfile))) ==
                    util.name_from_fp(fp)):
                self.log.warning(
                    "%s is a symlink to source file %s, won't overwrite" %
                    (outfile, util.name_from_fp(fp)))
            else:
                util.ensure_dir(outfile)
                with open(outfile, "wb") as fp2:
                    fp2.write(fp.read())
                fp.close()
            return self._filepath_to_urlpath(outfile, 2)

    def make_api_files(self):
        # this should create the following files under resourcedir:
        #   api/context.json (aliased to /json-ld/context.json if legacyapi)
        #   api/terms.json (aliased to /var/terms.json if legacyapi)
        #   api/common.json (aliased to /var/common.json if legacyapi)
        # MAYBE api/ui/ - copied from ferenda/res/ui
        files = []
        context = os.sep.join([self.resourcedir, "api", "context.json"])
        if self.config.legacyapi:
            self.log.info("Creating API files for legacyapi")
            contextpath = "/json-ld/context.json"
            termspath = "/var/terms"
            commonpath = "/var/common"
        else:
            # FIXME: create correct URL path
            contextpath = "/rsrc/api/context.json"
            termspath = "/rsrc/api/terms.json"
            commonpath = "/rsrc/api/common.json"
        util.ensure_dir(context)
        with open(context, "w") as fp:
            contextdict = self._get_json_context()
            s = json.dumps({"@context": contextdict},
                           separators=(', ', ': '),
                           indent=4,
                           sort_keys=True)
            fp.write(s)
        files.append(self._filepath_to_urlpath(context, 2))
        common = os.sep.join([self.resourcedir, "api", "common.json"])
        terms = os.sep.join([self.resourcedir, "api", "terms.json"])
        for (filename, func, urlpath) in (
                (common, self._get_common_graph, commonpath),
                (terms, self._get_term_graph, termspath)):
            g = func(self.config.url + urlpath[1:])
            d = json.loads(
                g.serialize(format="json-ld", context=contextdict,
                            indent=4).decode("utf-8"))
            # d might not contain a @context (if contextdict == {}, ie
            # no repos are given)
            if '@context' in d:
                d['@context'] = contextpath
            if self.config.legacyapi:
                d = self._convert_legacy_jsonld(d,
                                                self.config.url + urlpath[1:])
            with open(filename, "w") as fp:
                s = json.dumps(d, indent=4, separators=(', ', ': '),
                               sort_keys=True)
                fp.write(s)
            files.append(self._filepath_to_urlpath(filename, 2))
        if self.config.legacyapi:
            # copy the ui explorer app to <url>/rsrc/ui/ -- this does
            # not get included in files
            targetdir = os.sep.join([self.resourcedir, "ui"])
            self.resourceloader.extractdir("ui", targetdir)
        return files

    def _convert_legacy_jsonld(self, indata, rooturi):
        # the json structure should be a top node containing only
        # @context, iri (localhost:8000/var/terms), type (foaf:Document)
        # and topic - a list of dicts, where each dict looks like:
        #
        # {"iri": "referatserie",
        #  "comment": "Anger vilken referatserie som referatet
        #              eventuellt tillhör.",
        #  "label": "Referatserie",
        #  "type": "DatatypeProperty"}
        out = {}
        topics = []
        # the property containing the id/uri for the record may be
        # under @id or iri, depending on whether self.config.legacyapi
        # was in effect for _get_json_context()
        if self.config.legacyapi:
            idfld = 'iri'
        else:
            idfld = '@id'
        # indata might be a mapping containing a list of mappings
        # under @graph, or it might just be the actual list.
        wantedlist = None
        if isinstance(indata, list):
            wantedlist = indata
        else:
            for topkey, topval in indata.items():
                if topkey == "@graph":
                    wantedlist = topval
                    break
        if not wantedlist:
            self.log.warning("Couldn't find list of mappings in %s, "
                             "topics will be empty" % indata)
        else:
            shortened = {}
            for subject in sorted(wantedlist, key=lambda x: x["iri"]):
                if subject[idfld] == rooturi:
                    for key, value in subject.items():
                        if key in (idfld, 'foaf:topic'):
                            continue
                        out[key] = value
                else:
                    for key in subject:
                        if isinstance(subject[key], list):
                            # make sure multiple values are sorted for
                            # the same reason as below
                            subject[key].sort()
                    # FIXME: We want to use just the urileaf for
                    # legacyapi clients (ie Standard instead of
                    # bibo:Standard) but to be proper json-ld, this
                    # requires that we define contexts for this. Which
                    # we don't (yet)
                    if ("iri" in subject and
                            ":" in subject["iri"] and
                            "://" not in subject["iri"]):
                        short = subject["iri"].split(":", 1)[1]
                        if short in shortened:
                            self.log.warning(
                                "Cannot shorten IRI %s -> %s, already "
                                "defined (%s)" % (subject["iri"], short,
                                                  shortened[short]))
                            # skips adding this to topics
                            del subject["iri"]
                        else:
                            shortened[short] = subject["iri"]
                            subject["iri"] = short
                    if "iri" in subject and subject["iri"]:
                        topics.append(subject)
        # make sure the triples are in a predictable order, so we can
        # compare on the JSON level for testing
        out['topic'] = sorted(topics, key=lambda x: x[idfld])
        out['iri'] = rooturi
        if '@context' in indata:
            out['@context'] = indata['@context']
        return out

    def _get_json_context(self):
        data = {}
        # step 1: define all prefixes
        for repo in self.repos:
            for (prefix, ns) in repo.ns.items():
                if prefix in data:
                    assert data[prefix] == str(ns), \
                        "Conflicting URIs for prefix %s" % prefix
                else:
                    data[prefix] = str(ns)
        # foaf and rdfs must always be defined prefixes
        data["foaf"] = "http://xmlns.com/foaf/0.1/"
        data["rdfs"] = "http://www.w3.org/2000/01/rdf-schema#"
        # the legacy api client expects some terms to be available
        # using shortened forms (eg 'label' instead of 'rdfs:label'),
        # so we must define them in our context
        if self.config.legacyapi:
            data['iri'] = "@id"
            data['type'] = "@type"
            data['label'] = 'rdfs:label'
            data['name'] = 'foaf:name'
            data['altLabel'] = 'skos:altLabel'
            # data["@language"] = "en"  # how to set this? majority
            # vote of repos/documents? note that it's only a default.
        return data

    def _get_term_graph(self, graphuri):
        # produce an rdf graph of the terms (classes and properties)
        # in the vocabs we're using. This should preferably entail
        # loading the vocabularies (stored as RDF/OWL documents), and
        # expressing all the things that are owl:*Property, owl:Class,
        # rdf:Property and rdf:Class. As an intermediate step, we
        # could have preprocessed rdf graphs (stored in
        # res/vocab/dcterms.ttl, res/vocab/bibo.ttl etc) derived from
        # the vocabularies and pull them in like we pull in namespaces
        # in self.ns. The rdf graph should be rooted in an url (eg
        # http://localhost:8080/var/terms), and then have each term as
        # a foaf:topic. Each term should be described with its
        # rdf:type, rdfs:label (most important!) and possibly
        # rdfs:comment.
        root = URIRef(graphuri)
        g = Graph()
        g.add((root, RDF.type, FOAF.Document))
        bigg = Graph()
        paths = set()
        for repo in self.repos:
            for p, ns in repo.ns.items():
                if p in ("rdf", "rdfs", "owl"):
                    continue
                g.bind(p, ns)
                resourcename = "vocab/%s.ttl" % p
                if repo.resourceloader.exists(resourcename):
                    ontopath = repo.resourceloader.filename(resourcename)
                    if ontopath not in paths:
                        self.log.debug("Loading vocabulary %s" % ontopath)
                        with open(ontopath) as onto:
                            bigg.parse(onto, format="turtle")
                        paths.add(ontopath)
        g.bind("foaf", "http://xmlns.com/foaf/0.1/")
        for (s, p, o) in bigg:
            if p in (RDF.type, RDFS.label, RDFS.comment):
                if isinstance(s, BNode):
                    # occurs in the def of foaf:member
                    continue
                g.add((root, FOAF.topic, s))  # unless we've already added it?
                if isinstance(o, Literal):
                    # remove language typing info
                    o = Literal(str(o))
                g.add((s, p, o))  # control duplicates somehow
        return g

    def _get_common_graph(self, graphuri):
        # create a graph with foaf:names for all entities (publishers,
        # publication series etc) that our data mentions.
        root = URIRef(graphuri)
        g = Graph()
        g.bind("skos", SKOS)
        g.bind("foaf", FOAF)
        g.add((root, RDF.type, FOAF.Document))
        paths = set()
        bigg = Graph()
        for repo in self.repos:
            for cls in inspect.getmro(repo.__class__):
                if hasattr(cls, "alias"):
                    resourcename = "extra/%s.ttl" % cls.alias
                    if repo.resourceloader.exists(resourcename):
                        commonpath = repo.resourceloader.filename(
                            resourcename)
                        if commonpath not in paths:
                            self.log.debug("loading data %s" % commonpath)
                            with open(commonpath) as common:
                                bigg.parse(common, format="turtle")
                            paths.add(commonpath)
        for (s, p, o) in bigg:
            if p in (FOAF.name, SKOS.prefLabel, SKOS.altLabel,
                     BIBO.identifier):
                g.add((root, FOAF.topic, s))
                # strip any typing/langtagging (because of reasons)
                if isinstance(o, Literal):
                    o = Literal(str(o))
                g.add((s, p, o))
                # try to find a type
                g.add((s, RDF.type, bigg.value(s, RDF.type)))
        return g

    def _filepath_to_urlpath(self, path, keep_segments=2):
        """
        :param path: the full or relative filepath to transform into a
                     urlpath
        :param keep_segments: the number of directory segments to keep
                              (the ending filename is always kept)
        """
        # data/repo/rsrc/js/main.js, 3 -> repo/rsrc/js/main.js
        # /var/folders/tmp4q6b1g/rsrc/resources.xml, 1 -> rsrc/resources.xml
        # C:\docume~1\owner\locals~1\temp\tmpgbyuk7\rsrc\css\test.css, 2
        #     -> rsrc/css/test.css
        path = path.replace(os.sep, "/")
        urlpath = "/".join(path.split("/")[-(keep_segments + 1):])
        # print("_filepath_to_urlpath (%s): %s -> %s" %
        #       (keep_segments, path, urlpath))
        return urlpath
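
# A minimal driving sketch for the Resources class above. Everything
# here is an assumed example: the import path, the empty repo list and
# the config values are stand-ins for a real ferenda project setup
# (normally this runs via "ferenda-build.py all makeresources").
from ferenda.resources import Resources  # import path assumed

resources = Resources([], "data/rsrc",
                      sitename="Test",
                      sitedescription="Example site",
                      url="http://localhost:8000/",
                      searchendpoint="/search/",
                      staticsite=False,
                      combineresources=False)
# returns a dict mapping resource types to generated URL paths,
# mirroring what the makeresources test earlier in this section expects
generated = resources.make(api=False)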
class Main(SubTestCase, DocumentEntry):

    def setUp(self):
        self.tempdir = tempfile.mkdtemp()
        loadpath = [self.tempdir + "/primary", self.tempdir + "/secondary"]
        util.writefile(loadpath[0] + os.sep + "primaryresource.txt", "Hello")
        util.writefile(loadpath[1] + os.sep + "secondaryresource.txt", "World")
        self.resourceloader = ResourceLoader(*loadpath)

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_loadpath(self):
        self.assertEqual(
            ResourceLoader.make_loadpath(self),
            ["test/res",     # from test.testResourceLoader.SubTestCase
             "ferenda/res"]  # from ferenda.compat.unittest.TestCase
        )

    def test_exists(self):
        self.assertTrue(self.resourceloader.exists("primaryresource.txt"))
        self.assertTrue(self.resourceloader.exists("secondaryresource.txt"))
        self.assertTrue(self.resourceloader.exists("robots.txt"))
        self.assertFalse(self.resourceloader.exists("nonexistent.txt"))

    def test_open(self):
        with self.resourceloader.open("primaryresource.txt") as fp:
            self.assertEqual("Hello", fp.read())
        with self.resourceloader.open("secondaryresource.txt") as fp:
            self.assertEqual("World", fp.read())
        # should be available through the pkg_resources API
        with self.resourceloader.open("robots.txt") as fp:
            self.assertIn("# robotstxt.org/", fp.read())
        with self.assertRaises(ResourceNotFound):
            with self.resourceloader.open("nonexistent.txt") as fp:
                fp.read()

    def test_openfp(self):
        fp = self.resourceloader.openfp("primaryresource.txt")
        self.assertEqual("Hello", fp.read())
        fp.close()
        fp = self.resourceloader.openfp("secondaryresource.txt")
        self.assertEqual("World", fp.read())
        fp.close()
        fp = self.resourceloader.openfp("robots.txt")
        self.assertIn("# robotstxt.org/", fp.read())
        fp.close()
        with self.assertRaises(ResourceNotFound):
            fp = self.resourceloader.openfp("nonexistent.txt")

    def test_read(self):
        self.assertEqual("Hello",
                         self.resourceloader.load("primaryresource.txt"))
        self.assertEqual("World",
                         self.resourceloader.load("secondaryresource.txt"))
        self.assertIn("# robotstxt.org/",
                      self.resourceloader.load("robots.txt"))
        with self.assertRaises(ResourceNotFound):
            self.resourceloader.load("nonexistent.txt")

    def test_filename(self):
        self.assertEqual(self.tempdir + "/primary/primaryresource.txt",
                         self.resourceloader.filename("primaryresource.txt"))
        self.assertEqual(
            self.tempdir + "/secondary/secondaryresource.txt",
            self.resourceloader.filename("secondaryresource.txt"))
        self.assertEqual("ferenda/res/robots.txt",
                         self.resourceloader.filename("robots.txt"))
        with self.assertRaises(ResourceNotFound):
            self.resourceloader.filename("nonexistent.txt")

    def test_extractdir(self):
        dest = self.tempdir + os.sep + "dest"
        os.mkdir(dest)
        self.resourceloader.extractdir(None, dest)
        self.assertEqual(
            set(os.listdir(dest)),
            set(["primaryresource.txt", "secondaryresource.txt",
                 "robots.txt", "humans.txt"]))
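
# A compact usage sketch of the ResourceLoader API exercised by the
# tests above. "myproject/res" is an assumed example directory; lookups
# fall through the loadpath in priority order and finally reach the
# resources bundled with the ferenda package itself.
from ferenda import ResourceLoader  # import path assumed

loader = ResourceLoader("myproject/res", "ferenda/res")
if loader.exists("robots.txt"):
    print(loader.filename("robots.txt"))   # first match on the loadpath
    print(loader.load("robots.txt")[:40])  # file contents as a string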