def toc_pagesets(self, data, facets):
    """Build one TOC pageset per court-case publication found in ``data``.

    The primary facet is expected to be the RPUBL.rattsfallspublikation
    one, but instead of a single pageset for that facet, one pageset is
    created for every distinct ``rpubl_rattsfallspublikation`` value in
    the rows. Each pageset gets one page for every distinct
    ``rpubl_arsutgava`` value seen together with that publication; pages
    are appended in descending sort order.

    :param data: iterable of rows (mappings) having the keys
                 ``rpubl_rattsfallspublikation`` and ``rpubl_arsutgava``
    :param facets: sequence of facets; only the first element is fetched
    :returns: list of pagesets, ordered by the position of their label in
              ``self._rattsfallspublikation_order``
    :raises AssertionError: if a pageset label is missing from
                            ``self._rattsfallspublikation_order``
    """
    # should be the RPUBL.rattsfallspublikation facet; fetched but not
    # used any further
    primary_facet = facets[0]
    pagesets_by_id = {}
    seen_pairs = set()
    for row in data:
        publication = row['rpubl_rattsfallspublikation']
        if publication not in pagesets_by_id:
            # Prefer the court label from our own mapping and fall back
            # to the skos:prefLabel of the publication.
            fallback = Facet.resourcelabel(row,
                                           'rpubl_rattsfallspublikation',
                                           self.commondata)
            label = self._rattsfallspublikation_label.get(publication,
                                                          fallback)
            pagesets_by_id[publication] = TocPageset(label=label,
                                                     predicate=publication,
                                                     pages=[])
        seen_pairs.add((publication, row['rpubl_arsutgava']))

    for publication, year in sorted(seen_pairs, reverse=True):
        target = pagesets_by_id[publication]
        page_title = "Rättsfall från %s under %s" % (target.label, year)
        target.pages.append(TocPage(linktext=year,
                                    title=page_title,
                                    binding=util.uri_leaf(publication),
                                    value=year))

    # Pagesets are returned in the preferred, arbitrary order given by
    # _rattsfallspublikation_order, so every label must be listed there.
    for candidate in pagesets_by_id.values():
        assert candidate.label in self._rattsfallspublikation_order, \
            "%s not in _rattsfallspublikation_order" % candidate.label
    return sorted(
        pagesets_by_id.values(),
        key=lambda ps: self._rattsfallspublikation_order.index(ps.label))
def import_dataset(sourcegraph, targetgraph):
    """Merge the named resources of ``sourcegraph`` into ``targetgraph``.

    Every subject in ``sourcegraph`` that has a skos:prefLabel is matched
    against a resource in ``targetgraph`` carrying the same label. When no
    match exists a new URI is minted from the skos:altLabel (or, failing
    that, from the leaf of the source URI). Predicates the target resource
    has no value for are copied over, an owl:sameAs link back to the
    source URI is asserted if missing, and the source-to-target mapping is
    recorded in the module-level ``URIMAP``.

    :param sourcegraph: graph to read resources from
    :param targetgraph: graph that is modified in place
    """
    # NOTE(review): block nesting was reconstructed from a source whose
    # whitespace had been collapsed -- confirm against the original file.
    named = sourcegraph.subject_objects(predicate=SKOS.prefLabel)
    for sourceuri, name in named:
        targeturi = targetgraph.value(predicate=SKOS.prefLabel, object=name)
        if not targeturi:
            # Unknown label: mint a new resource URI from a slug.
            slug = sourcegraph.value(sourceuri, SKOS.altLabel)
            if not slug:
                print("WARNING: Can't find skos:altLabel for %s, using alternate method" % sourceuri)
                slug = util.uri_leaf(str(sourceuri))
            normalized = str(slug).lower().translate(TRANS)
            uri = "https://lagen.nu/dataset/%s" % normalized
            print(" Adding new resource %s" % uri)
            targeturi = rdflib.URIRef(uri)

        for pred, obj in sourcegraph.predicate_objects(subject=sourceuri):
            if targetgraph.value(targeturi, pred):
                # the target already has a value for this predicate
                continue
            if pred == DCTERMS.publisher:
                # publishers are rewritten to their already-imported URI
                obj = URIMAP[obj]
            print(" Adding: %s %s %s" % (targeturi,
                                         sourcegraph.qname(pred),
                                         obj))
            targetgraph.add((targeturi, pred, obj))

        # finally add owl:sameAs if not already there
        already_linked = sourceuri in targetgraph.objects(targeturi,
                                                          OWL.sameAs)
        if not already_linked:
            targetgraph.add((targeturi, OWL.sameAs, sourceuri))
            print(" Asserting res %s owl:sameAs %s " % (targeturi, sourceuri))
        URIMAP[sourceuri] = targeturi
def add_finegrained(desc, template, abbrslug):
    """Add fine-grained URI templates (chapter, section, etc.) for one
    fs-template.

    Starting from the property list K, P, S, N, every suffix that has at
    least two entries is walked, and a template is emitted for each
    successively longer run of fragments within it. This yields the
    following fragment patterns (placeholder names shown here are
    illustrative; the real ones come from ``util.uri_leaf`` of each
    property):

        #K{kapnr}
        #K{kapnr}P{parnr}
        #K{kapnr}P{parnr}S{stnr}
        #K{kapnr}P{parnr}S{stnr}N{pnr}
        #P{parnr}
        #P{parnr}S{stnr}
        #P{parnr}S{stnr}N{pnr}
        #S{stnr}
        #S{stnr}N{pnr}

    :param desc: description builder that the templates are written to
    :param template: base URI template that the fragment is appended to
    :param abbrslug: passed on to ``add_bindings`` as its ``slugFrom``
    """
    # NOTE(review): block nesting was reconstructed from a source whose
    # whitespace had been collapsed -- confirm against the original file.
    fragments = [(RPUBL.kapitelnummer, "K"),
                 (RPUBL.paragrafnummer, "P"),
                 (RINFOEX.styckenummer, "S"),
                 (RINFOEX.punktnummer, "N")]
    # The last property never starts a template of its own, hence the -1.
    for start in range(len(fragments) - 1):
        bindings = [RPUBL.forfattningssamling,
                    RPUBL.arsutgava,
                    RPUBL.lopnummer]
        uritemplate = template + "#"
        for prop, letter in fragments[start:]:
            bindings.append(prop)
            with desc.rel(COIN.template):
                placeholder = letter + "{" + util.uri_leaf(prop) + "}"
                uritemplate = uritemplate + placeholder
                desc.value(COIN.uriTemplate, uritemplate)
                add_bindings(desc, bindings, abbrslug)
def add_bindings(desc, bindings, slugFrom):
    """Write one coin:binding description for each property in ``bindings``.

    The template variable is ``"fs"`` for RPUBL.forfattningssamling,
    ``"rtype"`` for RDF.type, and ``util.uri_leaf`` of the property for
    everything else. The first two additionally get a coin:slugFrom
    relation pointing at ``slugFrom``.

    :param desc: description builder that the bindings are written to
    :param bindings: iterable of properties to bind
    :param slugFrom: resource used as coin:slugFrom for the two special
                     properties
    """
    for prop in bindings:
        with desc.rel(COIN.binding):
            desc.rel(COIN.property, prop)
            if prop == RPUBL.forfattningssamling:
                variable, needs_slug = "fs", True
            elif prop == RDF.type:
                variable, needs_slug = "rtype", True
            else:
                variable, needs_slug = util.uri_leaf(prop), False
            desc.value(COIN.variable, variable)
            if needs_slug:
                desc.rel(COIN.slugFrom, slugFrom)
def term(cls, row, binding='dcterms_publisher', resource_graph=None):
    """Return the leaf part of the URI stored in ``row[binding]``.

    If ``util.uri_leaf`` gives back nothing for the value, the value
    itself is returned with spaces replaced by underscores.
    ``resource_graph`` is accepted but not used.

    >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
    ...        "dcterms_title": "A Tale of Two Cities",
    ...        "dcterms_issued": "1859-04-30",
    ...        "dcterms_publisher": "http://example.org/chapman_hall",
    ...        "schema_free": "true"}
    >>> Facet.term(row, "dcterms_publisher")
    'chapman_hall'
    """
    raw = row[binding]
    # FIXME: get a logger and complain when no leaf is found, but still
    # return something that can act as a URI fragment.
    return util.uri_leaf(raw) or raw.replace(" ", "_")
def ident(row, binding, extra):
    """Map the RDF type in ``row[binding]`` to a short document-type id.

    ``self`` is not a parameter; it is taken from the enclosing scope.

    :param row: mapping holding the RDF type under ``binding`` and, for
                rpubl:Utredningsbetankande rows, ``rpubl_utrSerie`` and
                ``uri``
    :param binding: key of the RDF type in ``row``
    :param extra: unused
    :returns: ``"ds"`` or ``"sou"`` for an Utredningsbetankande (chosen by
              the prefix of the series leaf), ``"dir"`` for a
              Kommittedirektiv, ``"prop"`` for a Proposition, and ``None``
              for any other type or when the series is missing (the
              latter is also logged as an error)
    :raises AssertionError: if the series leaf starts with neither "ds"
                            nor "sou"
    """
    rpubl = self.ns['rpubl']
    rdftype = row[binding]
    if rdftype == str(rpubl.Utredningsbetankande):
        series = row['rpubl_utrSerie']
        if not series:
            self.log.error(
                "Row for %s is rpubl:Utredning but lacks rpubl:utrSerie" %
                row['uri'])
            return None
        leaf = util.uri_leaf(series)
        for prefix in ("ds", "sou"):
            if leaf.startswith(prefix):
                return prefix
        # This used to be an ``assert`` that could never hold at this
        # point. Raising explicitly keeps the exception type while making
        # sure the check is not stripped when running with ``python -O``.
        raise AssertionError(
            "leaf was %s, unsure whether this is a SOU or a Ds." % leaf)
    if rdftype == str(rpubl.Kommittedirektiv):
        return "dir"
    if rdftype == str(rpubl.Proposition):
        return "prop"
    return None
def ident(row, binding, extra):
    """Map the RDF type in ``row[binding]`` to a short document-type id.

    ``self`` is not a parameter; it is taken from the enclosing scope.

    :param row: mapping holding the RDF type under ``binding`` and, for
                rpubl:Utredningsbetankande rows, ``rpubl_utrSerie`` and
                ``uri``
    :param binding: key of the RDF type in ``row``
    :param extra: unused
    :returns: ``"ds"`` or ``"sou"`` for an Utredningsbetankande (chosen by
              the prefix of the series leaf), ``"dir"`` for a
              Kommittedirektiv, ``"prop"`` for a Proposition, and ``None``
              for any other type or when the series is missing (the
              latter is also logged as an error)
    :raises AssertionError: if the series leaf starts with neither "ds"
                            nor "sou"
    """
    rpubl = self.ns['rpubl']
    rdftype = row[binding]
    if rdftype == str(rpubl.Utredningsbetankande):
        series = row['rpubl_utrSerie']
        if not series:
            self.log.error(
                "Row for %s is rpubl:Utredning but lacks rpubl:utrSerie" %
                row['uri'])
            return None
        leaf = util.uri_leaf(series)
        for prefix in ("ds", "sou"):
            if leaf.startswith(prefix):
                return prefix
        # This used to be an ``assert`` that could never hold at this
        # point. Raising explicitly keeps the exception type while making
        # sure the check is not stripped when running with ``python -O``.
        raise AssertionError(
            "leaf was %s, unsure whether this is a SOU or a Ds." % leaf)
    if rdftype == str(rpubl.Kommittedirektiv):
        return "dir"
    if rdftype == str(rpubl.Proposition):
        return "prop"
    return None
def toc_select_for_pages(self, data, pagesets, facets):
    """Group the rows of ``data`` into TOC pages and render their items.

    Rows are grouped by what the selector of the first facet returns for
    them (called as ``selector(row, None)``); each such key is unpacked
    as a ``(binding, value)`` pair. Within a group, rows are ordered by
    their ``dcterms_identifier`` split with ``util.split_numalpha``, and
    every row is turned into a TOC item with ``self.toc_item``.

    :param data: iterable of rows (mappings) with a ``dcterms_identifier``
    :param pagesets: pagesets whose ``predicate`` leaf must cover every
                     binding produced by the selector
    :param facets: sequence of facets; only the first one is used
    :returns: dict mapping ``(binding, value)`` to a list of TOC items
    :raises KeyError: if a binding has no corresponding pageset
    """
    def sort_key(row):
        identifier = row['dcterms_identifier']
        parts = util.split_numalpha(identifier)
        if " not " in identifier:
            # ensure notisfall sorts last
            parts[0] = "~" + parts[0]
        return parts

    primary = facets[0]
    grouped = {}
    for row in data:
        grouped.setdefault(primary.selector(row, None), []).append(row)

    known_pagesets = {util.uri_leaf(ps.predicate): ps for ps in pagesets}

    result = {}
    for binding, value in sorted(grouped):
        # The pageset itself is not needed, but the lookup is kept since
        # it fails with KeyError for a binding without a pageset.
        pageset = known_pagesets[binding]
        ordered_rows = sorted(grouped[(binding, value)], key=sort_key)
        result[(binding, value)] = [self.toc_item(binding, row)
                                    for row in ordered_rows]
    return result
def mainfs(row, binding, resource_graph):
    """Return the URI leaf of the resource in ``row[binding]``, following
    a dcterms:isReplacedBy link when one exists.

    :param row: mapping holding a URI string under ``binding``
    :param binding: key to look up in ``row``
    :param resource_graph: graph queried for a dcterms:isReplacedBy value
    :returns: ``util.uri_leaf`` of the replacing URI if the graph has one,
              otherwise of the original URI
    """
    original = URIRef(row[binding])
    replacement = resource_graph.value(original, DCTERMS.isReplacedBy)
    return util.uri_leaf(replacement if replacement else original)
def stats_slice(self, data, facet, resource_graph):
    """Count, per observed value of ``facet``, the documents in ``data``.

    :param data: iterable of rows (mappings); a row that yields an
                 observation must have a ``uri`` key
    :param facet: object providing ``rdftype``, ``dimension_label``,
                  ``dimension_type`` and ``selector``
    :param resource_graph: graph used to compute the qname of the facet's
                           rdftype; also passed on to the selector
    :returns: tuple ``(dimension_label, observations)`` where
              ``observations`` is a ``Counter`` keyed by
              ``(dimension_type, observation)``
    """
    binding = resource_graph.qname(facet.rdftype).replace(":", "_")
    legacy = self.config.legacyapi

    if facet.dimension_label:
        dimension_label = facet.dimension_label
    elif legacy:
        dimension_label = util.uri_leaf(str(facet.rdftype))
    else:
        dimension_label = binding

    dimension_type = facet.dimension_type
    if legacy and dimension_type == "value":
        # legacyapi doesn't support the value type, so it is reported as
        # ref and all string values are converted to fake resource URIs
        dimension_type = "ref"

        def transformer(x):
            return ("http://example.org/fake-resource/%s" % x).replace(
                " ", "_")
    elif legacy and dimension_type == "term":
        # legacyapi expects "Standard" over "bibo:Standard", which is
        # what Facet.qname returns
        def transformer(x):
            return x.split(":")[1]
    else:
        def transformer(x):
            return x

    observations = Counter()
    # (uri, observation) pairs already counted -- avoids double-counting
    observed = set()
    for row in data:
        try:
            # NOTE: this looks at facet.dimension_type, not the local
            # dimension_type, as the latter may have been altered above
            # when legacyapi is set.
            if facet.dimension_type == "ref":
                selected = Facet.defaultselector(row, binding)
            else:
                selected = facet.selector(row, binding, resource_graph)
            observation = transformer(selected)
        except Exception:
            # Deliberately best-effort: selectors commonly rely on
            # information that is simply not present in the rows from
            # some repos. The handler must not do anything that can fail
            # itself (it used to build a never-used debug name via
            # selector.__name__, which raised for selectors lacking that
            # attribute), so the row is just skipped.
            # FIXME: consider logging the failure once it is clear what
            # context is needed for a useful message.
            continue
        if observation is None:
            continue
        seen_key = (row['uri'], observation)
        if seen_key not in observed:
            observed.add(seen_key)
            observations[(dimension_type, observation)] += 1
    return dimension_label, observations
def stats_slice(self, data, facet, resource_graph):
    """Count, per observed value of ``facet``, the documents in ``data``.

    :param data: iterable of rows (mappings); a row that yields an
                 observation must have a ``uri`` key
    :param facet: object providing ``rdftype``, ``dimension_label``,
                  ``dimension_type`` and ``selector``
    :param resource_graph: graph used to compute the qname of the facet's
                           rdftype; also passed on to the selector
    :returns: tuple ``(dimension_label, observations)`` where
              ``observations`` is a ``Counter`` keyed by
              ``(dimension_type, observation)``
    """
    binding = resource_graph.qname(facet.rdftype).replace(":", "_")
    legacy = self.config.legacyapi

    if facet.dimension_label:
        dimension_label = facet.dimension_label
    elif legacy:
        dimension_label = util.uri_leaf(str(facet.rdftype))
    else:
        dimension_label = binding

    dimension_type = facet.dimension_type
    if legacy and dimension_type == "value":
        # legacyapi doesn't support the value type, so it is reported as
        # ref and all string values are converted to fake resource URIs
        dimension_type = "ref"

        def transformer(x):
            return ("http://example.org/fake-resource/%s" % x).replace(
                " ", "_")
    elif legacy and dimension_type == "term":
        # legacyapi expects "Standard" over "bibo:Standard", which is
        # what Facet.qname returns
        def transformer(x):
            return x.split(":")[1]
    else:
        def transformer(x):
            return x

    observations = Counter()
    # (uri, observation) pairs already counted -- avoids double-counting
    observed = set()
    for row in data:
        try:
            # NOTE: this looks at facet.dimension_type, not the local
            # dimension_type, as the latter may have been altered above
            # when legacyapi is set.
            if facet.dimension_type == "ref":
                selected = Facet.defaultselector(row, binding)
            else:
                selected = facet.selector(row, binding, resource_graph)
            observation = transformer(selected)
        except Exception:
            # Deliberately best-effort: selectors commonly rely on
            # information that is simply not present in the rows from
            # some repos. The handler must not do anything that can fail
            # itself (it used to build a never-used debug name via
            # selector.__name__, which raised for selectors lacking that
            # attribute), so the row is just skipped.
            # FIXME: consider logging the failure once it is clear what
            # context is needed for a useful message.
            continue
        if observation is None:
            continue
        seen_key = (row['uri'], observation)
        if seen_key not in observed:
            observed.add(seen_key)
            observations[(dimension_type, observation)] += 1
    return dimension_label, observations