Esempio n. 1
0
    def toc_pagesets(self, data, facets):
        """Build one TOC pageset per rattsfallspublikation found in ``data``.

        Each row contributes its ``rpubl_rattsfallspublikation`` value,
        which identifies the pageset, and its ``rpubl_arsutgava`` value,
        which identifies one page within that pageset. Pages are appended
        in descending order of the (publikation, arsutgava) pair.

        Returns the list of pagesets, ordered by the position of each
        pageset's label in ``self._rattsfallspublikation_order``.
        """
        # The primary facet should be the RPUBL.rattsfallspublikation
        # one. It is looked up but not otherwise used below.
        facet = facets[0]
        by_publication = {}
        seen_pages = set()
        for row in data:
            publication = row['rpubl_rattsfallspublikation']
            if publication not in by_publication:
                # Prefer the court label from our own mapping and fall
                # back to the skos:prefLabel of the publikation.
                label = self._rattsfallspublikation_label.get(
                    publication,
                    Facet.resourcelabel(row,
                                        'rpubl_rattsfallspublikation',
                                        self.commondata))
                by_publication[publication] = TocPageset(
                    label=label, predicate=publication, pages=[])
            seen_pages.add((publication, row['rpubl_arsutgava']))

        for publication, arsutgava in sorted(seen_pages, reverse=True):
            pageset = by_publication[publication]
            page = TocPage(
                linktext=arsutgava,
                title="Rättsfall från %s under %s" % (pageset.label, arsutgava),
                binding=util.uri_leaf(publication),
                value=arsutgava)
            pageset.pages.append(page)

        # Pagesets are returned in the preferred (arbitrary) order given
        # by _rattsfallspublikation_order, so every label must be in it.
        for pageset in by_publication.values():
            assert pageset.label in self._rattsfallspublikation_order, "%s not in _rattsfallspublikation_order" % pageset.label
        return sorted(
            by_publication.values(),
            key=lambda ps: self._rattsfallspublikation_order.index(ps.label))
Esempio n. 2
0
def import_dataset(sourcegraph, targetgraph):
    """Merge the named resources of ``sourcegraph`` into ``targetgraph``.

    Every subject in ``sourcegraph`` that has a skos:prefLabel is matched
    against a resource in ``targetgraph`` carrying the same label. When no
    match exists, a new ``https://lagen.nu/dataset/...`` URI is minted from
    the resource's skos:altLabel (or, failing that, from the leaf of its
    URI). Triples are then copied for every predicate the target resource
    has no value for, an owl:sameAs link back to the source resource is
    added if missing, and the source-to-target mapping is recorded in the
    module-level ``URIMAP``.

    Progress is reported with ``print``; nothing is returned.
    """
    # iterate through all named things (using skos:prefLabel)
    for sourceuri, name in sourcegraph.subject_objects(predicate=SKOS.prefLabel):
        targeturi = targetgraph.value(predicate=SKOS.prefLabel, object=name)
        if not targeturi:
            slug = sourcegraph.value(sourceuri, SKOS.altLabel)
            if not slug:
                print("WARNING: Can't find skos:altLabel for %s, using alternate method" % sourceuri)
                slug = util.uri_leaf(str(sourceuri))
            uri = "https://lagen.nu/dataset/%s" % str(slug).lower().translate(TRANS)
            print("  Adding new resource %s" % uri)
            targeturi = rdflib.URIRef(uri)

        for pred, obj in sourcegraph.predicate_objects(subject=sourceuri):
            if targetgraph.value(targeturi, pred):
                # the target already has a value for this predicate
                continue
            if pred == DCTERMS.publisher:
                # publishers are translated to their target-graph URI
                # NOTE(review): presumably the publisher must have been
                # imported (and recorded in URIMAP) earlier -- confirm
                obj = URIMAP[obj]
            print("    Adding: %s %s %s" % (targeturi, sourcegraph.qname(pred), obj))
            targetgraph.add((targeturi, pred, obj))

        # finally add owl:sameAs if not already there
        if sourceuri not in targetgraph.objects(targeturi, OWL.sameAs):
            targetgraph.add((targeturi, OWL.sameAs, sourceuri))
            print("    Asserting res %s owl:sameAs %s " % (targeturi, sourceuri))
        URIMAP[sourceuri] = targeturi
Esempio n. 3
0
def add_finegrained(desc, template, abbrslug):
    """Add fine-grained fragment URI templates below ``template``.

    For every suffix of the hierarchy kapitel (K), paragraf (P),
    stycke (S), punkt (N) that is at least two levels long, one template
    is emitted for each prefix of that suffix, giving these fragments:

        #K{kapnr}  #K{kapnr}P{parnr}  #K{kapnr}P{parnr}S{stnr}
        #K{kapnr}P{parnr}S{stnr}N{pnr}
        #P{parnr}  #P{parnr}S{stnr}  #P{parnr}S{stnr}N{pnr}
        #S{stnr}   #S{stnr}N{pnr}

    (the variable names actually used are the URI leaves of the
    respective properties). Each template is written to ``desc`` inside
    a COIN.template relation, together with the bindings accumulated so
    far, via ``add_bindings``.
    """
    proptuples = [(RPUBL.kapitelnummer, "K"),
                  (RPUBL.paragrafnummer, "P"),
                  (RINFOEX.styckenummer, "S"),
                  (RINFOEX.punktnummer, "N")]
    # The last level never starts a series of its own, hence "- 1".
    for start in range(len(proptuples) - 1):
        bindings = [RPUBL.forfattningssamling, RPUBL.arsutgava, RPUBL.lopnummer]
        uritemplate = template + "#"
        for prop, fragletter in proptuples[start:]:
            bindings.append(prop)
            with desc.rel(COIN.template):
                uritemplate += "".join(
                    (fragletter, "{", util.uri_leaf(prop), "}"))
                desc.value(COIN.uriTemplate, uritemplate)
                add_bindings(desc, bindings, abbrslug)
Esempio n. 4
0
def add_bindings(desc, bindings, slugFrom):
    """Write one COIN.binding relation to ``desc`` per property in ``bindings``.

    RPUBL.forfattningssamling is bound to the variable ``fs`` and RDF.type
    to ``rtype``; both additionally get a COIN.slugFrom relation pointing
    at ``slugFrom``. Every other property is bound to a variable named
    after the leaf of its URI.
    """
    for prop in bindings:
        with desc.rel(COIN.binding):
            desc.rel(COIN.property, prop)
            if prop == RPUBL.forfattningssamling:
                slug_variable = "fs"
            elif prop == RDF.type:
                slug_variable = "rtype"
            else:
                slug_variable = None

            if slug_variable is None:
                desc.value(COIN.variable, util.uri_leaf(prop))
            else:
                desc.value(COIN.variable, slug_variable)
                desc.rel(COIN.slugFrom, slugFrom)
Esempio n. 5
0
    def term(cls, row, binding='dcterms_publisher', resource_graph=None):
        """Return the leaf part of the URI found in ``row[binding]``.

        When no leaf can be extracted, the raw value is returned instead,
        with spaces replaced by underscores. ``resource_graph`` is
        accepted but not used.

        >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
        ...        "dcterms_title": "A Tale of Two Cities",
        ...        "dcterms_issued": "1859-04-30",
        ...        "dcterms_publisher": "http://example.org/chapman_hall",
        ...        "schema_free": "true"}
        >>> Facet.term(row, "dcterms_publisher")
        'chapman_hall'
        """
        # FIXME: get a logger and complain when falling back, but also
        # get something that can act as a URI fragment
        return util.uri_leaf(row[binding]) or row[binding].replace(" ", "_")
Esempio n. 6
0
    def term(cls, row, binding='dcterms_publisher', resource_graph=None):
        """Extract the URI leaf of ``row[binding]``.

        Falls back to the raw value with every space turned into an
        underscore if ``util.uri_leaf`` yields nothing. The
        ``resource_graph`` argument is not used.

        >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
        ...        "dcterms_title": "A Tale of Two Cities",
        ...        "dcterms_issued": "1859-04-30",
        ...        "dcterms_publisher": "http://example.org/chapman_hall",
        ...        "schema_free": "true"}
        >>> Facet.term(row, "dcterms_publisher")
        'chapman_hall'
        """
        leaf = util.uri_leaf(row[binding])
        # FIXME: get a logger and complain when there is no leaf, but
        # also get something that can act as a URI fragment
        return leaf if leaf else row[binding].replace(" ", "_")
Esempio n. 7
0
 def ident(row, binding, extra):
     """Map the RDF type in ``row[binding]`` to a short document-kind tag.

     Returns ``"dir"`` for rpubl:Kommittedirektiv, ``"prop"`` for
     rpubl:Proposition and, for rpubl:Utredningsbetankande, ``"ds"`` or
     ``"sou"`` depending on the prefix of the leaf of
     ``row['rpubl_utrSerie']``. Returns None for any other type, and
     also (after logging an error) when an Utredningsbetankande row has
     no rpubl:utrSerie. ``extra`` is not used; ``self`` is taken from
     the enclosing scope.
     """
     rdftype = row[binding]
     rpubl = self.ns['rpubl']
     if rdftype == str(rpubl.Utredningsbetankande):
         serie = row['rpubl_utrSerie']
         if not serie:
             self.log.error("Row for %s is rpubl:Utredning but lacks rpubl:utrSerie" % row['uri'])
             return None
         leaf = util.uri_leaf(serie)
         for prefix in ("ds", "sou"):
             if leaf.startswith(prefix):
                 return prefix
         # neither prefix matched, so this assertion flags the unknown series
         assert leaf in ("sou", "ds"), "leaf was %s, unsure whether this is a SOU or a Ds." % leaf
         return None
     if rdftype == str(rpubl.Kommittedirektiv):
         return "dir"
     if rdftype == str(rpubl.Proposition):
         return "prop"
     return None
Esempio n. 8
0
 def ident(row, binding, extra):
     """Return a short tag for the document type found in ``row[binding]``.

     rpubl:Utredningsbetankande rows yield ``"ds"`` or ``"sou"``, chosen
     by the prefix of the URI leaf of ``row['rpubl_utrSerie']``;
     rpubl:Kommittedirektiv yields ``"dir"`` and rpubl:Proposition
     ``"prop"``. Anything else yields None, as does an
     Utredningsbetankande row lacking rpubl:utrSerie (which is logged as
     an error). ``extra`` is unused and ``self`` comes from the
     enclosing scope.
     """
     rdftype = row[binding]
     ns = self.ns['rpubl']
     is_utredning = rdftype == str(ns.Utredningsbetankande)
     if not is_utredning:
         if rdftype == str(ns.Kommittedirektiv):
             return "dir"
         return "prop" if rdftype == str(ns.Proposition) else None

     if row['rpubl_utrSerie']:
         leaf = util.uri_leaf(row['rpubl_utrSerie'])
         if leaf.startswith("ds"):
             return "ds"
         if leaf.startswith("sou"):
             return "sou"
         # unknown series: the assertion reports the unexpected leaf
         assert leaf in ("sou", "ds"), (
             "leaf was %s, unsure whether this is a SOU or a Ds." % leaf)
     else:
         message = "Row for %s is rpubl:Utredning but lacks rpubl:utrSerie"
         self.log.error(message % row['uri'])
     return None
Esempio n. 9
0
 def toc_select_for_pages(self, data, pagesets, facets):
     """Group ``data`` into TOC pages and render the items of each page.

     Rows are grouped by the key returned by the first facet's selector
     (a ``(binding, value)`` pair). Within each group the rows are
     sorted by their ``dcterms_identifier`` and turned into TOC items
     with ``self.toc_item``.

     Returns a dict mapping each ``(binding, value)`` key to its list of
     TOC items. Raises KeyError if a key's binding does not correspond
     to the URI leaf of any pageset's predicate.
     """
     def sortkey(row):
         identifier = row['dcterms_identifier']
         parts = util.split_numalpha(identifier)
         if " not " in identifier:
             parts[0] = "~" + parts[0]  # ensure notisfall sorts last
         return parts

     facet = facets[0]
     grouped = {}
     for row in data:
         grouped.setdefault(facet.selector(row, None), []).append(row)

     pagesetdict = {util.uri_leaf(ps.predicate): ps for ps in pagesets}

     result = {}
     for binding, value in sorted(grouped):
         # only the lookup matters here: an unknown binding is an error
         pageset = pagesetdict[binding]
         ordered_rows = sorted(grouped[(binding, value)], key=sortkey)
         result[(binding, value)] = [self.toc_item(binding, r)
                                     for r in ordered_rows]
     return result
Esempio n. 10
0
 def mainfs(row, binding, resource_graph):
     """Return the URI leaf of the resource named by ``row[binding]``.

     If ``resource_graph`` has a dcterms:isReplacedBy value for that
     resource, the leaf of the replacing resource is returned instead.
     """
     original = URIRef(row[binding])
     replacement = resource_graph.value(original, DCTERMS.isReplacedBy)
     return util.uri_leaf(replacement if replacement else original)
Esempio n. 11
0
    def stats_slice(self, data, facet, resource_graph):
        """Count, for one facet, how many distinct documents have each value.

        :param data: iterable of rows (mappings). Every row for which an
                     observation can be computed must have a ``uri`` key.
        :param facet: object providing ``rdftype``, ``dimension_label``,
                      ``dimension_type`` and ``selector``.
        :param resource_graph: graph used to compute the qname of
                               ``facet.rdftype``; it is also passed on
                               to ``facet.selector``.
        :returns: a ``(dimension_label, observations)`` tuple, where
                  ``observations`` is a ``Counter`` keyed by
                  ``(dimension_type, observation)``.

        Rows for which the selector (or the legacy-API transformation of
        its result) raises are skipped silently, as are rows whose
        observation is None. Each ``(uri, observation)`` pair is counted
        only once.
        """
        binding = resource_graph.qname(facet.rdftype).replace(":", "_")
        if facet.dimension_label:
            dimension_label = facet.dimension_label
        elif self.config.legacyapi:
            dimension_label = util.uri_leaf(str(facet.rdftype))
        else:
            dimension_label = binding

        dimension_type = facet.dimension_type
        if self.config.legacyapi and dimension_type == "value":
            # legacyapi doesn't support the value type, we must
            # convert it into ref, and convert all string values to
            # fake resource ref URIs
            dimension_type = "ref"

            def transformer(x):
                return ("http://example.org/fake-resource/%s" % x).replace(" ", "_")
        elif self.config.legacyapi and dimension_type == "term":
            # legacyapi expects "Standard" over "bibo:Standard", which
            # is what Facet.qname returns
            def transformer(x):
                return x.split(":")[1]
        else:
            def transformer(x):
                return x

        observations = Counter()
        # (uri, observation) pairs already counted -- avoids
        # double-counting a document that appears in several rows
        observed = set()
        for row in data:
            try:
                # NOTE: we look at facet.dimension_type, not
                # dimension_type, as the latter may have been altered
                # above if legacyapi is set
                if facet.dimension_type == "ref":
                    observation = transformer(
                        Facet.defaultselector(row, binding))
                else:
                    observation = transformer(
                        facet.selector(row, binding, resource_graph))
            except Exception:
                # Deliberately best-effort: selectors commonly rely on
                # information that is simply not present in the rows
                # from some repos. The handler must not inspect the
                # selector (e.g. its __name__), since not every callable
                # has such attributes and a failure here would turn a
                # skipped row into a crash.
                continue
            if observation is None:
                continue
            seen = (row['uri'], observation)
            if seen not in observed:
                observed.add(seen)
                observations[(dimension_type, observation)] += 1
        return dimension_label, observations
Esempio n. 12
0
    def stats_slice(self, data, facet, resource_graph):
        """Tally the observations of a single facet over ``data``.

        :param data: iterable of rows (mappings); rows that produce an
                     observation must contain a ``uri`` key.
        :param facet: object exposing ``rdftype``, ``dimension_label``,
                      ``dimension_type`` and ``selector``.
        :param resource_graph: graph used to derive the binding name
                               from ``facet.rdftype`` and handed to
                               ``facet.selector``.
        :returns: ``(dimension_label, observations)``; ``observations``
                  is a ``Counter`` whose keys are
                  ``(dimension_type, observation)`` tuples.

        A row is skipped without notice when computing its observation
        raises, or when the observation is None. A given
        ``(uri, observation)`` combination contributes at most once.
        """
        binding = resource_graph.qname(facet.rdftype).replace(":", "_")
        if facet.dimension_label:
            dimension_label = facet.dimension_label
        elif self.config.legacyapi:
            dimension_label = util.uri_leaf(str(facet.rdftype))
        else:
            dimension_label = binding

        legacy = self.config.legacyapi
        dimension_type = facet.dimension_type
        if legacy and dimension_type == "value":
            # legacyapi doesn't support the value type, so it is
            # reported as ref and every string value is converted to a
            # fake resource URI
            dimension_type = "ref"

            def transformer(x):
                fake_uri = "http://example.org/fake-resource/%s" % x
                return fake_uri.replace(" ", "_")
        elif legacy and dimension_type == "term":
            # legacyapi expects "Standard" over "bibo:Standard", which
            # is what Facet.qname returns
            def transformer(x):
                return x.split(":")[1]
        else:
            def transformer(x):
                return x

        # the decision below is based on facet.dimension_type, not
        # dimension_type, as the latter may have been altered for
        # legacyapi
        use_default_selector = facet.dimension_type == "ref"

        observations = Counter()
        # one count per uri+observation seen -- avoid double-counting
        observed = set()
        for row in data:
            try:
                if use_default_selector:
                    raw = Facet.defaultselector(row, binding)
                else:
                    raw = facet.selector(row, binding, resource_graph)
                observation = transformer(raw)
            except Exception:
                # Swallowed on purpose: the selector often depends on
                # information that some repos' rows do not carry.
                # Nothing about the selector is inspected here, because
                # attributes such as __name__ are not available on every
                # callable and looking them up could itself raise.
                continue
            if observation is not None:
                pair = (row['uri'], observation)
                if pair not in observed:
                    observed.add(pair)
                    observations[(dimension_type, observation)] += 1
        return dimension_label, observations
Esempio n. 13
0
 def mainfs(row, binding, resource_graph):
     """Give the URI leaf for ``row[binding]``, following dcterms:isReplacedBy.

     The value in ``row[binding]`` is treated as a resource URI. When
     ``resource_graph`` records a replacement for it, the replacement's
     leaf is returned; otherwise the leaf of the URI itself.
     """
     uri = URIRef(row[binding])
     return util.uri_leaf(
         resource_graph.value(uri, DCTERMS.isReplacedBy) or uri)