Beispiel #1
0
class TestDescriber(unittest.TestCase):
    """Check the read accessors of ``Describer`` against a tiny graph.

    The fixture graph holds a single ``foaf:Document`` with one title,
    two identifiers, one issued date, one ``dcterms:references`` link
    and two ``dcterms:subject`` links. The tests use properties with
    zero, one and two values to probe each accessor.
    """

    def setUp(self):
        # Turtle source for the fixture; parsed afresh for every test.
        turtle_doc = """
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/doc> a foaf:Document;
        dcterms:title "Hello world"@en ;
        dcterms:identifier "ID1",
                       "ID2";
        dcterms:issued "2013-10-11"^^xsd:date;
        dcterms:references <http://example.org/doc2>;
        dcterms:subject <http://example.org/concept1>,
                    <http://example.org/concept2> .
        """
        self.graph = Graph()
        self.graph.parse(data=turtle_doc, format="turtle")
        self.desc = Describer(self.graph, "http://example.org/doc")

    def test_getvalues(self):
        # absent property -> empty list
        absent = self.desc.getvalues(DCTERMS.alternate)
        self.assertEqual(absent, [])
        # single-valued property -> one-element list
        titles = self.desc.getvalues(DCTERMS.title)
        self.assertEqual(titles, ["Hello world"])
        # multi-valued property -> all values; order is not checked
        identifiers = self.desc.getvalues(DCTERMS.identifier)
        self.assertEqual(set(identifiers), {"ID1", "ID2"})

    def test_getvalue(self):
        title = self.desc.getvalue(DCTERMS.title)
        self.assertEqual(title, "Hello world")
        # the xsd:date literal is expected back as a datetime.date
        issued = self.desc.getvalue(DCTERMS.issued)
        self.assertEqual(issued, datetime.date(2013, 10, 11))
        # neither zero values nor two values is acceptable here
        self.assertRaises(KeyError, self.desc.getvalue, DCTERMS.alternate)
        self.assertRaises(KeyError, self.desc.getvalue, DCTERMS.identifier)

    def test_getrels(self):
        # absent relation -> empty list
        absent = self.desc.getrels(DCTERMS.replaces)
        self.assertEqual(absent, [])
        # single relation -> one-element list of URI strings
        references = self.desc.getrels(DCTERMS.references)
        self.assertEqual(references, ["http://example.org/doc2"])
        # two relations -> both URIs; order is not checked
        subjects = self.desc.getrels(DCTERMS.subject)
        self.assertEqual(set(subjects),
                         {"http://example.org/concept1",
                          "http://example.org/concept2"})

    def test_getrel(self):
        reference = self.desc.getrel(DCTERMS.references)
        self.assertEqual(reference, "http://example.org/doc2")
        # neither zero relations nor two relations is acceptable here
        self.assertRaises(KeyError, self.desc.getrel, DCTERMS.replaces)
        self.assertRaises(KeyError, self.desc.getrel, DCTERMS.subject)

    def test_getrdftype(self):
        rdftype = self.desc.getrdftype()
        self.assertEqual(rdftype, "http://xmlns.com/foaf/0.1/Document")
Beispiel #2
0
class TestDescriber(unittest.TestCase):
    """Tests for the value/relation getters of ``Describer``.

    Every test runs against the same one-document graph built in
    ``setUp``: a ``foaf:Document`` with a title, two identifiers, an
    issued date, one reference and two subjects.
    """

    def setUp(self):
        # Fixture data in Turtle syntax, re-parsed before each test.
        fixture = """
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/doc> a foaf:Document;
        dcterms:title "Hello world"@en ;
        dcterms:identifier "ID1",
                       "ID2";
        dcterms:issued "2013-10-11"^^xsd:date;
        dcterms:references <http://example.org/doc2>;
        dcterms:subject <http://example.org/concept1>,
                    <http://example.org/concept2> .
        """
        self.graph = Graph()
        self.graph.parse(data=fixture, format="turtle")
        self.desc = Describer(self.graph, "http://example.org/doc")

    def test_getvalues(self):
        values_of = self.desc.getvalues
        # no value at all, exactly one value, then two values
        self.assertEqual(values_of(DCTERMS.alternate), [])
        self.assertEqual(values_of(DCTERMS.title), ["Hello world"])
        # compared as sets: the order of the identifiers is not checked
        self.assertEqual(set(values_of(DCTERMS.identifier)), {"ID1", "ID2"})

    def test_getvalue(self):
        expected_issued = datetime.date(2013, 10, 11)
        self.assertEqual(self.desc.getvalue(DCTERMS.title), "Hello world")
        self.assertEqual(self.desc.getvalue(DCTERMS.issued), expected_issued)
        # a property with no value and one with two values must both fail
        for prop in (DCTERMS.alternate, DCTERMS.identifier):
            with self.assertRaises(KeyError):
                self.desc.getvalue(prop)

    def test_getrels(self):
        rels_of = self.desc.getrels
        expected_subjects = {"http://example.org/concept1",
                             "http://example.org/concept2"}
        # no relation at all, exactly one relation, then two relations
        self.assertEqual(rels_of(DCTERMS.replaces), [])
        self.assertEqual(rels_of(DCTERMS.references),
                         ["http://example.org/doc2"])
        # compared as sets: the order of the subjects is not checked
        self.assertEqual(set(rels_of(DCTERMS.subject)), expected_subjects)

    def test_getrel(self):
        self.assertEqual(self.desc.getrel(DCTERMS.references),
                         "http://example.org/doc2")
        # a relation with no target and one with two targets must both fail
        for prop in (DCTERMS.replaces, DCTERMS.subject):
            with self.assertRaises(KeyError):
                self.desc.getrel(prop)

    def test_getrdftype(self):
        expected_type = "http://xmlns.com/foaf/0.1/Document"
        self.assertEqual(self.desc.getrdftype(), expected_type)
Beispiel #3
0
    def infer_metadata(self, resource, basefile):
        """Add derived metadata about a document to its RDF graph, in place.

        The steps, in order:

        * drop every ``dcterms:issued`` value that has no datatype,
        * add ``owl:sameAs`` links between ``resource``, the URI coined
          by ``self.sameas_minter`` and ``self.canonical_uri(basefile,
          True)``,
        * add a coined ``owl:sameAs`` to every
          ``rpubl:konsolideringsunderlag`` target that has an
          ``rdf:type``,
        * record ``orig_updated`` / ``orig_checked`` from the document
          entry, when set,
        * copy ``dcterms:alternate`` and (unless it just repeats the
          title) ``rdfs:label`` from ``self.commondata`` for the
          ``rpubl:konsoliderar`` target.

        Nothing is returned; ``resource.graph`` is modified.

        :param resource: the document node. It is used through its
                         ``objects``/``value``/``add``/``remove``
                         methods and its ``graph`` and ``identifier``
                         attributes (presumably an rdflib ``Resource``
                         -- confirm against the caller).
        :param basefile: document identifier, passed on to
                         ``self.canonical_uri`` and
                         ``self.store.documententry_path``.

        NOTE(review): ``desc.getrel`` is called unguarded, so whatever
        it raises when ``rpubl:konsoliderar`` is missing or ambiguous
        propagates to the caller.
        """
        # remove the bogus dcterms:issued thing that we only added to
        # aid URI generation. NB: This is removed in the superclass'
        # postprocess_doc as well, because for this lagen.nu-derived
        # class it needs to be done at this point, but for use of the
        # superclass directly, it needs to be done at some point.
        #
        # The matches are snapshotted with list() first: removing
        # triples while lazily iterating over the same graph is only
        # safe if the underlying store happens to tolerate it.
        for o in list(resource.objects(DCTERMS.issued)):
            if not o.datatype:
                resource.remove(DCTERMS.issued, o)
        sameas_uri = self.sameas_minter.space.coin_uri(resource)
        resource.add(OWL.sameAs, URIRef(sameas_uri))
        resource.graph.add((URIRef(self.canonical_uri(basefile, True)),
                            OWL.sameAs, resource.identifier))
        # then find each rpubl:konsolideringsunderlag, and create
        # owl:sameas for them as well (again iterating over a
        # snapshot, since the loop body adds triples to the graph)
        for subresource in list(resource.objects(RPUBL.konsolideringsunderlag)):
            # sometimes there'll be a rpubl:konsolideringsunderlag to
            # a resource URI but no actual data about that
            # resource. This seems to happen if SFST is updated but
            # SFSR is not. In those cases we can't generate a
            # owl:sameAs URI since we have no other data about the
            # resource.
            if subresource.value(RDF.type):
                uri = self.sameas_minter.space.coin_uri(subresource)
                subresource.add(OWL.sameAs, URIRef(uri))
        desc = Describer(resource.graph, resource.identifier)
        de = DocumentEntry(self.store.documententry_path(basefile))
        if de.orig_updated:
            desc.value(RINFOEX.senastHamtad, de.orig_updated)
        if de.orig_checked:
            desc.value(RINFOEX.senastKontrollerad, de.orig_checked)
        # the URI this document consolidates; used as the lookup key
        # into self.commondata below
        rooturi = URIRef(desc.getrel(RPUBL.konsoliderar))

        v = self.commondata.value(rooturi, DCTERMS.alternate, any=True)
        if v:
            desc.value(DCTERMS.alternate, v)
        v = self.commondata.value(rooturi, RDFS.label, any=True)
        if v:
            # don't include labels if they're essentially the same as
            # dcterms:title (legalref needs it to be able to parse
            # refs to laws that typically don't include SFS numbers,
            # so that's why they're in sfs.ttl)
            #
            # basetitle is the part of the title before its first " (";
            # it is lowercased for the comparison, v is used as-is.
            basetitle = str(resource.value(DCTERMS.title)).rsplit(" (")[0]
            if not v.startswith(basetitle.lower()):
                desc.value(RDFS.label, util.ucfirst(v))
Beispiel #4
0
    def infer_metadata(self, resource, basefile):
        """Add derived metadata about a document to its RDF graph, in place.

        The steps, in order:

        * drop every ``dcterms:issued`` value that has no datatype,
        * add ``owl:sameAs`` links between ``resource``, the URI coined
          by ``self.sameas_minter`` and ``self.canonical_uri(basefile,
          True)``,
        * add a coined ``owl:sameAs`` to every
          ``rpubl:konsolideringsunderlag`` target that has an
          ``rdf:type``,
        * record ``orig_updated`` / ``orig_checked`` from the document
          entry, when set,
        * copy ``dcterms:alternate`` and (unless it just repeats the
          title) ``rdfs:label`` from ``self.commondata`` for the
          ``rpubl:konsoliderar`` target.

        Nothing is returned; ``resource.graph`` is modified.

        :param resource: the document node. It is used through its
                         ``objects``/``value``/``add``/``remove``
                         methods and its ``graph`` and ``identifier``
                         attributes (presumably an rdflib ``Resource``
                         -- confirm against the caller).
        :param basefile: document identifier, passed on to
                         ``self.canonical_uri`` and
                         ``self.store.documententry_path``.

        NOTE(review): ``desc.getrel`` is called unguarded, so whatever
        it raises when ``rpubl:konsoliderar`` is missing or ambiguous
        propagates to the caller.
        """
        # remove the bogus dcterms:issued thing that we only added to
        # aid URI generation. NB: This is removed in the superclass'
        # postprocess_doc as well, because for this lagen.nu-derived
        # class it needs to be done at this point, but for use of the
        # superclass directly, it needs to be done at some point.
        #
        # The matches are snapshotted with list() first: removing
        # triples while lazily iterating over the same graph is only
        # safe if the underlying store happens to tolerate it.
        for o in list(resource.objects(DCTERMS.issued)):
            if not o.datatype:
                resource.remove(DCTERMS.issued, o)
        sameas_uri = self.sameas_minter.space.coin_uri(resource)
        resource.add(OWL.sameAs, URIRef(sameas_uri))
        resource.graph.add((URIRef(self.canonical_uri(basefile, True)),
                            OWL.sameAs, resource.identifier))
        # then find each rpubl:konsolideringsunderlag, and create
        # owl:sameas for them as well (again iterating over a
        # snapshot, since the loop body adds triples to the graph)
        for subresource in list(resource.objects(RPUBL.konsolideringsunderlag)):
            # sometimes there'll be a rpubl:konsolideringsunderlag to
            # a resource URI but no actual data about that
            # resource. This seems to happen if SFST is updated but
            # SFSR is not. In those cases we can't generate a
            # owl:sameAs URI since we have no other data about the
            # resource.
            if subresource.value(RDF.type):
                uri = self.sameas_minter.space.coin_uri(subresource)
                subresource.add(OWL.sameAs, URIRef(uri))
        desc = Describer(resource.graph, resource.identifier)
        de = DocumentEntry(self.store.documententry_path(basefile))
        if de.orig_updated:
            desc.value(RINFOEX.senastHamtad, de.orig_updated)
        if de.orig_checked:
            desc.value(RINFOEX.senastKontrollerad, de.orig_checked)
        # the URI this document consolidates; used as the lookup key
        # into self.commondata below
        rooturi = URIRef(desc.getrel(RPUBL.konsoliderar))

        v = self.commondata.value(rooturi, DCTERMS.alternate, any=True)
        if v:
            desc.value(DCTERMS.alternate, v)
        v = self.commondata.value(rooturi, RDFS.label, any=True)
        if v:
            # don't include labels if they're essentially the same as
            # dcterms:title (legalref needs it to be able to parse
            # refs to laws that typically don't include SFS numbers,
            # so that's why they're in sfs.ttl)
            #
            # basetitle is the part of the title before its first " (";
            # it is lowercased for the comparison, v is used as-is.
            basetitle = str(resource.value(DCTERMS.title)).rsplit(" (")[0]
            if not v.startswith(basetitle.lower()):
                desc.value(RDFS.label, util.ucfirst(v))
Beispiel #5
0
 def selector(entry):
     """Tell whether ``entry``'s dcterms:subject equals ``category``.

     The distilled data for ``entry.basefile`` is read from
     ``self.store`` and parsed into a fresh graph; the single
     ``dcterms:subject`` relation of ``entry.id`` is then compared
     with ``category``. ``self`` and ``category`` come from the
     enclosing scope.
     """
     distilled_graph = Graph()
     with self.store.open_distilled(entry.basefile) as stream:
         rdf_payload = stream.read()
         distilled_graph.parse(data=rdf_payload)
     described = Describer(distilled_graph, entry.id)
     subject_predicate = self.ns['dcterms'].subject
     entry_subject = described.getrel(subject_predicate)
     return entry_subject == category
Beispiel #6
0
 def selector(entry):
     """Return True when the entry's dcterms:subject is ``category``.

     Loads the distilled data stored for ``entry.basefile`` into a new
     graph, looks up the one ``dcterms:subject`` relation of
     ``entry.id`` and compares it with ``category``. Both ``self`` and
     ``category`` are taken from the enclosing scope.
     """
     entry_graph = Graph()
     with self.store.open_distilled(entry.basefile) as distilled:
         serialized = distilled.read()
         entry_graph.parse(data=serialized)
     describer = Describer(entry_graph, entry.id)
     found = describer.getrel(self.ns['dcterms'].subject)
     return found == category