class TestDescriber(unittest.TestCase):
    """Check the read accessors of ``Describer`` against a small fixture graph.

    The fixture describes a single resource, ``http://example.org/doc``,
    with a language-tagged title, two identifiers, one typed date, one
    reference and two subjects.
    """

    def setUp(self):
        # The fixture is assembled from adjacent literals so that each
        # Turtle statement sits on its own source line.
        turtle = (
            ' @prefix dcterms: <http://purl.org/dc/terms/> .'
            ' @prefix foaf: <http://xmlns.com/foaf/0.1/> .'
            ' @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .'
            ' <http://example.org/doc> a foaf:Document;'
            ' dcterms:title "Hello world"@en ;'
            ' dcterms:identifier "ID1", "ID2";'
            ' dcterms:issued "2013-10-11"^^xsd:date;'
            ' dcterms:references <http://example.org/doc2>;'
            ' dcterms:subject <http://example.org/concept1>,'
            ' <http://example.org/concept2> . '
        )
        self.graph = Graph()
        self.graph.parse(data=turtle, format="turtle")
        self.desc = Describer(self.graph, "http://example.org/doc")

    def test_getvalues(self):
        # Zero, one and several literal values all come back as a list.
        getvalues = self.desc.getvalues
        self.assertEqual(getvalues(DCTERMS.alternate), [])
        self.assertEqual(getvalues(DCTERMS.title), ["Hello world"])
        self.assertEqual(set(getvalues(DCTERMS.identifier)), {"ID1", "ID2"})

    def test_getvalue(self):
        getvalue = self.desc.getvalue
        self.assertEqual(getvalue(DCTERMS.title), "Hello world")
        # The xsd:date literal is expected back as a native date object.
        self.assertEqual(getvalue(DCTERMS.issued),
                         datetime.date(2013, 10, 11))
        # Both "no value" and "more than one value" must raise KeyError.
        self.assertRaises(KeyError, getvalue, DCTERMS.alternate)
        self.assertRaises(KeyError, getvalue, DCTERMS.identifier)

    def test_getrels(self):
        # Zero, one and several related URIs all come back as a list.
        getrels = self.desc.getrels
        self.assertEqual(getrels(DCTERMS.replaces), [])
        self.assertEqual(getrels(DCTERMS.references),
                         ["http://example.org/doc2"])
        expected_subjects = {"http://example.org/concept1",
                             "http://example.org/concept2"}
        self.assertEqual(set(getrels(DCTERMS.subject)), expected_subjects)

    def test_getrel(self):
        getrel = self.desc.getrel
        self.assertEqual(getrel(DCTERMS.references),
                         "http://example.org/doc2")
        # Both "no relation" and "more than one relation" must raise KeyError.
        self.assertRaises(KeyError, getrel, DCTERMS.replaces)
        self.assertRaises(KeyError, getrel, DCTERMS.subject)

    def test_getrdftype(self):
        expected_type = "http://xmlns.com/foaf/0.1/Document"
        self.assertEqual(self.desc.getrdftype(), expected_type)
class TestDescriber(unittest.TestCase):
    """Check the read accessors of ``Describer`` against a small fixture graph.

    The fixture describes a single resource, ``http://example.org/doc``,
    with a language-tagged title, two identifiers, one typed date, one
    reference and two subjects.
    """

    def setUp(self):
        # The fixture is assembled from adjacent literals so that each
        # Turtle statement sits on its own source line.
        turtle = (
            ' @prefix dcterms: <http://purl.org/dc/terms/> .'
            ' @prefix foaf: <http://xmlns.com/foaf/0.1/> .'
            ' @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .'
            ' <http://example.org/doc> a foaf:Document;'
            ' dcterms:title "Hello world"@en ;'
            ' dcterms:identifier "ID1", "ID2";'
            ' dcterms:issued "2013-10-11"^^xsd:date;'
            ' dcterms:references <http://example.org/doc2>;'
            ' dcterms:subject <http://example.org/concept1>,'
            ' <http://example.org/concept2> . '
        )
        self.graph = Graph()
        self.graph.parse(data=turtle, format="turtle")
        self.desc = Describer(self.graph, "http://example.org/doc")

    def test_getvalues(self):
        # Zero, one and several literal values all come back as a list.
        getvalues = self.desc.getvalues
        self.assertEqual(getvalues(DCTERMS.alternate), [])
        self.assertEqual(getvalues(DCTERMS.title), ["Hello world"])
        self.assertEqual(set(getvalues(DCTERMS.identifier)), {"ID1", "ID2"})

    def test_getvalue(self):
        getvalue = self.desc.getvalue
        self.assertEqual(getvalue(DCTERMS.title), "Hello world")
        # The xsd:date literal is expected back as a native date object.
        self.assertEqual(getvalue(DCTERMS.issued),
                         datetime.date(2013, 10, 11))
        # Both "no value" and "more than one value" must raise KeyError.
        self.assertRaises(KeyError, getvalue, DCTERMS.alternate)
        self.assertRaises(KeyError, getvalue, DCTERMS.identifier)

    def test_getrels(self):
        # Zero, one and several related URIs all come back as a list.
        getrels = self.desc.getrels
        self.assertEqual(getrels(DCTERMS.replaces), [])
        self.assertEqual(getrels(DCTERMS.references),
                         ["http://example.org/doc2"])
        expected_subjects = {"http://example.org/concept1",
                             "http://example.org/concept2"}
        self.assertEqual(set(getrels(DCTERMS.subject)), expected_subjects)

    def test_getrel(self):
        getrel = self.desc.getrel
        self.assertEqual(getrel(DCTERMS.references),
                         "http://example.org/doc2")
        # Both "no relation" and "more than one relation" must raise KeyError.
        self.assertRaises(KeyError, getrel, DCTERMS.replaces)
        self.assertRaises(KeyError, getrel, DCTERMS.subject)

    def test_getrdftype(self):
        expected_type = "http://xmlns.com/foaf/0.1/Document"
        self.assertEqual(self.desc.getrdftype(), expected_type)
def infer_metadata(self, resource, basefile):
    """Add inferred statements about *resource* to its graph, in place.

    Steps, in order:

    1. Drop every ``dcterms:issued`` value that has no datatype.
    2. Add ``owl:sameAs`` links between the resource and the URI coined
       by ``self.sameas_minter``, and from
       ``self.canonical_uri(basefile, True)`` to the resource.
    3. Add an ``owl:sameAs`` link for every
       ``rpubl:konsolideringsunderlag`` object that has an ``rdf:type``.
    4. Copy ``orig_updated`` / ``orig_checked`` from the document entry
       (when set) to ``rinfoex:senastHamtad`` /
       ``rinfoex:senastKontrollerad``.
    5. Copy ``dcterms:alternate`` and, unless it merely repeats the
       title, ``rdfs:label`` from ``self.commondata`` for the resource
       that ``rpubl:konsoliderar`` points to.

    :param resource: the document resource; must expose ``objects``,
        ``add``, ``remove``, ``value``, ``graph`` and ``identifier``
    :param basefile: basefile of the document, used to locate its
        document entry and to compute its canonical URI
    :returns: None; the graph behind *resource* is modified
    :raises KeyError: presumably propagated from
        ``Describer.getrel`` when the resource does not have exactly one
        ``rpubl:konsoliderar`` value -- TODO confirm against Describer
    """
    # Remove the bogus dcterms:issued thing that we only added to aid
    # URI generation. NB: This is removed in the superclass'
    # postprocess_doc as well, because for this lagen.nu-derived class
    # it needs to be done at this point, but for use of the superclass
    # directly, it needs to be done at some point.
    #
    # The objects are snapshotted with list() before any removal:
    # resource.objects() is a lazy generator over the same graph we
    # mutate, and whether that is safe depends on the store backend.
    for issued in list(resource.objects(DCTERMS.issued)):
        if not issued.datatype:
            resource.remove(DCTERMS.issued, issued)

    sameas_uri = self.sameas_minter.space.coin_uri(resource)
    resource.add(OWL.sameAs, URIRef(sameas_uri))
    resource.graph.add((URIRef(self.canonical_uri(basefile, True)),
                        OWL.sameAs, resource.identifier))

    # Then find each rpubl:konsolideringsunderlag, and create
    # owl:sameAs for them as well. Snapshotted for the same reason as
    # above: the loop body adds triples to the graph being iterated.
    for subresource in list(resource.objects(RPUBL.konsolideringsunderlag)):
        # Sometimes there'll be a rpubl:konsolideringsunderlag to a
        # resource URI but no actual data about that resource. This
        # seems to happen if SFST is updated but SFSR is not. In those
        # cases we can't generate a owl:sameAs URI since we have no
        # other data about the resource.
        if subresource.value(RDF.type):
            uri = self.sameas_minter.space.coin_uri(subresource)
            subresource.add(OWL.sameAs, URIRef(uri))

    desc = Describer(resource.graph, resource.identifier)
    de = DocumentEntry(self.store.documententry_path(basefile))
    if de.orig_updated:
        desc.value(RINFOEX.senastHamtad, de.orig_updated)
    if de.orig_checked:
        desc.value(RINFOEX.senastKontrollerad, de.orig_checked)

    rooturi = URIRef(desc.getrel(RPUBL.konsoliderar))
    v = self.commondata.value(rooturi, DCTERMS.alternate, any=True)
    if v:
        desc.value(DCTERMS.alternate, v)
    v = self.commondata.value(rooturi, RDFS.label, any=True)
    if v:
        # Don't include labels if they're essentially the same as
        # dcterms:title (legalref needs it to be able to parse refs to
        # laws that typically don't include SFS numbers, so that's why
        # they're in sfs.ttl).
        # NOTE(review): rsplit() without a maxsplit behaves like split()
        # here, so this keeps the text before the FIRST " (" -- confirm
        # that is what is intended.
        basetitle = str(resource.value(DCTERMS.title)).rsplit(" (")[0]
        if not v.startswith(basetitle.lower()):
            desc.value(RDFS.label, util.ucfirst(v))
def infer_metadata(self, resource, basefile):
    """Add inferred statements about *resource* to its graph, in place.

    Steps, in order:

    1. Drop every ``dcterms:issued`` value that has no datatype.
    2. Add ``owl:sameAs`` links between the resource and the URI coined
       by ``self.sameas_minter``, and from
       ``self.canonical_uri(basefile, True)`` to the resource.
    3. Add an ``owl:sameAs`` link for every
       ``rpubl:konsolideringsunderlag`` object that has an ``rdf:type``.
    4. Copy ``orig_updated`` / ``orig_checked`` from the document entry
       (when set) to ``rinfoex:senastHamtad`` /
       ``rinfoex:senastKontrollerad``.
    5. Copy ``dcterms:alternate`` and, unless it merely repeats the
       title, ``rdfs:label`` from ``self.commondata`` for the resource
       that ``rpubl:konsoliderar`` points to.

    :param resource: the document resource; must expose ``objects``,
        ``add``, ``remove``, ``value``, ``graph`` and ``identifier``
    :param basefile: basefile of the document, used to locate its
        document entry and to compute its canonical URI
    :returns: None; the graph behind *resource* is modified
    :raises KeyError: presumably propagated from
        ``Describer.getrel`` when the resource does not have exactly one
        ``rpubl:konsoliderar`` value -- TODO confirm against Describer
    """
    # Remove the bogus dcterms:issued thing that we only added to aid
    # URI generation. NB: This is removed in the superclass'
    # postprocess_doc as well, because for this lagen.nu-derived class
    # it needs to be done at this point, but for use of the superclass
    # directly, it needs to be done at some point.
    #
    # The objects are snapshotted with list() before any removal:
    # resource.objects() is a lazy generator over the same graph we
    # mutate, and whether that is safe depends on the store backend.
    for issued in list(resource.objects(DCTERMS.issued)):
        if not issued.datatype:
            resource.remove(DCTERMS.issued, issued)

    sameas_uri = self.sameas_minter.space.coin_uri(resource)
    resource.add(OWL.sameAs, URIRef(sameas_uri))
    resource.graph.add((URIRef(self.canonical_uri(basefile, True)),
                        OWL.sameAs, resource.identifier))

    # Then find each rpubl:konsolideringsunderlag, and create
    # owl:sameAs for them as well. Snapshotted for the same reason as
    # above: the loop body adds triples to the graph being iterated.
    for subresource in list(resource.objects(RPUBL.konsolideringsunderlag)):
        # Sometimes there'll be a rpubl:konsolideringsunderlag to a
        # resource URI but no actual data about that resource. This
        # seems to happen if SFST is updated but SFSR is not. In those
        # cases we can't generate a owl:sameAs URI since we have no
        # other data about the resource.
        if subresource.value(RDF.type):
            uri = self.sameas_minter.space.coin_uri(subresource)
            subresource.add(OWL.sameAs, URIRef(uri))

    desc = Describer(resource.graph, resource.identifier)
    de = DocumentEntry(self.store.documententry_path(basefile))
    if de.orig_updated:
        desc.value(RINFOEX.senastHamtad, de.orig_updated)
    if de.orig_checked:
        desc.value(RINFOEX.senastKontrollerad, de.orig_checked)

    rooturi = URIRef(desc.getrel(RPUBL.konsoliderar))
    v = self.commondata.value(rooturi, DCTERMS.alternate, any=True)
    if v:
        desc.value(DCTERMS.alternate, v)
    v = self.commondata.value(rooturi, RDFS.label, any=True)
    if v:
        # Don't include labels if they're essentially the same as
        # dcterms:title (legalref needs it to be able to parse refs to
        # laws that typically don't include SFS numbers, so that's why
        # they're in sfs.ttl).
        # NOTE(review): rsplit() without a maxsplit behaves like split()
        # here, so this keeps the text before the FIRST " (" -- confirm
        # that is what is intended.
        basetitle = str(resource.value(DCTERMS.title)).rsplit(" (")[0]
        if not v.startswith(basetitle.lower()):
            desc.value(RDFS.label, util.ucfirst(v))
def selector(entry):
    """Tell whether *entry*'s ``dcterms:subject`` equals ``category``.

    The distilled RDF for ``entry.basefile`` is read from ``self.store``
    and parsed into a fresh graph; the subject of ``entry.id`` is then
    looked up through a ``Describer``. ``self`` and ``category`` are
    taken from the enclosing scope.

    :param entry: object exposing ``basefile`` and ``id``
    :returns: the result of comparing the subject with ``category``
    """
    entry_graph = Graph()
    with self.store.open_distilled(entry.basefile) as distilled:
        rdf_data = distilled.read()
        entry_graph.parse(data=rdf_data)
    describer = Describer(entry_graph, entry.id)
    subject = describer.getrel(self.ns['dcterms'].subject)
    return subject == category
def selector(entry):
    """Tell whether *entry*'s ``dcterms:subject`` equals ``category``.

    The distilled RDF for ``entry.basefile`` is read from ``self.store``
    and parsed into a fresh graph; the subject of ``entry.id`` is then
    looked up through a ``Describer``. ``self`` and ``category`` are
    taken from the enclosing scope.

    :param entry: object exposing ``basefile`` and ``id``
    :returns: the result of comparing the subject with ``category``
    """
    entry_graph = Graph()
    with self.store.open_distilled(entry.basefile) as distilled:
        rdf_data = distilled.read()
        entry_graph.parse(data=rdf_data)
    describer = Describer(entry_graph, entry.id)
    subject = describer.getrel(self.ns['dcterms'].subject)
    return subject == category