def test_download_url_over_access_url(self):
    """Check that ``downloadURL`` wins over ``accessURL`` as resource URL."""
    distribution = BNode()
    graph = Graph()

    access_url = faker.uri()
    for triple in (
        (distribution, RDF.type, DCAT.Distribution),
        (distribution, DCT.title, Literal(faker.sentence())),
        (distribution, DCAT.accessURL, Literal(access_url)),
    ):
        graph.add(triple)

    # With only an access URL declared, it is used as the resource URL.
    from_access_only = resource_from_rdf(graph)
    from_access_only.validate()
    assert from_access_only.url == access_url

    # Once a download URL is declared as well, it takes over.
    download_url = faker.uri()
    graph.add((distribution, DCAT.downloadURL, Literal(download_url)))
    from_both = resource_from_rdf(graph)
    from_both.validate()
    assert from_both.url == download_url
def test_resource_title_ignore_dynamic_url(self):
    """A download URL with a query string yields the fallback title."""
    distribution = BNode()
    graph = Graph()
    dynamic_url = 'https://www.somewhere.com/endpoint.json?param=value'

    graph.set((distribution, RDF.type, DCAT.Distribution))
    graph.set((distribution, DCAT.downloadURL, URIRef(dynamic_url)))

    parsed = resource_from_rdf(graph)
    parsed.validate()

    # No DCT.title triple was given, so the title has to be derived.
    expected_title = 'Nameless resource'
    assert parsed.title == expected_title
def test_resource_generic_title(self):
    """Without title, format or file name, the fallback title is used."""
    distribution = BNode()
    graph = Graph()
    bare_url = 'https://www.somewhere.com/no-extension/'

    for triple in (
        (distribution, RDF.type, DCAT.Distribution),
        (distribution, DCAT.downloadURL, URIRef(bare_url)),
    ):
        graph.set(triple)

    parsed = resource_from_rdf(graph)
    parsed.validate()

    expected_title = 'Nameless resource'
    assert parsed.title == expected_title
def test_resource_title_from_url(self):
    """An untitled distribution is titled after the file name in its URL."""
    distribution = BNode()
    graph = Graph()
    file_url = 'https://www.somewhere.com/somefile.csv'

    graph.set((distribution, RDF.type, DCAT.Distribution))
    graph.set((distribution, DCAT.downloadURL, URIRef(file_url)))

    parsed = resource_from_rdf(graph)
    parsed.validate()

    # Last path segment of the download URL.
    expected_title = 'somefile.csv'
    assert parsed.title == expected_title
def test_resource_title_from_format(self):
    """An untitled distribution with a format is titled after that format."""
    distribution = BNode()
    graph = Graph()
    bare_url = 'https://www.somewhere.com/no-extension/'

    for triple in (
        (distribution, RDF.type, DCAT.Distribution),
        (distribution, DCAT.downloadURL, URIRef(bare_url)),
        (distribution, DCT.term('format'), Literal('CSV')),
    ):
        graph.set(triple)

    parsed = resource_from_rdf(graph)
    parsed.validate()

    # The declared format 'CSV' shows up lowercased in the generated title.
    expected_title = 'csv resource'
    assert parsed.title == expected_title
def test_resource_html_description(self):
    """HTML markup wrapping a description is stripped from the resource."""
    distribution = BNode()
    graph = Graph()

    plain_text = faker.paragraph()
    wrapped_in_html = '<div>{0}</div>'.format(plain_text)

    for triple in (
        (distribution, RDF.type, DCAT.Distribution),
        (distribution, DCT.title, Literal(faker.sentence())),
        (distribution, DCT.description, Literal(wrapped_in_html)),
        (distribution, DCAT.downloadURL, Literal(faker.uri())),
    ):
        graph.add(triple)

    parsed = resource_from_rdf(graph)
    parsed.validate()

    # Only the text inside the <div> is expected to remain.
    assert parsed.description == plain_text
def test_can_extract_from_rdf_resource(self):
    """Parsing accepts the object returned by ``Graph.resource``."""
    distribution = BNode()
    graph = Graph()
    expected_title = faker.sentence()
    expected_url = faker.uri()

    for triple in (
        (distribution, RDF.type, DCAT.Distribution),
        (distribution, DCT.title, Literal(expected_title)),
        (distribution, DCAT.downloadURL, Literal(expected_url)),
    ):
        graph.add(triple)

    # Hand over the node-bound resource wrapper rather than the whole graph.
    rdf_resource = graph.resource(distribution)
    parsed = resource_from_rdf(rdf_resource)
    parsed.validate()

    assert isinstance(parsed, Resource)
    assert parsed.title == expected_title
    assert parsed.url == expected_url
def test_minimal_resource_fields(self):
    """A distribution with only a title and a download URL parses cleanly.

    Uses plain ``assert`` statements, like the sibling tests, instead of
    the unittest-style ``self.assert*`` helpers.
    """
    node = BNode()
    g = Graph()

    title = faker.sentence()
    url = faker.uri()
    g.add((node, RDF.type, DCAT.Distribution))
    g.add((node, DCT.title, Literal(title)))
    g.add((node, DCAT.downloadURL, Literal(url)))

    # The whole graph is passed in, not a node-bound resource.
    resource = resource_from_rdf(g)
    resource.validate()

    assert isinstance(resource, Resource)
    assert resource.title == title
    assert resource.url == url
def test_match_existing_resource_by_url(self):
    """A distribution whose URL matches a dataset resource reuses its id."""
    dataset = DatasetFactory(resources=ResourceFactory.build_batch(3))
    # Pick the middle one of the three resources.
    already_known = dataset.resources[1]

    distribution = BNode()
    graph = Graph()
    updated_title = faker.sentence()

    for triple in (
        (distribution, RDF.type, DCAT.Distribution),
        (distribution, DCT.title, Literal(updated_title)),
        (distribution, DCAT.downloadURL, Literal(already_known.url)),
    ):
        graph.add(triple)

    parsed = resource_from_rdf(graph, dataset)
    parsed.validate()

    assert isinstance(parsed, Resource)
    # The title comes from the graph while the id is the existing one.
    assert parsed.title == updated_title
    assert parsed.id == already_known.id
def test_all_resource_fields(self):
    """Check each field of a richly described distribution is parsed."""
    distribution = BNode()
    graph = Graph()

    expected_title = faker.sentence()
    expected_url = faker.uri()
    expected_description = faker.paragraph()
    expected_size = faker.pyint()
    # Issued date is drawn from an earlier window than the modified date.
    issued_at = faker.date_time_between(start_date='-60d', end_date='-30d')
    modified_at = faker.past_datetime(start_date='-30d')
    expected_mime = faker.mime_type()
    expected_sha1 = faker.sha1()

    # NOTE(review): the W3C DCAT vocabulary spells this property
    # ``byteSize``; ``bytesSize`` is kept as-is, presumably to match what
    # the parser reads -- confirm before changing.
    for triple in (
        (distribution, RDF.type, DCAT.Distribution),
        (distribution, DCT.title, Literal(expected_title)),
        (distribution, DCT.description, Literal(expected_description)),
        (distribution, DCAT.downloadURL, Literal(expected_url)),
        (distribution, DCT.issued, Literal(issued_at)),
        (distribution, DCT.modified, Literal(modified_at)),
        (distribution, DCAT.bytesSize, Literal(expected_size)),
        (distribution, DCAT.mediaType, Literal(expected_mime)),
        (distribution, DCT.term('format'), Literal('CSV')),
    ):
        graph.add(triple)

    # The checksum is described by its own blank node linked to the
    # distribution.
    checksum_node = BNode()
    for triple in (
        (distribution, SPDX.checksum, checksum_node),
        (checksum_node, RDF.type, SPDX.Checksum),
        (checksum_node, SPDX.algorithm, SPDX.checksumAlgorithm_sha1),
        (checksum_node, SPDX.checksumValue, Literal(expected_sha1)),
    ):
        graph.add(triple)

    parsed = resource_from_rdf(graph)
    parsed.validate()

    assert isinstance(parsed, Resource)
    assert parsed.title == expected_title
    assert parsed.url == expected_url
    assert parsed.description == expected_description
    assert parsed.filesize == expected_size
    assert parsed.mime == expected_mime

    assert isinstance(parsed.checksum, Checksum)
    assert parsed.checksum.type == 'sha1'
    assert parsed.checksum.value == expected_sha1

    assert parsed.published == issued_at
    assert parsed.modified == modified_at
    # The 'CSV' format literal is expected back lowercased.
    assert parsed.format == 'csv'