Beispiel #1
0
    def test_all_resource_fields(self):
        '''A fully populated resource exports every field to RDF.'''
        dataset_license = LicenseFactory()
        resource = ResourceFactory(format='csv')
        dataset = DatasetFactory(resources=[resource],
                                 license=dataset_license)
        expected_access_url = url_for('datasets.resource', id=resource.id,
                                      _external=True)

        distrib = resource_to_rdf(resource, dataset)

        # Properties expected to be serialized as plain literals.
        expected_literals = (
            (DCT.title, resource.title),
            (DCT.description, resource.description),
            (DCT.issued, resource.published),
            (DCT.modified, resource.modified),
            (DCT.rights, dataset_license.title),
            (DCAT.bytesSize, resource.filesize),
            (DCAT.mediaType, resource.mime),
            (DCT.term('format'), resource.format),
        )
        for predicate, expected in expected_literals:
            assert distrib.value(predicate) == Literal(expected)

        # Properties expected to be serialized as URI references.
        expected_uris = (
            (DCT.license, dataset_license.url),
            (DCAT.downloadURL, resource.url),
            (DCAT.accessURL, expected_access_url),
        )
        for predicate, expected in expected_uris:
            assert distrib.value(predicate).identifier == URIRef(expected)

        # The checksum is exported as its own typed node.
        checksum = distrib.value(SPDX.checksum)
        checksum_id = checksum.identifier
        graph = distrib.graph
        assert graph.value(checksum_id, RDF.type) == SPDX.Checksum
        algorithm = graph.value(checksum_id, SPDX.algorithm)
        assert algorithm == SPDX.checksumAlgorithm_sha1
        checksum_value = checksum.value(SPDX.checksumValue)
        assert checksum_value == Literal(resource.checksum.value)
Beispiel #2
0
    def test_resource_title_from_format(self):
        '''Without a title or a file-like URL, the title uses the format.'''
        distribution = BNode()
        graph = Graph()
        # The URL ends with a slash, so no file name can be taken from it.
        extensionless_url = 'https://www.somewhere.com/no-extension/'

        triples = (
            (distribution, RDF.type, DCAT.Distribution),
            (distribution, DCAT.downloadURL, URIRef(extensionless_url)),
            (distribution, DCT.term('format'), Literal('CSV')),
        )
        for triple in triples:
            graph.set(triple)

        resource = resource_from_rdf(graph)
        resource.validate()

        assert resource.title == 'csv resource'
Beispiel #3
0
def resource_from_rdf(graph_or_distrib, dataset=None):
    '''
    Map a DCAT/RDF distribution to a Resource domain model.

    :param graph_or_distrib: either an ``RdfResource`` already wrapping the
        distribution node, or a graph in which a node typed
        ``dcat:Distribution`` is looked up.
    :param dataset: optional dataset. When given, a resource having the same
        URL is searched in ``dataset.resources`` and updated in place;
        if none matches, a new resource is created and appended to it.
    :returns: the created or updated ``Resource``.
    '''
    if isinstance(graph_or_distrib, RdfResource):
        distrib = graph_or_distrib
    else:
        node = graph_or_distrib.value(predicate=RDF.type,
                                      object=DCAT.Distribution)
        distrib = graph_or_distrib.resource(node)

    # The download URL takes precedence over the access URL.
    download_url = url_from_rdf(distrib, DCAT.downloadURL)
    access_url = url_from_rdf(distrib, DCAT.accessURL)
    url = safe_unicode(download_url or access_url)

    # Reuse the dataset resource matching this URL when there is one.
    resource = get_by(dataset.resources, 'url', url) if dataset else None
    if not resource:
        resource = Resource()
        if dataset:
            dataset.resources.append(resource)

    resource.title = title_from_rdf(distrib, url)
    resource.url = url
    resource.description = sanitize_html(distrib.value(DCT.description))
    resource.filesize = rdf_value(distrib, DCAT.bytesSize)
    resource.mime = rdf_value(distrib, DCAT.mediaType)
    fmt = rdf_value(distrib, DCT.term('format'))
    if fmt:
        resource.format = fmt.lower()

    checksum = distrib.value(SPDX.checksum)
    if checksum:
        # The algorithm triple may be missing (``None``) or be a plain
        # literal; neither has an ``identifier``. Skip the checksum in
        # that case instead of raising AttributeError.
        algorithm_node = checksum.value(SPDX.algorithm)
        algorithm_id = getattr(algorithm_node, 'identifier', None)
        algorithm = (CHECKSUM_ALGORITHMS.get(algorithm_id)
                     if algorithm_id is not None else None)
        # Only known algorithms produce a checksum on the resource.
        if algorithm:
            resource.checksum = Checksum()
            resource.checksum.value = rdf_value(checksum, SPDX.checksumValue)
            resource.checksum.type = algorithm

    # Keep the current dates when the distribution does not provide them.
    resource.published = rdf_value(distrib, DCT.issued, resource.published)
    resource.modified = rdf_value(distrib, DCT.modified, resource.modified)

    identifier = rdf_value(distrib, DCT.identifier)
    if identifier:
        resource.extras['dct:identifier'] = identifier

    # Blank nodes carry no stable URI, so only URI references are stored.
    if isinstance(distrib.identifier, URIRef):
        resource.extras['uri'] = distrib.identifier.toPython()

    return resource
Beispiel #4
0
def resource_from_rdf(graph_or_distrib, dataset=None):
    '''
    Map a DCAT/RDF distribution to a Resource domain model.

    :param graph_or_distrib: either an ``RdfResource`` already wrapping the
        distribution node, or a graph in which a node typed
        ``dcat:Distribution`` is looked up.
    :param dataset: optional dataset. When given, a resource having the same
        URL is searched in ``dataset.resources`` and updated in place;
        if none matches, a new resource is created and appended to it.
    :returns: the created or updated ``Resource``.
    '''
    if isinstance(graph_or_distrib, RdfResource):
        distrib = graph_or_distrib
    else:
        node = graph_or_distrib.value(predicate=RDF.type,
                                      object=DCAT.Distribution)
        distrib = graph_or_distrib.resource(node)

    # The download URL takes precedence over the access URL.
    download_url = url_from_rdf(distrib, DCAT.downloadURL)
    access_url = url_from_rdf(distrib, DCAT.accessURL)
    url = safe_unicode(download_url or access_url)

    # Reuse the dataset resource matching this URL when there is one.
    resource = get_by(dataset.resources, 'url', url) if dataset else None
    if not resource:
        resource = Resource()
        if dataset:
            dataset.resources.append(resource)

    resource.title = title_from_rdf(distrib, url)
    resource.url = url
    resource.description = sanitize_html(distrib.value(DCT.description))
    resource.filesize = rdf_value(distrib, DCAT.bytesSize)
    resource.mime = rdf_value(distrib, DCAT.mediaType)
    fmt = rdf_value(distrib, DCT.term('format'))
    if fmt:
        resource.format = fmt.lower()

    checksum = distrib.value(SPDX.checksum)
    if checksum:
        # The algorithm triple may be missing (``None``) or be a plain
        # literal; neither has an ``identifier``. Skip the checksum in
        # that case instead of raising AttributeError.
        algorithm_node = checksum.value(SPDX.algorithm)
        algorithm_id = getattr(algorithm_node, 'identifier', None)
        algorithm = (CHECKSUM_ALGORITHMS.get(algorithm_id)
                     if algorithm_id is not None else None)
        # Only known algorithms produce a checksum on the resource.
        if algorithm:
            resource.checksum = Checksum()
            resource.checksum.value = rdf_value(checksum, SPDX.checksumValue)
            resource.checksum.type = algorithm

    # Keep the current dates when the distribution does not provide them.
    resource.published = rdf_value(distrib, DCT.issued, resource.published)
    resource.modified = rdf_value(distrib, DCT.modified, resource.modified)

    identifier = rdf_value(distrib, DCT.identifier)
    if identifier:
        resource.extras['dct:identifier'] = identifier

    # Blank nodes carry no stable URI, so only URI references are stored.
    if isinstance(distrib.identifier, URIRef):
        resource.extras['uri'] = distrib.identifier.toPython()

    return resource
Beispiel #5
0
    def test_all_resource_fields(self):
        '''A fully described distribution populates every resource field.'''
        distribution = BNode()
        graph = Graph()

        title = faker.sentence()
        url = faker.uri()
        description = faker.paragraph()
        filesize = faker.pyint()
        issued = faker.date_time_between(start_date='-60d', end_date='-30d')
        modified = faker.past_datetime(start_date='-30d')
        mime = faker.mime_type()
        sha1 = faker.sha1()

        # Distribution node and its direct properties.
        checksum_node = BNode()
        distribution_properties = (
            (RDF.type, DCAT.Distribution),
            (DCT.title, Literal(title)),
            (DCT.description, Literal(description)),
            (DCAT.downloadURL, Literal(url)),
            (DCT.issued, Literal(issued)),
            (DCT.modified, Literal(modified)),
            (DCAT.bytesSize, Literal(filesize)),
            (DCAT.mediaType, Literal(mime)),
            (DCT.term('format'), Literal('CSV')),
            (SPDX.checksum, checksum_node),
        )
        for predicate, obj in distribution_properties:
            graph.add((distribution, predicate, obj))

        # Checksum node linked from the distribution.
        checksum_properties = (
            (RDF.type, SPDX.Checksum),
            (SPDX.algorithm, SPDX.checksumAlgorithm_sha1),
            (SPDX.checksumValue, Literal(sha1)),
        )
        for predicate, obj in checksum_properties:
            graph.add((checksum_node, predicate, obj))

        resource = resource_from_rdf(graph)
        resource.validate()

        assert isinstance(resource, Resource)
        assert resource.title == title
        assert resource.url == url
        assert resource.description == description
        assert resource.filesize == filesize
        assert resource.mime == mime
        assert resource.published == issued
        assert resource.modified == modified
        # The format is expected lowercased.
        assert resource.format == 'csv'

        parsed_checksum = resource.checksum
        assert isinstance(parsed_checksum, Checksum)
        assert parsed_checksum.type == 'sha1'
        assert parsed_checksum.value == sha1
Beispiel #6
0
def resource_to_rdf(resource, dataset=None, graph=None):
    '''
    Map a Resource domain model to a DCAT/RDF graph

    :param resource: the resource to serialize as a ``dcat:Distribution``.
    :param dataset: optional owning dataset. When it has an id, the
        distribution node is a URI anchored on the dataset page, otherwise
        a blank node is used. Its license, if any, is exported too.
    :param graph: optional graph to add the triples to; a new graph using
        the module ``namespace_manager`` is created when omitted.
    :returns: the RDF resource wrapping the distribution node.
    '''
    # An empty rdflib Graph is falsy (it defines ``__len__``), so compare
    # with None explicitly: ``graph or Graph(...)`` would silently replace
    # an empty graph supplied by the caller.
    if graph is None:
        graph = Graph(namespace_manager=namespace_manager)

    if dataset and dataset.id:
        node_id = URIRef(
            url_for('datasets.show_redirect',
                    dataset=dataset.id,
                    _external=True,
                    _anchor='resource-{0}'.format(resource.id)))
    else:
        node_id = BNode(resource.id)
    permalink = url_for('datasets.resource', id=resource.id, _external=True)

    r = graph.resource(node_id)
    r.set(RDF.type, DCAT.Distribution)
    r.set(DCT.identifier, Literal(resource.id))
    r.add(DCT.title, Literal(resource.title))
    r.add(DCT.description, Literal(resource.description))
    r.add(DCAT.downloadURL, URIRef(resource.url))
    r.add(DCAT.accessURL, URIRef(permalink))
    r.add(DCT.issued, Literal(resource.published))
    r.add(DCT.modified, Literal(resource.modified))

    # The license is carried by the dataset, not by the resource itself.
    if dataset and dataset.license:
        r.add(DCT.rights, Literal(dataset.license.title))
        if dataset.license.url:
            r.add(DCT.license, URIRef(dataset.license.url))

    # A size of 0 is still exported, hence the explicit None check.
    if resource.filesize is not None:
        r.add(DCAT.bytesSize, Literal(resource.filesize))
    if resource.mime:
        r.add(DCAT.mediaType, Literal(resource.mime))
    if resource.format:
        r.add(DCT.term('format'), Literal(resource.format))

    if resource.checksum:
        # The checksum lives in its own blank node linked to the
        # distribution.
        checksum = graph.resource(BNode())
        checksum.set(RDF.type, SPDX.Checksum)
        algorithm = 'checksumAlgorithm_{0}'.format(resource.checksum.type)
        checksum.add(SPDX.algorithm, getattr(SPDX, algorithm))
        checksum.add(SPDX.checksumValue, Literal(resource.checksum.value))
        r.add(SPDX.checksum, checksum)
    return r
Beispiel #7
0
def title_from_rdf(rdf, url):
    '''
    Work out a title for a distribution.

    The title property is not mandatory, so this falls back, in order, on
    the last segment of the URL when it looks like a file name,
    then on a title built from the format
    and, as a last resort, on a generic resource name.
    '''
    explicit_title = rdf_value(rdf, DCT.title)
    if explicit_title:
        return explicit_title

    if url:
        filename = url.rsplit('/', 1)[-1]
        # A dot without any query string is taken as a file name.
        looks_like_file = '.' in filename and '?' not in filename
        if looks_like_file:
            return filename

    fmt = rdf_value(rdf, DCT.term('format'))
    # Generated titles are translated into the configured default language.
    with i18n.language(current_app.config['DEFAULT_LANGUAGE']):
        if not fmt:
            return i18n._('Nameless resource')
        return i18n._('{format} resource').format(format=fmt.lower())
Beispiel #8
0
def title_from_rdf(rdf, url):
    '''
    Work out a title for a distribution.

    The title property is not mandatory, so this falls back, in order, on
    the last segment of the URL when it looks like a file name,
    then on a title built from the format
    and, as a last resort, on a generic resource name.
    '''
    explicit_title = rdf_value(rdf, DCT.title)
    if explicit_title:
        return explicit_title

    if url:
        filename = url.rsplit('/', 1)[-1]
        # A dot without any query string is taken as a file name.
        looks_like_file = '.' in filename and '?' not in filename
        if looks_like_file:
            return filename

    fmt = rdf_value(rdf, DCT.term('format'))
    # Generated titles are translated into the configured default language.
    with i18n.language(current_app.config['DEFAULT_LANGUAGE']):
        if not fmt:
            return i18n._('Nameless resource')
        return i18n._('{format} resource').format(format=fmt.lower())
Beispiel #9
0
def resource_to_rdf(resource, dataset=None, graph=None):
    '''
    Map a Resource domain model to a DCAT/RDF graph

    :param resource: the resource to serialize as a ``dcat:Distribution``.
    :param dataset: optional owning dataset. When it has an id, the
        distribution node is a URI anchored on the dataset page, otherwise
        a blank node is used. Its license, if any, is exported too.
    :param graph: optional graph to add the triples to; a new graph using
        the module ``namespace_manager`` is created when omitted.
    :returns: the RDF resource wrapping the distribution node.
    '''
    # An empty rdflib Graph is falsy (it defines ``__len__``), so compare
    # with None explicitly: ``graph or Graph(...)`` would silently replace
    # an empty graph supplied by the caller.
    if graph is None:
        graph = Graph(namespace_manager=namespace_manager)

    if dataset and dataset.id:
        node_id = URIRef(url_for('datasets.show_redirect', dataset=dataset.id,
                                 _external=True,
                                 _anchor='resource-{0}'.format(resource.id)))
    else:
        node_id = BNode(resource.id)
    permalink = url_for('datasets.resource', id=resource.id, _external=True)

    r = graph.resource(node_id)
    r.set(RDF.type, DCAT.Distribution)
    r.set(DCT.identifier, Literal(resource.id))
    r.add(DCT.title, Literal(resource.title))
    r.add(DCT.description, Literal(resource.description))
    r.add(DCAT.downloadURL, URIRef(resource.url))
    r.add(DCAT.accessURL, URIRef(permalink))
    r.add(DCT.issued, Literal(resource.published))
    r.add(DCT.modified, Literal(resource.modified))

    # The license is carried by the dataset, not by the resource itself.
    if dataset and dataset.license:
        r.add(DCT.rights, Literal(dataset.license.title))
        if dataset.license.url:
            r.add(DCT.license, URIRef(dataset.license.url))

    # A size of 0 is still exported, hence the explicit None check.
    if resource.filesize is not None:
        r.add(DCAT.bytesSize, Literal(resource.filesize))
    if resource.mime:
        r.add(DCAT.mediaType, Literal(resource.mime))
    if resource.format:
        r.add(DCT.term('format'), Literal(resource.format))

    if resource.checksum:
        # The checksum lives in its own blank node linked to the
        # distribution.
        checksum = graph.resource(BNode())
        checksum.set(RDF.type, SPDX.Checksum)
        algorithm = 'checksumAlgorithm_{0}'.format(resource.checksum.type)
        checksum.add(SPDX.algorithm, getattr(SPDX, algorithm))
        checksum.add(SPDX.checksumValue, Literal(resource.checksum.value))
        r.add(SPDX.checksum, checksum)
    return r