Example 1
def check_pids(key, data, errors, context):
    '''
    Validate dataset PIDs: at least one PID must be present, a primary
    data PID must exist, and the primary data PID of an already-stored
    dataset must not change.

    :param key: flattened key being validated
    :param data: flattened data dict
    :param errors: validation errors
    :param context: validation context
    :raises Invalid: when PIDs are missing or the primary data PID changed
    '''

    # Empty PIDs have already been removed by the actions, so a missing
    # first entry means there are no PIDs at all.
    if data.get((u'pids', 0, u'id'), None) is None:
        raise Invalid({'key': 'pids', 'value': _('Missing dataset PIDs')})

    primary_pid = None

    for flat_key in data.keys():
        # Only look at ('pids', <idx>, 'primary') entries; short-circuit
        # keeps us from indexing short keys like ('id',).
        if flat_key[0] != 'pids' or flat_key[2] != 'primary':
            continue
        is_primary = asbool(data[flat_key] or False)
        pid_type = data[(flat_key[0], flat_key[1], 'type')]
        pid_id = data[(flat_key[0], flat_key[1], 'id')]
        if is_primary and pid_type == 'data' and pid_id:
            primary_pid = pid_id

    if primary_pid is None:
        raise Invalid({'key': 'pids', 'value': _("Missing primary data PID")})

    # Check constancy of the primary data PID against the stored dataset.
    try:
        stored = logic.get_action('package_show')({}, {'id': data[('id',)]})
        old_primary_pid = utils.get_pids_by_type('data', stored, primary=True)[0].get('id')
        if old_primary_pid and old_primary_pid != primary_pid:
            raise Invalid({'key': 'pids', 'value': _("Primary data PID can not be modified")})
    except (logic.NotFound, KeyError):
        # New dataset (no stored package / no ('id',) key): nothing to compare.
        pass
Example 2
def get_urn_fi_address(package):
    '''
    Return a resolvable address for the package's primary data PID.

    If the package id is already an http(s) URL it is returned as-is.
    Otherwise, if the primary data PID is a URN, it is formatted with the
    configured urn.fi address template. Returns '' when no resolvable
    address can be produced.

    :param package: package dict
    :returns: address string, or '' if none available
    '''
    package_id = package.get('id', '')
    # startswith accepts a tuple of prefixes — one call instead of two.
    if package_id.startswith(('http://', 'https://')):
        return package.get('id')

    data_pids = get_pids_by_type('data', package, primary=True)
    # Fix: the original indexed [0] unconditionally, raising IndexError for
    # packages without a primary data PID. Fall back to None instead.
    pid = data_pids[0].get('id', None) if data_pids else None

    if is_urn(pid):
        template = config.get('ckanext.kata.urn_address_template', "http://urn.fi/%(pid)s")
        return template % {'pid': pid}
    return ''
Example 3
def check_primary_pids(key, data, errors, context):
    '''
    Ensure a primary data PID exists; if none is found, append one built
    from the package name.

    :param key: key
    :param data: data
    :param errors: validation errors
    :param context: context
    '''

    candidate_dict = {'pids': data.get(('pids',))}
    existing_primary = utils.get_pids_by_type('data', candidate_dict, primary=True)

    if not existing_primary:
        # Fall back to the package name as the primary data PID.
        fallback_pid = {'primary': u'True', 'type': 'data', 'id': data[('name',)]}
        data[('pids',)].append(fallback_pid)
Example 4
def _handle_pids(context, data_dict):
    '''
    Normalize and augment the PIDs in data_dict.

    - Ensures a 'pids' list exists and drops entries with an empty id.
    - Appends a generated version PID when 'generate_version_pid' is 'on'.
    - For new datasets without a primary data PID, generates one and
      inserts it first. Existing datasets are left untouched.

    :param context: action context with 'model' and 'session'
    :param data_dict: package dict, modified in place
    '''
    if 'pids' not in data_dict:
        data_dict['pids'] = []
    else:
        # Clean up empty PIDs (missing or falsy 'id').
        data_dict['pids'] = [pid for pid in data_dict['pids'] if pid.get('id')]

    if data_dict.get('generate_version_pid') == 'on':
        data_dict['pids'].append({'id': utils.generate_pid(),
                                  'type': 'version',
                                  'provider': 'Etsin',
                                  })

    # If no primary data PID, generate one if this is a new dataset
    if not utils.get_pids_by_type('data', data_dict, primary=True):
        model = context["model"]
        session = context["session"]

        if data_dict.get('id'):
            # NOTE: at this point 'id' actually contains the package name.
            query = session.query(model.Package.id).filter_by(name=data_dict['id'])
            if query.first():
                return  # Existing dataset, don't generate new data PID

        data_dict['pids'].insert(0, {'id': utils.generate_pid(),
                                     'type': 'data',
                                     'primary': 'True',
                                     'provider': 'Etsin',
                                     })
Example 5
    def test_get_pids_by_type(self):
        '''
        utils.get_pids_by_type should filter the fixture's pids by type
        and, for 'relation' pids, by the optional relation keyword.
        '''
        data_dict = copy.deepcopy(TEST_DATADICT)
        data_dict['id'] = 'some_package.id'

        # Fixture contains three relation pids in total, one per relation kind.
        pids = utils.get_pids_by_type(u'relation', data_dict)
        assert len(pids) == 3
        pids = utils.get_pids_by_type(u'primary', data_dict)
        assert len(pids) == 1
        pids = utils.get_pids_by_type(u'relation', data_dict, relation='isPreviousVersionOf')
        assert len(pids) == 1
        pids = utils.get_pids_by_type(u'relation', data_dict, relation='isPartOf')
        assert len(pids) == 1
        pids = utils.get_pids_by_type(u'relation', data_dict, relation='generalRelation')
        assert len(pids) == 1

        # Unknown types yield an empty result rather than raising.
        pids = utils.get_pids_by_type('some_unknown_type', data_dict)
        assert len(pids) == 0
Example 6
    def graph_from_dataset(self, dataset_dict, dataset_ref):
        '''
        Serialize a CKAN dataset dict into the RDF graph ``self.g`` using
        DCAT/DCT/FOAF (Etsin profile): identifiers, agents, publishers,
        tags, dates, events, distribution, rights, spatial and temporal
        coverage, and languages.

        :param dataset_dict: dataset dict (package_show style)
        :param dataset_ref: RDF node representing the dataset
        '''

        g = self.g

        # NOTE(review): dict.iteritems() is Python 2 only — confirm the
        # target runtime before porting.
        for prefix, namespace in namespaces.iteritems():
            g.bind(prefix, namespace)

        g.add((dataset_ref, RDF.type, DCAT.Dataset))

        # Etsin: homepage — the dataset's read page on this CKAN instance.
        uri = url_for(controller='package', action='read',
                      id=dataset_dict.get('name'), qualified=True)
        g.add((dataset_ref, FOAF.homepage, URIRef(uri)))

        # Etsin: primary identifiers (all 'data' pids become ADMS identifiers)
        data_pids = get_pids_by_type('data', dataset_dict)
        for pid in data_pids:
            g.add((dataset_ref, ADMS.identifier, URIRef(pid.get('id'))))

        # Version pids are emitted both as identifier and isVersionOf.
        version_pids = get_pids_by_type('version', dataset_dict)
        for pid in version_pids:
            g.add((dataset_ref, DCT.identifier, URIRef(pid.get('id'))))
            g.add((dataset_ref, DCT.isVersionOf, URIRef(pid.get('id'))))

        # Etsin: Title and Description, including translations
        items = [
            (DCT.title, 'langtitle', 'title'),
            (DCT.description, 'notes'),
        ]

        for item in items:
            self._add_translated_triple_from_dict(
                dataset_dict, dataset_ref, *item)

        # Etsin: Agents — each agent is mapped by its role.
        for agent in dataset_dict.get('agent', []):
            agent_role = agent.get('role')
            agent_id = agent.get('id')

            # Rights Holders (owner -> rightsHolder, distributor -> publisher)
            if agent_role in ['owner', 'distributor']:
                name = agent.get('name', None)

                if agent_role == 'owner':
                    # Non-URL owner names may fall back to the organisation.
                    if not get_if_url(agent.get('name')):
                        name = agent.get('name', agent.get('organisation', ''))
                    nodetype = DCT.rightsHolder

                if agent_role == 'distributor':
                    nodetype = DCT.publisher

                agent_node_ref = BNode()
                g.add((agent_node_ref, RDF.type, FOAF.Agent))
                g.add((dataset_ref, nodetype, agent_node_ref))
                g.add((agent_node_ref, FOAF.name, Literal(name)))
                if agent_id:
                    g.add((agent_node_ref, DCT.identifier, Literal(agent_id)))

            # Authors (author -> creator, contributor -> contributor)
            if agent_role in ['author', 'contributor']:
                if agent_role == 'author':
                    nodetype = DCT.creator

                if agent_role == 'contributor':
                    nodetype = DCT.contributor

                # Blank nodes chain agent -> memberOf -> organization.
                organization_ref = BNode()
                agent_ref = BNode()
                memberof_ref = BNode()
                creator_ref = BNode()

                g.add((organization_ref, FOAF.name, Literal(
                    agent.get('organisation', None))))
                g.add((memberof_ref, FOAF.organization, organization_ref))
                g.add((agent_ref, ORG.memberOf, memberof_ref))
                g.add((agent_ref, FOAF.name, Literal(agent.get('name', None))))
                g.add((creator_ref, FOAF.Agent, agent_ref))
                g.add((dataset_ref, nodetype, creator_ref))

                if agent_id:
                    g.add((agent_ref, DCT.identifier, Literal(agent_id)))


            # Funders — modelled as a FRAPO project the dataset is output of.
            if agent.get('role') == 'funder':
                organization_ref = BNode()
                memberof_ref = BNode()
                project_ref = BNode()
                isoutputof_ref = BNode()

                agent_url = agent.get('URL')
                if agent_url:
                    g.add((project_ref, FOAF.homepage, Literal(agent_url)))

                # Funding id is stored as an RDFS comment on the project.
                funding_id = agent.get('fundingid')
                if funding_id:
                    g.add((project_ref, RDFS.comment, Literal(funding_id)))

                g.add((organization_ref, FOAF.name, Literal(
                    agent.get('organisation', None))))
                g.add((memberof_ref, FOAF.organization, organization_ref))
                g.add((project_ref, ORG.memberOf, memberof_ref))

                agent_name = agent.get('name', None)
                g.add((project_ref, FOAF.name, Literal(agent_name)))

                if agent_id:
                    g.add((project_ref, DCT.identifier, Literal(agent_id)))

                g.add((isoutputof_ref, FOAF.Project, project_ref))
                g.add((dataset_ref, FRAPO.isOutputOf, isoutputof_ref))

        # Etsin: Publishers (dataset contacts)
        # NOTE(review): .get('contact') with no default raises TypeError if
        # the key is absent — presumably always present; confirm upstream.
        for contact in dataset_dict.get('contact'):
            agent_node_ref = BNode()
            agent_id = contact.get('id')

            g.add((agent_node_ref, RDF.type, FOAF.Agent))
            g.add((dataset_ref, DCT.publisher, agent_node_ref))

            contact_name = contact.get('name', None)
            g.add((agent_node_ref, FOAF.name, Literal(contact_name)))
            if agent_id:
                g.add((agent_node_ref, DCT.identifier, Literal(agent_id)))

            # 'hidden' is a sentinel for suppressed e-mail addresses.
            contact_email = contact.get('email')
            if contact_email and contact_email != 'hidden':
                g.add((agent_node_ref, FOAF.mbox,
                       URIRef("mailto:" + contact_email)))

            contact_url = contact.get('URL')
            if contact_url:
                g.add((agent_node_ref, FOAF.homepage, URIRef(contact_url)))

            contact_phone = contact.get('phone')
            if contact_phone:
                g.add((agent_node_ref, FOAF.phone,
                       URIRef("tel:" + contact_phone)))

        # Etsin: Organization — owning organization as an extra publisher.
        organization_name = resolve_org_name(dataset_dict.get('owner_org'))
        publisher_ref = BNode()
        g.add((dataset_ref, DCT.publisher, publisher_ref))
        g.add((publisher_ref, FOAF.organization, Literal(organization_name)))

        # Etsin: Tags - can be URLs or user inputted keywords
        # TODO: resolve URLs from Finto. Currently get_label_for_uri() breaks
        # RDFlib.
        for tag in dataset_dict.get('tags', []):
            display_name = tag.get('display_name')
            g.add((dataset_ref, DCAT.keyword, Literal(display_name)))
            tag_name = tag.get('name')
            if is_url(tag_name):
                g.add((dataset_ref, DCAT.theme, URIRef(tag_name)))

        # Etsin: Dates
        # Peter: Issued-field is new. This used to be inside CatalogRecord.
        items = [
            ('issued', DCT.issued, ['metadata_created'], Literal),
            ('modified', DCT.modified, ['metadata_modified'], Literal),
        ]
        self._add_date_triples_from_dict(dataset_dict, dataset_ref, items)

        # Etsin: Events (type/who/when/description tuples)
        for event in dataset_dict.get('event', []):
            event_ref = BNode()
            g.add((dataset_ref, DCT.event, event_ref))
            g.add((event_ref, DCT.type, Literal(event.get('type'))))
            g.add((event_ref, DCT.creator, Literal(event.get('who'))))
            g.add((event_ref, DCT.date, Literal(str(event.get('when')))))
            g.add((event_ref, DCT.description, Literal(event.get('descr'))))

        # Etsin: Citation
        citation = dataset_dict.get('citation')
        if citation:
            g.add((dataset_ref, DCT.bibliographicCitation, Literal(citation)))


        # Etsin: Distribution — download/access URL, checksum, format.
        availability_list = ['access_application',
                             'access_request', 'through_provider']

        checksum_ref = BNode()
        checksum_parent_ref = BNode()
        distribution_ref = BNode()
        dist_parent_ref = BNode()

        if dataset_dict.get('availability') == 'direct_download':
            access_url = get_download_url(dataset_dict)
            g.add((distribution_ref, DCAT.downloadURL, Literal(access_url)))

        checksum = dataset_dict.get('checksum')
        algorithm = dataset_dict.get('algorithm')
        if checksum and algorithm:
            g.add((checksum_ref, SPDX.checksumValue, Literal(checksum)))
            g.add((checksum_ref, SPDX.algorithm, Literal(algorithm)))
            g.add((checksum_parent_ref, SPDX.Checksum, checksum_ref))
            g.add((distribution_ref, SPDX.checksum, checksum_parent_ref))

        if dataset_dict.get('availability') in availability_list:
            access_url = get_download_url(dataset_dict)
            g.add((distribution_ref, DCAT.accessURL, Literal(access_url)))

        mimetype = dataset_dict.get('mimetype')
        if mimetype:
            g.add((distribution_ref, DCAT.mediaType, Literal(mimetype)))

        dist_format = dataset_dict.get('format')
        if dist_format:
            g.add((distribution_ref, DCT['format'], Literal(dist_format)))

        g.add((dist_parent_ref, DCAT.Distribution, distribution_ref))
        g.add((dataset_ref, DCAT.distribution, dist_parent_ref))

        # Etsin: Disciplines — URLs become URIRefs, plain names Literals.
        disciplines = dataset_dict.get('discipline', '')
        for discipline in split_disciplines(disciplines):
            if is_url(discipline):
                disc = URIRef(discipline)

            else:
                disc = Literal(discipline)
            g.add((dataset_ref, DCT.subject, disc))

        # Etsin: Rights Declaration
        # Peter: There's no way to add an xmlns attribute under
        # the parent <DCT:rights> in rdflib
        category, declarations = get_rightscategory(dataset_dict)
        declaration_strings = ''
        for declaration in declarations:
            declaration_strings += u'<RightsDeclaration>{}</RightsDeclaration>\n'\
                .format(declaration)
        xml_string = u'<RightsDeclarationMD RIGHTSCATEGORY="{}" \
            xmlns="http://www.loc.gov/METS/" >\n{}</RightsDeclarationMD>'\
            .format(category, declaration_strings)

        license_url = dataset_dict.get('license_URL')

        rights_ref = BNode()
        g.add((dataset_ref, DCT.rights, rights_ref))
        g.add((rights_ref, DCT.RightsStatement, Literal(
            xml_string, datatype=RDF.XMLLiteral)))
        g.add((rights_ref, DCT.RightsStatement, Literal(license_url)))


        # Etsin: Spatial
        coverage = dataset_dict.get('geographic_coverage')
        if coverage:
            spatial_ref = BNode()
            location_ref = BNode()
            g.add((location_ref, RDFS.label, Literal(coverage)))
            g.add((spatial_ref, DCT.Location, location_ref))
            # NOTE(review): DCT.spatial_ref looks like a typo for
            # DCT.spatial — confirm against the DCT vocabulary.
            g.add((dataset_ref, DCT.spatial_ref, spatial_ref))

        # Etsin: Temporal
        # Peter: hasBeginning and hasEnd left out
        temporal_coverage_begin = dataset_dict.get('temporal_coverage_begin')
        temporal_coverage_end = dataset_dict.get('temporal_coverage_end')
        if temporal_coverage_begin or temporal_coverage_end:
            temporal_extent = BNode()

            g.add((temporal_extent, RDF.type, DCT.PeriodOfTime))
            if temporal_coverage_begin:
                self._add_date_triple(
                    temporal_extent, SCHEMA.startDate, temporal_coverage_begin)

            if temporal_coverage_end:
                self._add_date_triple(
                    temporal_extent, SCHEMA.endDate, temporal_coverage_end)

            g.add((dataset_ref, DCT.temporal, temporal_extent))

        # Etsin: language field needs to be stripped from spaces
        langs = self._get_dict_value(dataset_dict, 'language', '').split(', ')
        for lang in langs:
            params = (dataset_ref, DCAT.language, Literal(lang))
            self.g.add(params)
Example 7
    def test_get_pids_by_type(self):
        '''
        utils.get_pids_by_type should filter the fixture's pids by type
        and honor the primary / use_package_id keyword combinations.
        '''
        data_dict = copy.deepcopy(TEST_DATADICT)
        data_dict['id'] = 'some_package.id'
        data_dict['name'] = 'some_package.name'

        # 'data' pids: two total, exactly one of them primary.
        pids = utils.get_pids_by_type('data', data_dict)
        assert len(pids) == 2
        pids = utils.get_pids_by_type('data', data_dict, primary=True)
        assert len(pids) == 1
        pids = utils.get_pids_by_type('data', data_dict, primary=True, use_package_id=True)
        assert len(pids) == 1
        pids = utils.get_pids_by_type('data', data_dict, primary=False)
        assert len(pids) == 1

        # 'metadata' pids: package id acts as the primary metadata pid only
        # when use_package_id is set.
        pids = utils.get_pids_by_type('metadata', data_dict)
        assert len(pids) == 1
        pids = utils.get_pids_by_type('metadata', data_dict, primary=True)
        assert len(pids) == 0
        pids = utils.get_pids_by_type('metadata', data_dict, primary=True, use_package_id=True)
        assert len(pids) == 1
        pids = utils.get_pids_by_type('metadata', data_dict, use_package_id=True)
        assert len(pids) == 2

        # 'version' pids are never primary, with or without use_package_id.
        pids = utils.get_pids_by_type('version', data_dict)
        assert len(pids) == 1
        pids = utils.get_pids_by_type('version', data_dict, primary=True)
        assert len(pids) == 0
        pids = utils.get_pids_by_type('version', data_dict, primary=True, use_package_id=True)
        assert len(pids) == 0

        # Unknown types yield an empty result rather than raising.
        pids = utils.get_pids_by_type('some_unknown_type', data_dict)
        assert len(pids) == 0
Example 8
    def graph_from_dataset(self, dataset_dict, dataset_ref):
        '''
        Serialize a CKAN dataset dict into the RDF graph ``self.g`` using
        DCAT/DCT/FOAF (Etsin profile). Unlike the older variant, this one
        requires a primary PID (bails out early without one) and sanitizes
        URI values with remove_trailing_spaces / remove_all_spaces.

        :param dataset_dict: dataset dict (package_show style)
        :param dataset_ref: RDF node representing the dataset
        '''
        primary_pid = get_primary_pid(dataset_dict)
        # No primary PID: nothing is serialized at all.
        if not primary_pid:
            return

        g = self.g

        # NOTE(review): dict.iteritems() is Python 2 only — confirm the
        # target runtime before porting.
        for prefix, namespace in namespaces.iteritems():
            g.bind(prefix, namespace)

        g.add((dataset_ref, RDF.type, DCAT.Dataset))

        # Etsin: homepage — the dataset's read page on this CKAN instance.
        uri = url_for(controller='package',
                      action='read',
                      id=dataset_dict.get('name'),
                      qualified=True)
        g.add(
            (dataset_ref, FOAF.homepage, URIRef(remove_trailing_spaces(uri))))

        # Etsin: primary identifier
        g.add((dataset_ref, ADMS.identifier,
               URIRef(remove_trailing_spaces(primary_pid))))

        # Etsin: Relation identifiers — mapped onto DCT relation properties.
        relation_pids = get_pids_by_type('relation', dataset_dict)
        for rpid in relation_pids:
            if rpid.get('relation') == 'isNewVersionOf' or rpid.get(
                    'relation') == 'isPreviousVersionOf':
                g.add((dataset_ref, DCT.isVersionOf,
                       URIRef(remove_trailing_spaces(rpid.get('id')))))
            elif rpid.get('relation') == 'hasPart':
                g.add((dataset_ref, DCT.hasPart,
                       URIRef(remove_trailing_spaces(rpid.get('id')))))
            elif rpid.get('relation') == 'isPartOf':
                g.add((dataset_ref, DCT.isPartOf,
                       URIRef(remove_trailing_spaces(rpid.get('id')))))
            else:
                # Any other relation kind becomes a plain identifier.
                g.add((dataset_ref, DCT.identifier,
                       URIRef(remove_trailing_spaces(rpid.get('id')))))

        # Etsin: Title and Description, including translations
        items = [
            (DCT.title, 'langtitle', 'title'),
            (DCT.description, 'notes'),
        ]

        for item in items:
            self._add_translated_triple_from_dict(dataset_dict, dataset_ref,
                                                  *item)

        # Etsin: Agents — each agent is mapped by its role.
        for agent in dataset_dict.get('agent', []):
            agent_role = agent.get('role')
            agent_id = agent.get('id')

            # Rights Holders (owner -> rightsHolder, distributor -> publisher)
            if agent_role in ['owner', 'distributor']:
                name = agent.get('name', None)

                if agent_role == 'owner':
                    # Non-URL owner names may fall back to the organisation.
                    if not get_if_url(agent.get('name')):
                        name = agent.get('name', agent.get('organisation', ''))
                    nodetype = DCT.rightsHolder

                if agent_role == 'distributor':
                    nodetype = DCT.publisher

                agent_node_ref = BNode()
                g.add((agent_node_ref, RDF.type, FOAF.Agent))
                g.add((dataset_ref, nodetype, agent_node_ref))
                g.add((agent_node_ref, FOAF.name, Literal(name)))
                if agent_id:
                    g.add((agent_node_ref, DCT.identifier, Literal(agent_id)))

            # Authors (author -> creator, contributor -> contributor)
            if agent_role in ['author', 'contributor']:
                if agent_role == 'author':
                    nodetype = DCT.creator

                if agent_role == 'contributor':
                    nodetype = DCT.contributor

                # Blank nodes chain agent -> memberOf -> organization.
                organization_ref = BNode()
                agent_ref = BNode()
                memberof_ref = BNode()
                creator_ref = BNode()

                g.add((organization_ref, FOAF.name,
                       Literal(agent.get('organisation', None))))
                g.add((memberof_ref, FOAF.organization, organization_ref))
                g.add((agent_ref, ORG.memberOf, memberof_ref))
                g.add((agent_ref, FOAF.name, Literal(agent.get('name', None))))
                g.add((creator_ref, FOAF.Agent, agent_ref))
                g.add((dataset_ref, nodetype, creator_ref))

                if agent_id:
                    g.add((agent_ref, DCT.identifier, Literal(agent_id)))

            # Funders — modelled as a FRAPO project the dataset is output of.
            if agent.get('role') == 'funder':
                organization_ref = BNode()
                memberof_ref = BNode()
                project_ref = BNode()
                isoutputof_ref = BNode()

                agent_url = agent.get('URL')
                if agent_url:
                    g.add((project_ref, FOAF.homepage, Literal(agent_url)))

                # Funding id is stored as an RDFS comment on the project.
                funding_id = agent.get('fundingid')
                if funding_id:
                    g.add((project_ref, RDFS.comment, Literal(funding_id)))

                g.add((organization_ref, FOAF.name,
                       Literal(agent.get('organisation', None))))
                g.add((memberof_ref, FOAF.organization, organization_ref))
                g.add((project_ref, ORG.memberOf, memberof_ref))

                agent_name = agent.get('name', None)
                g.add((project_ref, FOAF.name, Literal(agent_name)))

                if agent_id:
                    g.add((project_ref, DCT.identifier, Literal(agent_id)))

                g.add((isoutputof_ref, FOAF.Project, project_ref))
                g.add((dataset_ref, FRAPO.isOutputOf, isoutputof_ref))

        # Etsin: Publishers (dataset contacts)
        # NOTE(review): .get('contact') with no default raises TypeError if
        # the key is absent — presumably always present; confirm upstream.
        for contact in dataset_dict.get('contact'):
            agent_node_ref = BNode()
            agent_id = contact.get('id')

            g.add((agent_node_ref, RDF.type, FOAF.Agent))
            g.add((dataset_ref, DCT.publisher, agent_node_ref))

            contact_name = contact.get('name', None)
            g.add((agent_node_ref, FOAF.name, Literal(contact_name)))
            if agent_id:
                g.add((agent_node_ref, DCT.identifier, Literal(agent_id)))

            # 'hidden' is a sentinel for suppressed e-mail addresses.
            contact_email = contact.get('email')
            if contact_email and contact_email != 'hidden':
                g.add((agent_node_ref, FOAF.mbox,
                       URIRef("mailto:" +
                              remove_trailing_spaces(contact_email))))

            contact_url = contact.get('URL')
            if contact_url:
                g.add((agent_node_ref, FOAF.homepage,
                       URIRef(remove_trailing_spaces(contact_url))))

            # Phone numbers get all internal spaces removed before use.
            contact_phone = remove_all_spaces(contact.get('phone'))
            if contact_phone:
                g.add((agent_node_ref, FOAF.phone,
                       URIRef("tel:" + remove_trailing_spaces(contact_phone))))

        # Etsin: Organization — owning organization as an extra publisher.
        organization_name = resolve_org_name(dataset_dict.get('owner_org'))
        publisher_ref = BNode()
        g.add((dataset_ref, DCT.publisher, publisher_ref))
        g.add((publisher_ref, FOAF.organization, Literal(organization_name)))

        # Etsin: Tags - can be URLs or user inputted keywords
        # TODO: resolve URLs from Finto. Currently get_label_for_uri() breaks
        # RDFlib.
        for tag in dataset_dict.get('tags', []):
            display_name = tag.get('display_name')
            g.add((dataset_ref, DCAT.keyword, Literal(display_name)))
            tag_name = tag.get('name')
            if is_url(tag_name):
                g.add((dataset_ref, DCAT.theme,
                       URIRef(remove_trailing_spaces(tag_name))))

        # Etsin: Dates
        # Peter: Issued-field is new. This used to be inside CatalogRecord.
        items = [
            ('issued', DCT.issued, ['metadata_created'], Literal),
            ('modified', DCT.modified, ['metadata_modified'], Literal),
        ]
        self._add_date_triples_from_dict(dataset_dict, dataset_ref, items)

        # Etsin: Events (type/who/when/description tuples)
        for event in dataset_dict.get('event', []):
            event_ref = BNode()
            g.add((dataset_ref, DCT.event, event_ref))
            g.add((event_ref, DCT.type, Literal(event.get('type'))))
            g.add((event_ref, DCT.creator, Literal(event.get('who'))))
            g.add((event_ref, DCT.date, Literal(str(event.get('when')))))
            g.add((event_ref, DCT.description, Literal(event.get('descr'))))

        # Etsin: Citation
        citation = dataset_dict.get('citation')
        if citation:
            g.add((dataset_ref, DCT.bibliographicCitation, Literal(citation)))

        # Etsin: Distribution — download/access URL, checksum, format.
        availability_list = [
            'access_application_rems', 'access_application_other',
            'access_request'
        ]

        checksum_ref = BNode()
        checksum_parent_ref = BNode()
        distribution_ref = BNode()
        dist_parent_ref = BNode()

        if dataset_dict.get('availability') == 'direct_download':
            access_url = get_download_url(dataset_dict)
            g.add((distribution_ref, DCAT.downloadURL, Literal(access_url)))

        checksum = dataset_dict.get('checksum')
        algorithm = dataset_dict.get('algorithm')
        if checksum and algorithm:
            g.add((checksum_ref, SPDX.checksumValue, Literal(checksum)))
            g.add((checksum_ref, SPDX.algorithm, Literal(algorithm)))
            g.add((checksum_parent_ref, SPDX.Checksum, checksum_ref))
            g.add((distribution_ref, SPDX.checksum, checksum_parent_ref))

        if dataset_dict.get('availability') in availability_list:
            access_url = get_download_url(dataset_dict)
            g.add((distribution_ref, DCAT.accessURL, Literal(access_url)))

        mimetype = dataset_dict.get('mimetype')
        if mimetype:
            g.add((distribution_ref, DCAT.mediaType, Literal(mimetype)))

        dist_format = dataset_dict.get('format')
        if dist_format:
            g.add((distribution_ref, DCT['format'], Literal(dist_format)))

        g.add((dist_parent_ref, DCAT.Distribution, distribution_ref))
        g.add((dataset_ref, DCAT.distribution, dist_parent_ref))

        # Etsin: Disciplines — URLs become URIRefs, plain names Literals.
        disciplines = dataset_dict.get('discipline', '')
        for discipline in split_disciplines(disciplines):
            if is_url(discipline):
                disc = URIRef(remove_trailing_spaces(discipline))

            else:
                disc = Literal(discipline)
            g.add((dataset_ref, DCT.subject, disc))

        # Etsin: Rights Declaration
        # Peter: There's no way to add an xmlns attribute under
        # the parent <DCT:rights> in rdflib
        category, declarations = get_rightscategory(dataset_dict)
        declaration_strings = ''
        for declaration in declarations:
            declaration_strings += u'<RightsDeclaration>{}</RightsDeclaration>\n'\
                .format(declaration)
        xml_string = u'<RightsDeclarationMD RIGHTSCATEGORY="{}" \
            xmlns="http://www.loc.gov/METS/" >\n{}</RightsDeclarationMD>'\
            .format(category, declaration_strings)

        license_url = dataset_dict.get('license_URL')

        rights_ref = BNode()
        g.add((dataset_ref, DCT.rights, rights_ref))
        g.add((rights_ref, DCT.RightsStatement,
               Literal(xml_string, datatype=RDF.XMLLiteral)))
        g.add((rights_ref, DCT.RightsStatement, Literal(license_url)))

        # Etsin: Spatial
        coverage = dataset_dict.get('geographic_coverage')
        if coverage:
            spatial_ref = BNode()
            location_ref = BNode()
            g.add((location_ref, RDFS.label, Literal(coverage)))
            g.add((spatial_ref, DCT.Location, location_ref))
            # NOTE(review): DCT.spatial_ref looks like a typo for
            # DCT.spatial — confirm against the DCT vocabulary.
            g.add((dataset_ref, DCT.spatial_ref, spatial_ref))

        # Etsin: Temporal
        # Peter: hasBeginning and hasEnd left out
        temporal_coverage_begin = dataset_dict.get('temporal_coverage_begin')
        temporal_coverage_end = dataset_dict.get('temporal_coverage_end')
        if temporal_coverage_begin or temporal_coverage_end:
            temporal_extent = BNode()

            g.add((temporal_extent, RDF.type, DCT.PeriodOfTime))
            if temporal_coverage_begin:
                self._add_date_triple(temporal_extent, SCHEMA.startDate,
                                      temporal_coverage_begin)

            if temporal_coverage_end:
                self._add_date_triple(temporal_extent, SCHEMA.endDate,
                                      temporal_coverage_end)

            g.add((dataset_ref, DCT.temporal, temporal_extent))

        # Etsin: language field needs to be stripped from spaces
        langs = self._get_dict_value(dataset_dict, 'language', '').split(', ')
        for lang in langs:
            params = (dataset_ref, DCAT.language, Literal(lang))
            self.g.add(params)