Esempio n. 1
0
    def render_record(self, c):
        """
        Render a record using the Darwin Core template.

        Calls ``abort`` with a 404 unless the resource's format is ``dwc`` (compared
        case-insensitively). Otherwise ``record_title``, ``dwc_terms`` and
        ``dynamic_properties`` are set on ``c`` before ``record/dwc.html`` is rendered.

        :param c: the template context; ``c.resource`` and ``c.record_dict`` are read from it
        :return: the result of rendering ``record/dwc.html``
        """
        if c.resource['format'].lower() != 'dwc':
            abort(404, _('Record not in Darwin Core format'))

        # Prefer the catalogue number as the title, falling back to the occurrence ID
        c.record_title = c.record_dict.get('catalogNumber', None) or c.record_dict.get('occurrenceID')
        c.dwc_terms = dwc_terms(c.record_dict.keys())

        # pop() with a string key is a mapping lookup: a record without dynamic
        # properties raises KeyError (never IndexError), so that is what must be caught
        try:
            c.dynamic_properties = c.dwc_terms.pop('dynamicProperties')
        except KeyError:
            c.dynamic_properties = []

        return p.toolkit.render('record/dwc.html')
    def _dwc(self):
        '''
        Generate the triples describing the record with DWC (DarWin Core) terms.

        The identifier comes first, then the plain terms, then the dynamic properties (as a
        single JSON literal), the associated media, and finally the created/modified dates.

        :return: yields triples
        '''
        subject = self.record_ref
        ns = self.namespaces
        record = self.record

        yield subject, ns.dc.identifier, Literal(record[u'occurrenceID'])

        grouped_terms = dwc_terms(record.keys())

        # these groups and terms are handled individually further down
        skipped_groups = {u'dynamicProperties'}
        skipped_terms = {u'associatedMedia', u'created', u'modified'}
        for group_name, group_terms in grouped_terms.items():
            if group_name not in skipped_groups:
                for _uri, term_name in group_terms.items():
                    if term_name not in skipped_terms:
                        predicate = getattr(ns.dwc, term_name)
                        yield subject, predicate, Literal(record.get(term_name))

        # gather the dynamic properties so they can be yielded as one JSON dump
        dynamic_groups = grouped_terms.get(u'dynamicProperties', {})
        dynamic_values = {
            name: record.get(name)
            for names in dynamic_groups.values()
            for name in names
            if name != u'created'
        }
        if dynamic_values:
            yield subject, ns.dwc.dynamicProperties, Literal(json.dumps(dynamic_values))

        # the associatedMedia term is yielded as a pipe-separated list of image URIs
        media = record.get(u'associatedMedia', [])
        if media:
            yield (subject, ns.dwc.associatedMedia,
                   Literal(as_dwc_list(map(itemgetter(u'identifier'), media))))

        # the created date, converted to the correct format
        created = Literal(epoch_to_datetime(record.get(u'created')))
        yield subject, ns.dc.created, created

        # the modified date, converted to the correct format
        modified = Literal(epoch_to_datetime(record.get(u'modified')))
        yield subject, ns.dwc.modified, modified
Esempio n. 3
0
    def graph_from_record(self, record_dict, resource, record_ref):
        """
        RDF for an individual record - currently this is a specimen record

        Similar approach to: curl -L -H "Accept: application/rdf+ttl" http://data.rbge.org.uk/herb/E00321910

        Triples are added to ``self.g``; nothing is returned. The method works on a shallow
        copy of ``record_dict``, so the caller's dict is left untouched.

        :param record_dict: the record's values keyed by field name; ``_id`` and
                            ``collectionCode`` are required, everything else is optional
        :param resource: the resource holding the record (``id`` and ``get_package_id()`` are used)
        :param record_ref: subject of the metadata triples; ``#object`` is appended to it to
                           form the URI of the specimen object
        :return: None
        """
        context = self.get_context()
        namespaces = {
            'dc': DC,
            'dcat': DCAT,
            'dwc': DWC,
            'sdwc': SDWC,
            'void': VOID,
            'cc': CC,
            'foaf': FOAF,
            'dqv': DQV,
            'aiiso': AIISO,
            'tdwgi': TDWGI,
            'owl': OWL
        }

        g = self.g

        # Fields are popped off the record further down; copy it first so those
        # removals never leak back into the caller's dict
        record_dict = dict(record_dict)

        # Add some more namespaces
        for prefix, namespace in namespaces.items():
            g.bind(prefix, namespace)

        # Used further down to look up the matching GBIF record, if it exists
        occurrence_id = record_dict.get('occurrenceID')

        package_id = resource.get_package_id()

        # Create licences metadata for record
        object_uri = URIRef(record_ref + '#object')

        # Add publisher - as per BBC we don't need the full org description here
        nhm_uri = URIRef('http://nhm.ac.uk')

        # Add object description - the metadata and license
        g.add((record_ref, RDF.type, FOAF.Document))
        g.add((record_ref, CC.license, URIRef(METADATA_LICENCE)))
        # This metadata describes #dataset
        g.add((record_ref, FOAF.primaryTopic, object_uri))
        # Add the de-referenced link to record
        record_link = url_for('record', action='view', package_name=package_id,
                              resource_id=resource.id, record_id=record_dict['_id'],
                              qualified=True)
        g.add((record_ref, DC.hasVersion, URIRef(record_link)))
        # Add institution properties
        g.add((record_ref, FOAF.organization, nhm_uri))
        g.add((record_ref, AIISO.Department, Literal(get_department(record_dict['collectionCode']))))

        try:
            sub_dept = record_dict.pop('subDepartment')
        except KeyError:
            pass
        else:
            g.add((record_ref, AIISO.Division, Literal(sub_dept)))

        # Created and modified belong to the metadata record, not the specimen
        for term in ['created', 'modified']:
            # get() never raises KeyError: a missing (or null) date comes back as None
            # and has to be skipped explicitly rather than handed to parse_date
            value = record_dict.get(term)
            if value is None:
                continue
            # Parse into data format, and add as dates
            _date = parse_date(value)
            g.add((record_ref, getattr(DWC, term), Literal(_date.isoformat(), datatype=XSD.dateTime)))

        try:
            gbif_record = toolkit.get_action('gbif_record_show')(context, {
                'occurrence_id': occurrence_id
            })
        except NotFound:
            gbif_record = {}
        else:
            # Assert equivalence with the GBIF record. URLs are built with string
            # formatting rather than os.path.join, which is platform dependent and
            # rejects non-string (e.g. integer) identifiers
            gbif_uri = 'http://www.gbif.org/occurrence/%s' % (gbif_record['gbifID'],)
            g.add((object_uri, OWL.sameAs, URIRef(gbif_uri)))
            # If we have a GBIF country code, add it
            # Annoyingly, this seems to be the only geographic element on GBIF with URI
            country_code = gbif_record.get('gbifCountryCode')
            if country_code:
                country_uri = 'http://www.gbif.org/country/%s' % (country_code,)
                g.add((object_uri, DWC.countryCode, URIRef(country_uri)))

        # Now, create the specimen object
        # Remove nulls and other empty values from record_dict
        record_dict = {k: v for k, v in record_dict.items() if v}

        # Now add the actual specimen object
        g.add((object_uri, RDF.type, FOAF.Document))
        g.add((object_uri, RDF.type, SDWC.SimpleDarwinRecordSet))

        # Make sure decimal latitude and longitude are strings
        for d in ['decimalLatitude', 'decimalLongitude']:
            try:
                record_dict[d] = str(record_dict[d])
            except KeyError:
                pass

        # Adding images as JSON is rubbish! So lets try and do it properly
        try:
            associated_media = record_dict.pop('associatedMedia')
        except KeyError:
            pass
        else:
            images = json.loads(associated_media)
            for image in images:
                image_uri = URIRef(image['identifier'])
                g.set((image_uri, RDF.type, FOAF.Image))
                title = image.get('title', None)
                if title:
                    g.set((image_uri, DC.title, Literal(title)))
                g.set((image_uri, CC.license, URIRef(image['license'])))
                g.set((image_uri, DC.RightsStatement, Literal(image['rightsHolder'])))
                g.set((image_uri, DC.Format, Literal(image['format'])))
                # Add link from image to object...
                g.set((image_uri, FOAF.depicts, object_uri))
                # And object to image
                g.add((object_uri, FOAF.depiction, image_uri))

        # This record belongs in X dataset
        dataset_ref = URIRef(dataset_uri({'id': package_id}) + '#dataset')
        g.add((object_uri, VOID.inDataset, dataset_ref))

        dwc_terms_dict = dwc_terms(record_dict.keys())

        # Handle dynamic properties separately; a record may not have any, in which
        # case there is no such group to pop
        dynamic_properties = dwc_terms_dict.pop('dynamicProperties', {})

        for group, terms in dwc_terms_dict.items():
            for _uri, term in terms.items():
                # Do we have a GBIF key value?
                # Uppercase first letter of term, and convert to GBIF key format => gbifGenusKey
                uc_term = term[0].upper() + term[1:]
                gbif_term_key = 'gbif%sKey' % uc_term
                gbif_key = gbif_record.get(gbif_term_key)

                # Do we have a GBIF key value? If we do, we can provide a URI to GBIF
                if gbif_key:
                    gbif_uri = URIRef('http://www.gbif.org/species/%s' % (gbif_key,))
                    # Add the GBIF species URI with label
                    g.add((gbif_uri, RDFS.label, Literal(record_dict.get(term))))
                    # And associated our specimen object's DWC term with the GBIF URI
                    g.add((object_uri, getattr(DWC, term), gbif_uri))
                else:
                    # We do not have a GBIF key, so no URI: Add the term value as a literal
                    g.add((object_uri, getattr(DWC, term), Literal(record_dict.get(term))))

        g.add((object_uri, DC.identifier, Literal(record_dict.get('uuid'))))

        # All dynamic properties are added as a single JSON-encoded literal
        dynamic_properties_dict = {}
        for properties in dynamic_properties.values():
            for property_name in properties:
                dynamic_properties_dict[property_name] = record_dict.get(property_name)
        if dynamic_properties_dict:
            g.add((object_uri, DWC.dynamicProperties, Literal(json.dumps(dynamic_properties_dict))))