Python dc_metadata_readerの例、ckanext.oaipmh.oai_dc_reader.dc_metadata_reader Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_unit.py プロジェクト: LondonAppDev/ckanext-oaipmh

    def test_dc_metadata_reader(self):
        '''
        Read the whole fixture document held in ``self.xml``.

        Checks that the reader returns a truthy metadata object whose map
        has a 'unified' entry containing an 'availability' key.
        '''
        reader = dcr.dc_metadata_reader('default')
        root = etree.fromstring(self.xml)
        metadata = reader(root)

        assert metadata

        assert 'unified' in metadata.getMap()
        unified = metadata.getMap()['unified']
        assert 'availability' in unified

コード例 #2

0

ファイルを表示

ファイル: harvester.py プロジェクト: LondonAppDev/ckanext-oaipmh

    def parse_xml(self, f, context, orig_url=None, strict=True):
        """ Parse XML and return the package data dictionary.

        :param f: XML data as string
        :param context: CKAN context (not read by this method)
        :param orig_url: original URL (not read by this method)
        :param strict: Not used here, required by caller
        :return: the 'unified' entry of the metadata produced by
            ``dc_metadata_reader('default')``
        """
        reader = dc_metadata_reader('default')
        root = etree.fromstring(f)
        record = reader(root)
        return record['unified']

コード例 #3

0

ファイルを表示

    def parse_xml(self, f, context, orig_url=None, strict=True):
        """ Parse an XML string and return its 'unified' metadata entry.

        :param f: XML data as string
        :param context: CKAN context (unused in this method body)
        :param orig_url: original URL (unused in this method body)
        :param strict: Not used here, required by caller
        :return: the value stored under 'unified' in the metadata built by
            the 'default' ``dc_metadata_reader``
        """
        read_default = dc_metadata_reader('default')
        parsed = read_default(etree.fromstring(f))
        return parsed['unified']

コード例 #4

0

ファイルを表示

    def test_dc_metadata_reader(self):
        '''
        Parse the complete XML fixture with the 'default' reader.

        The result must be truthy and its map must expose a 'unified'
        entry that includes the 'availability' key.
        '''
        read_default = dcr.dc_metadata_reader('default')
        document = etree.fromstring(self.xml)
        metadata = read_default(document)

        assert metadata

        assert 'unified' in metadata.getMap()
        unified_fields = metadata.getMap()['unified']
        assert 'availability' in unified_fields

コード例 #5

0

ファイルを表示

ファイル: test_unit.py プロジェクト: LondonAppDev/ckanext-oaipmh

    def _run_import(self, xml, ida, config=None):
        '''
        Feed a record obtained from ``xml`` through the harvester import stage.

        :param xml: input handed to ``_get_record``
        :param ida: when truthy the 'ida' reader type is used, else 'default'
        :param config: harvest object config; defaults to
            ``{'type': <harvest type>}`` when None
        '''
        # Create the user and organization the import relies on if missing.
        harvest_user = model.User.get('harvest')
        if not harvest_user:
            new_user = model.User(name='harvest', sysadmin=True)
            new_user.save()
        if not model.Group.get('test'):
            create_organization = get_action('organization_create')
            create_organization({'user': '******'}, {'name': 'test'})

        record = _get_record(xml)
        if ida:
            harvest_type = 'ida'
        else:
            harvest_type = 'default'
        if config is None:
            config = {'type': harvest_type}

        reader = dc_metadata_reader(harvest_type)
        metadata = reader(record)
        metadata['unified']['owner_org'] = "test"

        # The fake harvest object carries the metadata map serialized as JSON.
        content = json.dumps(metadata.getMap())
        harvest_object = _FakeHarvestObject(content, "test_id", config)

        self.harvester.import_stage(harvest_object)

コード例 #6

0

ファイルを表示

ファイル: importformats.py プロジェクト: kata-csc/ckanext-oaipmh

def create_metadata_registry(harvest_type=None, service_url=None):
    '''Return new metadata registry with all common metadata readers

    Readers are registered for the metadataPrefixes oai_dc, cmdi0571,
    oai_datacite3, nrd, rdf and xml.

    :param harvest_type: type given to ``dc_metadata_reader`` for the
        oai_dc reader; 'default' is used when this is falsy
    :param service_url: value given to ``CmdiReader`` for the cmdi0571 reader
    :returns: metadata registry instance
    :rtype: oaipmh.metadata.MetadataRegistry
    '''
    registry = om.MetadataRegistry()
    register = registry.registerReader

    dc_type = harvest_type or 'default'
    register('oai_dc', dc_metadata_reader(dc_type))
    register('cmdi0571', CmdiReader(service_url))
    register('oai_datacite3', DataCiteReader())
    register('nrd', nrd_metadata_reader)
    register('rdf', rdf_reader)
    register('xml', xml_reader)
    return registry

コード例 #7

0

ファイルを表示

ファイル: importformats.py プロジェクト: kata-csc/ckanext-oaipmh

def create_metadata_registry(harvest_type=None, service_url=None):
    '''Build a metadata registry holding the common metadata readers

    The metadataPrefixes registered here are oai_dc, cmdi0571,
    oai_datacite3, nrd, rdf and xml.

    :param harvest_type: reader type for oai_dc, forwarded to
        ``dc_metadata_reader``; falls back to 'default' when falsy
    :param service_url: forwarded to ``CmdiReader`` (cmdi0571 reader)
    :returns: metadata registry instance
    :rtype: oaipmh.metadata.MetadataRegistry
    '''
    registry = om.MetadataRegistry()
    add_reader = registry.registerReader

    oai_dc_type = harvest_type or 'default'
    add_reader('oai_dc', dc_metadata_reader(oai_dc_type))
    add_reader('cmdi0571', CmdiReader(service_url))
    add_reader('oai_datacite3', DataCiteReader())
    add_reader('nrd', nrd_metadata_reader)
    add_reader('rdf', rdf_reader)
    add_reader('xml', xml_reader)
    return registry

コード例 #8

0

ファイルを表示

    def _run_import(self, xml, ida, config=None):
        '''
        Run the harvester's import stage for a record taken from ``xml``.

        :param xml: input passed on to ``_get_record``
        :param ida: truthy selects the 'ida' reader type, otherwise 'default'
        :param config: config for the fake harvest object; when None it is
            set to ``{'type': <harvest type>}``
        '''
        # Ensure the 'harvest' user and the 'test' organization exist first.
        if not model.User.get('harvest'):
            harvest_user = model.User(name='harvest', sysadmin=True)
            harvest_user.save()
        if not model.Group.get('test'):
            organization_create = get_action('organization_create')
            organization_create({'user': '******'}, {'name': 'test'})

        record = _get_record(xml)
        harvest_type = 'default'
        if ida:
            harvest_type = 'ida'
        if config is None:
            config = {'type': harvest_type}

        read_record = dc_metadata_reader(harvest_type)
        metadata = read_record(record)
        metadata['unified']['owner_org'] = "test"

        # Serialize the metadata map; the fake harvest object gets it as JSON.
        serialized = json.dumps(metadata.getMap())
        harvest_object = _FakeHarvestObject(serialized, "test_id", config)

        self.harvester.import_stage(harvest_object)

コード例 #9

0

ファイルを表示

ファイル: ida.py プロジェクト: kata-csc/ckanext-oaipmh

 def parse_xml(self, f, context, orig_url=None, strict=True):
     """Parse an XML string with the 'ida' reader and return its 'unified' entry.

     ``context``, ``orig_url`` and ``strict`` are accepted but not read here.
     """
     read_ida = dc_metadata_reader('ida')
     root = etree.fromstring(f)
     return read_ida(root)['unified']

コード例 #10

0

ファイルを表示

    def test_dc_metadata_reader_fields(self):
        '''
        Test reading a whole file and check that fields are what they are supposed to be

        The 'unified' metadata is compared with EXPECTED_FIELDS after the
        entries that cannot be compared directly have been removed; agents
        are then checked separately, role by role.
        '''
        EXPECTED_FIELDS = {
            'access_application_URL': '',
            'access_request_URL': '',
            'algorithm': '',
            'availability': 'contact_owner',
            'checksum': '',
            'contact': [],
            'direct_download_URL': u'http://link.aip.org/link/?jcp/123/064507',
            'discipline': '',
            'geographic_coverage': '',
            #'langtitle': [{'lang': '',
            #               'value': u'Neutralization of solvated protons and formation of noble-gas hydride molecules: matrix-isolation indications of tunneling mechanisms?'}],
            'title': '{"und": "Neutralization of solvated protons and formation of noble-gas hydride molecules: matrix-isolation indications of tunneling mechanisms?"}',
            'language': u'en',
            'license_URL': u'Copyright 2005 American Institute of Physics. This article may be downloaded for personal use only. Any other use requires prior permission of the author and the American Institute of Physics.',
            'license_id': 'notspecified',
            'mimetype': '',
            'notes': '{"und": ""}',
            'pids': [
                {
                    'type': u'relation',
                    'relation': u'generalRelation',
                    'id': u'http://link.aip.org/link/?jcp/123/064507',
                    'provider': u'http://helda.helsinki.fi/oai/request'
                },
                {
                    'id': u'http://hdl.handle.net/10138/1074',
                    'provider': u'http://helda.helsinki.fi/oai/request',
                    'type': u'relation',
                    'relation': u'generalRelation'
                },
            ],
            'tag_string': '',
            'temporal_coverage_begin': '',
            'temporal_coverage_end': '',
            'type': 'dataset',
            'version': u'2005-08-08',
            'uploader': u''
        }

        metadata = dcr.dc_metadata_reader('default')(etree.fromstring(
            self.xml))
        assert metadata

        data_dict = metadata['unified']

        # Work on a shallow copy so data_dict keeps 'agent' for the checks below.
        temp = copy.copy(data_dict)

        temp.pop('agent')  # TODO: Compare also agents directly

        # Do not compare id since it is always generated by Etsin
        temp.pop('id')
        # Do not compare name since it is always generated by Etsin
        temp.pop('name')

        ### Pid reform introduced some changes that can't be validated with a
        ### simple dict compare
        # Do not compare primary pid generated by Etsin
        temp['pids'] = [
            pid for pid in temp['pids'] if pid['type'] != u'primary'
        ]
        # Smear url value may be any data pid id
        data_pid_ids = ['http://link.aip.org/link/?jcp/123/064507',
                        'http://hdl.handle.net/10138/1074']
        if temp['smear_url'] in data_pid_ids:
            temp.pop('smear_url')

        testfixtures.compare(temp, EXPECTED_FIELDS)

        # for (key, value) in EXPECTED_FIELDS.items():
        #     assert key in data_dict, "Key not found: %r" % key
        #
        #     output_value = data_dict.get(key)
        #
        #     # Note. Possibility for random fail, because data order is not promised by python
        #     # TODO: testfixtures.compare() could be used here to prevent random failing
        #     assert unicode(output_value) == unicode(value), "Values for key %r not matching: %r versus %r" % (
        #         key, value, output_value)

        # Countdown counters: each matching agent decrements one of them, so
        # both reach zero only with exactly one funder and three authors.
        fail_agent = 1
        fail_author = 3
        for agent in data_dict.get('agent', []):
            if agent['role'] == 'funder':
                for key, value in ('URL', ''), ('id', ''), ('fundingid',
                                                            ''), ('name', ''):
                    self.assertIn(key, agent,
                                  "Expected to find key %s" % key)
                    # assertEqual instead of the deprecated assertEquals alias
                    self.assertEqual(agent[key], value)
                fail_agent -= 1
            elif agent['role'] == 'author':
                self.assertIn(agent['name'], (u'Khriachtchev, Leonid',
                                              u'Lignell, Antti',
                                              u'R\xe4s\xe4nen, Markku'))
                fail_author -= 1

        self.assertEqual(fail_agent, 0, "Invalid agent data")
        self.assertEqual(fail_author, 0, "Invalid author data")

コード例 #11

0

ファイルを表示

ファイル: test_unit.py プロジェクト: LondonAppDev/ckanext-oaipmh

    def test_dc_metadata_reader_fields(self):
        '''
        Test reading a whole file and check that fields are what they are supposed to be

        The 'unified' metadata, minus its 'agent' entry, is compared with
        EXPECTED_FIELDS; agents are then checked separately, role by role.
        '''
        EXPECTED_FIELDS = {'access_application_URL': '',
                           'access_request_URL': '',
                           'algorithm': '',
                           'availability': 'through_provider',
                           'checksum': '',
                           'contact': [],
                           'direct_download_URL': u'http://link.aip.org/link/?jcp/123/064507',
                           'discipline': '',
                           'geographic_coverage': '',
                           #'langtitle': [{'lang': '',
                           #               'value': u'Neutralization of solvated protons and formation of noble-gas hydride molecules: matrix-isolation indications of tunneling mechanisms?'}],
                           'title': '{"und": "Neutralization of solvated protons and formation of noble-gas hydride molecules: matrix-isolation indications of tunneling mechanisms?"}',
                           'language': u'en',
                           'license_URL': u'Copyright 2005 American Institute of Physics. This article may be downloaded for personal use only. Any other use requires prior permission of the author and the American Institute of Physics.',
                           'license_id': 'notspecified',
                           'mimetype': '',
                           'name': 'http---link-aip-org-link--jcp-123-064507',
                           'notes': '{"und": ""}',
                           'pids': [{'type': 'data',
                                     'id': u'http://link.aip.org/link/?jcp/123/064507',
                                     'provider': u'http://helda.helsinki.fi/oai/request'},
                                    {'id': u'http://hdl.handle.net/10138/1074',
                                     'provider': u'http://helda.helsinki.fi/oai/request',
                                     'type': 'data'},
                                    ],
                           'tag_string': '',
                           'temporal_coverage_begin': '',
                           'temporal_coverage_end': '',
                           'through_provider_URL': u'http://link.aip.org/link/?jcp/123/064507',
                           'type': 'dataset',
                           'version': u'2005-08-08',
                           'uploader': u''}

        metadata = dcr.dc_metadata_reader('default')(etree.fromstring(self.xml))
        assert metadata

        data_dict = metadata['unified']

        # Work on a shallow copy so data_dict keeps 'agent' for the checks below.
        temp = copy.copy(data_dict)

        temp.pop('agent')   # TODO: Compare also agents directly

        testfixtures.compare(temp, EXPECTED_FIELDS)

        # for (key, value) in EXPECTED_FIELDS.items():
        #     assert key in data_dict, "Key not found: %r" % key
        #
        #     output_value = data_dict.get(key)
        #
        #     # Note. Possibility for random fail, because data order is not promised by python
        #     # TODO: testfixtures.compare() could be used here to prevent random failing
        #     assert unicode(output_value) == unicode(value), "Values for key %r not matching: %r versus %r" % (
        #         key, value, output_value)

        # Countdown counters: each matching agent decrements one of them, so
        # both reach zero only with exactly one funder and three authors.
        fail_agent = 1
        fail_author = 3
        for agent in data_dict.get('agent', []):
            if agent['role'] == 'funder':
                for key, value in ('URL', ''), ('id', ''), ('fundingid', ''), ('name', ''):
                    self.assertIn(key, agent, "Expected to find key %s" % key)
                    # assertEqual instead of the deprecated assertEquals alias
                    self.assertEqual(agent[key], value)
                fail_agent -= 1
            elif agent['role'] == 'author':
                self.assertIn(agent['name'], (u'Khriachtchev, Leonid', u'Lignell, Antti', u'R\xe4s\xe4nen, Markku'))
                fail_author -= 1

        self.assertEqual(fail_agent, 0, "Invalid agent data")
        self.assertEqual(fail_author, 0, "Invalid author data")