def test_dc_metadata_reader(self):
    '''
    Read the whole fixture document with the 'default' reader and check
    that the result carries a 'unified' map containing 'availability'.
    '''
    read = dcr.dc_metadata_reader('default')
    root = etree.fromstring(self.xml)
    metadata = read(root)

    assert metadata
    assert 'unified' in metadata.getMap()
    unified = metadata.getMap()['unified']
    assert 'availability' in unified
def parse_xml(self, f, context, orig_url=None, strict=True):
    """
    Parse an XML string with the 'default' reader and return package data.

    :param f: XML data as a string
    :param context: CKAN context (not read by this method)
    :param orig_url: original URL (not read by this method)
    :param strict: not used here, required by the caller
    :return: the 'unified' entry of the parsed metadata (used for package creation)
    """
    read = dc_metadata_reader('default')
    root = etree.fromstring(f)
    parsed = read(root)
    return parsed['unified']
def test_dc_metadata_reader(self):
    '''
    Parse the complete fixture XML using the 'default' reader and verify
    that 'unified' exists in the map and itself holds 'availability'.
    '''
    reader = dcr.dc_metadata_reader('default')
    document = etree.fromstring(self.xml)
    metadata = reader(document)

    assert metadata
    assert 'unified' in metadata.getMap()
    unified_fields = metadata.getMap()['unified']
    assert 'availability' in unified_fields
def _run_import(self, xml, ida, config=None):
    '''
    Push one XML record through the harvester's import stage.

    :param xml: input handed to _get_record() to obtain the record
    :param ida: truthy selects the 'ida' reader type, otherwise 'default'
    :param config: harvest object config; defaults to {'type': <reader type>}
    '''
    # Create the 'harvest' user and the 'test' organization when lookups
    # for them come back empty.
    harvest_user = model.User.get('harvest')
    if not harvest_user:
        model.User(name='harvest', sysadmin=True).save()
    test_group = model.Group.get('test')
    if not test_group:
        get_action('organization_create')({'user': '******'}, {'name': 'test'})

    record = _get_record(xml)

    if ida:
        harvest_type = 'ida'
    else:
        harvest_type = 'default'
    if config is None:
        config = {'type': harvest_type}

    read = dc_metadata_reader(harvest_type)
    metadata = read(record)
    metadata['unified']['owner_org'] = "test"

    harvest_object = _FakeHarvestObject(
        json.dumps(metadata.getMap()), "test_id", config)
    self.harvester.import_stage(harvest_object)
def create_metadata_registry(harvest_type=None, service_url=None):
    '''Return a new metadata registry with the common metadata readers.

    Readers are registered for the metadataPrefixes oai_dc, cmdi0571,
    oai_datacite3, nrd, rdf and xml.

    :param harvest_type: type passed to dc_metadata_reader; 'default' is
        used when this is empty or None
    :param service_url: passed to the CmdiReader constructor
    :returns: metadata registry instance
    :rtype: oaipmh.metadata.MetadataRegistry
    '''
    registry = om.MetadataRegistry()

    reader_type = harvest_type if harvest_type else 'default'
    dc_reader = dc_metadata_reader(reader_type)
    registry.registerReader('oai_dc', dc_reader)

    cmdi_reader = CmdiReader(service_url)
    registry.registerReader('cmdi0571', cmdi_reader)

    datacite_reader = DataCiteReader()
    registry.registerReader('oai_datacite3', datacite_reader)

    registry.registerReader('nrd', nrd_metadata_reader)
    registry.registerReader('rdf', rdf_reader)
    registry.registerReader('xml', xml_reader)
    return registry
def _run_import(self, xml, ida, config=None):
    '''
    Run the harvester import stage for a single XML record.

    :param xml: input given to _get_record() to build the record
    :param ida: when truthy the 'ida' reader type is used, else 'default'
    :param config: config for the fake harvest object; when None it is
        {'type': <reader type>}
    '''
    # The 'harvest' user and 'test' organization are created only when
    # the corresponding lookup returns nothing.
    existing_user = model.User.get('harvest')
    if not existing_user:
        model.User(name='harvest', sysadmin=True).save()
    existing_org = model.Group.get('test')
    if not existing_org:
        create_org = get_action('organization_create')
        create_org({'user': '******'}, {'name': 'test'})

    record = _get_record(xml)

    if ida:
        harvest_type = 'ida'
    else:
        harvest_type = 'default'
    if config is None:
        config = {'type': harvest_type}

    metadata = dc_metadata_reader(harvest_type)(record)
    unified = metadata['unified']
    unified['owner_org'] = "test"

    harvest_object = _FakeHarvestObject(
        json.dumps(metadata.getMap()), "test_id", config)
    self.harvester.import_stage(harvest_object)
def parse_xml(self, f, context, orig_url=None, strict=True):
    """
    Parse an XML string with the 'ida' reader and return package data.

    :param f: XML data as a string
    :param context: not read by this method
    :param orig_url: not read by this method
    :param strict: not read by this method
    :return: the 'unified' entry of the parsed metadata
    """
    read = dc_metadata_reader('ida')
    root = etree.fromstring(f)
    parsed = read(root)
    return parsed['unified']
def test_dc_metadata_reader_fields(self):
    '''
    Test reading a whole file and check that fields are what they are
    supposed to be.

    The 'unified' dict is compared against EXPECTED_FIELDS after removing
    the entries that cannot be compared directly (agent, id, name, primary
    pids and smear_url); agents are then checked separately by role.
    '''
    EXPECTED_FIELDS = {
        'access_application_URL': '',
        'access_request_URL': '',
        'algorithm': '',
        'availability': 'contact_owner',
        'checksum': '',
        'contact': [],
        'direct_download_URL': u'http://link.aip.org/link/?jcp/123/064507',
        'discipline': '',
        'geographic_coverage': '',
        'title': '{"und": "Neutralization of solvated protons and formation of noble-gas hydride molecules: matrix-isolation indications of tunneling mechanisms?"}',
        'language': u'en',
        'license_URL': u'Copyright 2005 American Institute of Physics. This article may be downloaded for personal use only. Any other use requires prior permission of the author and the American Institute of Physics.',
        'license_id': 'notspecified',
        'mimetype': '',
        'notes': '{"und": ""}',
        'pids': [
            {
                'type': u'relation',
                'relation': u'generalRelation',
                'id': u'http://link.aip.org/link/?jcp/123/064507',
                'provider': u'http://helda.helsinki.fi/oai/request'
            },
            {
                'id': u'http://hdl.handle.net/10138/1074',
                'provider': u'http://helda.helsinki.fi/oai/request',
                'type': u'relation',
                'relation': u'generalRelation'
            },
        ],
        'tag_string': '',
        'temporal_coverage_begin': '',
        'temporal_coverage_end': '',
        'type': 'dataset',
        'version': u'2005-08-08',
        'uploader': u''
    }

    metadata = dcr.dc_metadata_reader('default')(etree.fromstring(
        self.xml))
    assert metadata

    data_dict = metadata['unified']
    temp = copy.copy(data_dict)

    temp.pop('agent')  # TODO: Compare also agents directly
    # Do not compare id since it is always generated by Etsin
    temp.pop('id')
    # Do not compare name since it is always generated by Etsin
    temp.pop('name')

    # Pid reform introduced some changes that can't be validated with a
    # simple dict compare.
    # Do not compare primary pid generated by Etsin
    temp['pids'] = [
        pid for pid in temp['pids'] if pid['type'] != u'primary'
    ]

    # Smear url value may be any data pid id; it is only dropped when it
    # is one of the known pids, so an unexpected value still fails the
    # comparison below.
    acceptable_smear_urls = ['http://link.aip.org/link/?jcp/123/064507',
                             'http://hdl.handle.net/10138/1074']
    if temp['smear_url'] in acceptable_smear_urls:
        temp.pop('smear_url')

    testfixtures.compare(temp, EXPECTED_FIELDS)

    # Count down the agents we expect to see: one funder, three authors.
    fail_agent = 1
    fail_author = 3
    expected_authors = (u'Khriachtchev, Leonid', u'Lignell, Antti',
                        u'R\xe4s\xe4nen, Markku')

    for agent in data_dict.get('agent', []):
        if agent['role'] == 'funder':
            for key, value in ('URL', ''), ('id', ''), ('fundingid', ''), ('name', ''):
                self.assertIn(key, agent, "Expected to find key %s" % key)
                self.assertEqual(agent[key], value)
            fail_agent -= 1
        elif agent['role'] == 'author':
            self.assertIn(agent['name'], expected_authors)
            fail_author -= 1

    self.assertEqual(fail_agent, 0, "Invalid agent data")
    self.assertEqual(fail_author, 0, "Invalid author data")
def test_dc_metadata_reader_fields(self):
    '''
    Test reading a whole file and check that fields are what they are
    supposed to be.

    The 'unified' dict (minus 'agent') is compared against EXPECTED_FIELDS;
    agents are then checked separately by role.
    '''
    EXPECTED_FIELDS = {
        'access_application_URL': '',
        'access_request_URL': '',
        'algorithm': '',
        'availability': 'through_provider',
        'checksum': '',
        'contact': [],
        'direct_download_URL': u'http://link.aip.org/link/?jcp/123/064507',
        'discipline': '',
        'geographic_coverage': '',
        'title': '{"und": "Neutralization of solvated protons and formation of noble-gas hydride molecules: matrix-isolation indications of tunneling mechanisms?"}',
        'language': u'en',
        'license_URL': u'Copyright 2005 American Institute of Physics. This article may be downloaded for personal use only. Any other use requires prior permission of the author and the American Institute of Physics.',
        'license_id': 'notspecified',
        'mimetype': '',
        'name': 'http---link-aip-org-link--jcp-123-064507',
        'notes': '{"und": ""}',
        'pids': [
            {
                'type': 'data',
                'id': u'http://link.aip.org/link/?jcp/123/064507',
                'provider': u'http://helda.helsinki.fi/oai/request'
            },
            {
                'id': u'http://hdl.handle.net/10138/1074',
                'provider': u'http://helda.helsinki.fi/oai/request',
                'type': 'data'
            },
        ],
        'tag_string': '',
        'temporal_coverage_begin': '',
        'temporal_coverage_end': '',
        'through_provider_URL': u'http://link.aip.org/link/?jcp/123/064507',
        'type': 'dataset',
        'version': u'2005-08-08',
        'uploader': u''
    }

    metadata = dcr.dc_metadata_reader('default')(etree.fromstring(self.xml))
    assert metadata

    data_dict = metadata['unified']
    temp = copy.copy(data_dict)
    temp.pop('agent')  # TODO: Compare also agents directly

    testfixtures.compare(temp, EXPECTED_FIELDS)

    # Count down the agents we expect to see: one funder, three authors.
    fail_agent = 1
    fail_author = 3
    expected_authors = (u'Khriachtchev, Leonid', u'Lignell, Antti',
                        u'R\xe4s\xe4nen, Markku')

    for agent in data_dict.get('agent', []):
        if agent['role'] == 'funder':
            for key, value in ('URL', ''), ('id', ''), ('fundingid', ''), ('name', ''):
                self.assertIn(key, agent, "Expected to find key %s" % key)
                self.assertEqual(agent[key], value)
            fail_agent -= 1
        elif agent['role'] == 'author':
            self.assertIn(agent['name'], expected_authors)
            fail_author -= 1

    self.assertEqual(fail_agent, 0, "Invalid agent data")
    self.assertEqual(fail_author, 0, "Invalid author data")