예제 #1
0
    def test_dc_metadata_reader(self):
        '''
        Read the whole document held in self.xml with the 'default' reader.

        The reader output must be truthy and its map must contain a
        'unified' entry that has an 'availability' key.
        '''
        # Build the reader before parsing, in the same order as a chained call.
        read_record = dcr.dc_metadata_reader('default')
        root = etree.fromstring(self.xml)
        metadata = read_record(root)

        assert metadata

        assert 'unified' in metadata.getMap()
        assert 'availability' in metadata.getMap()['unified']
예제 #2
0
    def parse_xml(self, f, context, orig_url=None, strict=True):
        """ Parse an XML string and return the package data dictionary.

        :param f: XML data as a string
        :param context: CKAN context (not used in this method)
        :param orig_url: original URL (not used in this method)
        :param strict: not used here, required by the caller
        :return: the 'unified' entry of the metadata built by the 'default'
            reader (used for package creation)
        """
        read_record = dc_metadata_reader('default')
        root = etree.fromstring(f)
        return read_record(root)['unified']
예제 #3
0
    def parse_xml(self, f, context, orig_url=None, strict=True):
        """ Turn XML text into the package dictionary used for package creation.

        :param f: XML data as a string
        :param context: CKAN context (not read by this method)
        :param orig_url: original URL (not read by this method)
        :param strict: not used here, required by the caller
        :return: the 'unified' entry of the metadata produced by the
            'default' reader
        """
        build_metadata = dc_metadata_reader('default')
        metadata = build_metadata(etree.fromstring(f))
        package_dict = metadata['unified']
        return package_dict
예제 #4
0
    def test_dc_metadata_reader(self):
        '''
        Run the 'default' reader over the whole document in self.xml and
        check that its map has a 'unified' entry containing 'availability'.
        '''
        build_metadata = dcr.dc_metadata_reader('default')
        metadata = build_metadata(etree.fromstring(self.xml))

        assert metadata

        assert 'unified' in metadata.getMap()
        assert 'availability' in metadata.getMap()['unified']
예제 #5
0
    def _run_import(self, xml, ida, config=None):
        """Run the harvester import stage on a record built from ``xml``.

        Saves a 'harvest' sysadmin user and calls 'organization_create' for
        'test' when the respective lookups come back empty, reads the record
        with the 'ida' or 'default' reader, sets owner_org to "test" and
        hands the JSON-serialized metadata map to
        ``self.harvester.import_stage``.

        :param xml: input passed to ``_get_record``
        :param ida: truthy selects the 'ida' harvest type, otherwise 'default'
        :param config: config for the fake harvest object; when None,
            ``{'type': harvest_type}`` is used
        """
        if not model.User.get('harvest'):
            model.User(name='harvest', sysadmin=True).save()
        if not model.Group.get('test'):
            create_organization = get_action('organization_create')
            create_organization({'user': '******'}, {'name': 'test'})

        record = _get_record(xml)
        if ida:
            harvest_type = 'ida'
        else:
            harvest_type = 'default'
        config = {'type': harvest_type} if config is None else config

        read_record = dc_metadata_reader(harvest_type)
        metadata = read_record(record)
        metadata['unified']['owner_org'] = "test"
        serialized = json.dumps(metadata.getMap())
        harvest_object = _FakeHarvestObject(serialized, "test_id", config)

        self.harvester.import_stage(harvest_object)
예제 #6
0
def create_metadata_registry(harvest_type=None, service_url=None):
    '''Build a metadata registry populated with the common metadata readers.

    Readers are registered for the metadataPrefixes oai_dc, cmdi0571,
    oai_datacite3, nrd, rdf and xml.

    :param harvest_type: type given to dc_metadata_reader for the oai_dc
        reader; 'default' is used when this is falsy
    :param service_url: passed to the CmdiReader constructor
    :returns: metadata registry instance
    :rtype: oaipmh.metadata.MetadataRegistry
    '''
    registry = om.MetadataRegistry()
    register = registry.registerReader
    dc_type = harvest_type or 'default'

    # Each reader is created right before it is registered.
    register('oai_dc', dc_metadata_reader(dc_type))
    register('cmdi0571', CmdiReader(service_url))
    register('oai_datacite3', DataCiteReader())
    register('nrd', nrd_metadata_reader)
    register('rdf', rdf_reader)
    register('xml', xml_reader)
    return registry
예제 #7
0
def create_metadata_registry(harvest_type=None, service_url=None):
    '''Create a metadata registry and register the common metadata readers.

    The registered metadataPrefixes are oai_dc, cmdi0571, oai_datacite3,
    nrd, rdf and xml.

    :param harvest_type: type handed to dc_metadata_reader for the oai_dc
        reader; falls back to 'default' when falsy
    :param service_url: argument for the CmdiReader constructor
    :returns: metadata registry instance
    :rtype: oaipmh.metadata.MetadataRegistry
    '''
    registry = om.MetadataRegistry()

    dc_reader = dc_metadata_reader(harvest_type if harvest_type else 'default')
    registry.registerReader('oai_dc', dc_reader)

    cmdi_reader = CmdiReader(service_url)
    registry.registerReader('cmdi0571', cmdi_reader)

    datacite_reader = DataCiteReader()
    registry.registerReader('oai_datacite3', datacite_reader)

    # The remaining readers are module-level callables registered as-is.
    registry.registerReader('nrd', nrd_metadata_reader)
    registry.registerReader('rdf', rdf_reader)
    registry.registerReader('xml', xml_reader)
    return registry
예제 #8
0
    def _run_import(self, xml, ida, config=None):
        """Feed a record built from ``xml`` through the harvester import stage.

        A 'harvest' sysadmin user is saved and 'organization_create' is
        called for 'test' when the respective lookups return nothing. The
        record is read with the 'ida' or 'default' reader, owner_org is set
        to "test", and the JSON dump of the metadata map is wrapped in a
        fake harvest object for ``self.harvester.import_stage``.

        :param xml: input passed to ``_get_record``
        :param ida: truthy selects the 'ida' harvest type, otherwise 'default'
        :param config: config for the fake harvest object; when None,
            ``{'type': harvest_type}`` is used
        """
        if not model.User.get('harvest'):
            harvest_user = model.User(name='harvest', sysadmin=True)
            harvest_user.save()
        if not model.Group.get('test'):
            create_organization = get_action('organization_create')
            create_organization({'user': '******'}, {'name': 'test'})

        record = _get_record(xml)
        harvest_type = 'default'
        if ida:
            harvest_type = 'ida'
        if config is None:
            config = {'type': harvest_type}

        metadata = dc_metadata_reader(harvest_type)(record)
        metadata['unified']['owner_org'] = "test"
        content = json.dumps(metadata.getMap())
        harvest_object = _FakeHarvestObject(content, "test_id", config)

        self.harvester.import_stage(harvest_object)
예제 #9
0
 def parse_xml(self, f, context, orig_url=None, strict=True):
     """Parse an XML string with the 'ida' reader and return its 'unified' entry.

     :param f: XML data as a string
     :param context: not used in this method
     :param orig_url: not used in this method
     :param strict: not used in this method
     :return: the 'unified' entry of the metadata built by the reader
     """
     read_record = dc_metadata_reader('ida')
     root = etree.fromstring(f)
     return read_record(root)['unified']
예제 #10
0
    def test_dc_metadata_reader_fields(self):
        '''
        Test reading a whole file and check that fields are what they are
        supposed to be.

        The 'unified' dict is compared against EXPECTED_FIELDS after removing
        'agent', 'id', 'name', pids of type 'primary' and (when it equals one
        of the two known pid ids) 'smear_url'. Agents are then checked
        separately: one funder with empty fields and three named authors.
        '''
        EXPECTED_FIELDS = {
            'access_application_URL': '',
            'access_request_URL': '',
            'algorithm': '',
            'availability': 'contact_owner',
            'checksum': '',
            'contact': [],
            'direct_download_URL': u'http://link.aip.org/link/?jcp/123/064507',
            'discipline': '',
            'geographic_coverage': '',
            #'langtitle': [{'lang': '',
            #               'value': u'Neutralization of solvated protons and formation of noble-gas hydride molecules: matrix-isolation indications of tunneling mechanisms?'}],
            'title': '{"und": "Neutralization of solvated protons and formation of noble-gas hydride molecules: matrix-isolation indications of tunneling mechanisms?"}',
            'language': u'en',
            'license_URL': u'Copyright 2005 American Institute of Physics. This article may be downloaded for personal use only. Any other use requires prior permission of the author and the American Institute of Physics.',
            'license_id': 'notspecified',
            'mimetype': '',
            'notes': '{"und": ""}',
            'pids': [
                {
                    'type': u'relation',
                    'relation': u'generalRelation',
                    'id': u'http://link.aip.org/link/?jcp/123/064507',
                    'provider': u'http://helda.helsinki.fi/oai/request',
                },
                {
                    'id': u'http://hdl.handle.net/10138/1074',
                    'provider': u'http://helda.helsinki.fi/oai/request',
                    'type': u'relation',
                    'relation': u'generalRelation',
                },
            ],
            'tag_string': '',
            'temporal_coverage_begin': '',
            'temporal_coverage_end': '',
            'type': 'dataset',
            'version': u'2005-08-08',
            'uploader': u'',
        }

        metadata = dcr.dc_metadata_reader('default')(etree.fromstring(
            self.xml))
        assert metadata

        data_dict = metadata['unified']

        # Shallow copy so that keys can be dropped or replaced below while
        # data_dict keeps its 'agent' list for the checks at the end.
        temp = copy.copy(data_dict)

        temp.pop('agent')  # TODO: Compare also agents directly

        # Do not compare id since it is always generated by Etsin
        temp.pop('id')
        # Do not compare name either (presumably generated by Etsin as well)
        temp.pop('name')

        ### Pid reform introduced some changes that can't be validated with a
        ### simple dict compare
        # Do not compare primary pid generated by Etsin
        temp['pids'] = [
            pid for pid in temp['pids'] if pid['type'] != u'primary'
        ]
        # Smear url value may be any data pid id
        if temp['smear_url'] in ('http://link.aip.org/link/?jcp/123/064507',
                                 'http://hdl.handle.net/10138/1074'):
            temp.pop('smear_url')

        testfixtures.compare(temp, EXPECTED_FIELDS)

        # Counters start at the expected number of funders/authors and are
        # decremented per matching agent; both must end at exactly zero.
        fail_agent = 1
        fail_author = 3
        for agent in data_dict.get('agent', []):
            if agent['role'] == 'funder':
                for key, value in ('URL', ''), ('id', ''), ('fundingid',
                                                            ''), ('name', ''):
                    self.assertIn(key, agent,
                                  "Expected to find key %s" % key)
                    self.assertEqual(agent[key], value)
                fail_agent -= 1
            elif agent['role'] == 'author':
                self.assertIn(agent['name'], (u'Khriachtchev, Leonid',
                                              u'Lignell, Antti',
                                              u'R\xe4s\xe4nen, Markku'))
                fail_author -= 1

        self.assertEqual(fail_agent, 0, "Invalid agent data")
        self.assertEqual(fail_author, 0, "Invalid author data")
예제 #11
0
    def test_dc_metadata_reader_fields(self):
        '''
        Test reading a whole file and check that fields are what they are
        supposed to be.

        The 'unified' dict, minus its 'agent' list, is compared against
        EXPECTED_FIELDS. Agents are then checked separately: one funder with
        empty fields and three named authors.
        '''
        EXPECTED_FIELDS = {'access_application_URL': '',
                           'access_request_URL': '',
                           'algorithm': '',
                           'availability': 'through_provider',
                           'checksum': '',
                           'contact': [],
                           'direct_download_URL': u'http://link.aip.org/link/?jcp/123/064507',
                           'discipline': '',
                           'geographic_coverage': '',
                           #'langtitle': [{'lang': '',
                           #               'value': u'Neutralization of solvated protons and formation of noble-gas hydride molecules: matrix-isolation indications of tunneling mechanisms?'}],
                           'title': '{"und": "Neutralization of solvated protons and formation of noble-gas hydride molecules: matrix-isolation indications of tunneling mechanisms?"}',
                           'language': u'en',
                           'license_URL': u'Copyright 2005 American Institute of Physics. This article may be downloaded for personal use only. Any other use requires prior permission of the author and the American Institute of Physics.',
                           'license_id': 'notspecified',
                           'mimetype': '',
                           'name': 'http---link-aip-org-link--jcp-123-064507',
                           'notes': '{"und": ""}',
                           'pids': [{'type': 'data',
                                     'id': u'http://link.aip.org/link/?jcp/123/064507',
                                     'provider': u'http://helda.helsinki.fi/oai/request'},
                                    {'id': u'http://hdl.handle.net/10138/1074',
                                     'provider': u'http://helda.helsinki.fi/oai/request',
                                     'type': 'data'},
                                    ],
                           'tag_string': '',
                           'temporal_coverage_begin': '',
                           'temporal_coverage_end': '',
                           'through_provider_URL': u'http://link.aip.org/link/?jcp/123/064507',
                           'type': 'dataset',
                           'version': u'2005-08-08',
                           'uploader': u''}

        metadata = dcr.dc_metadata_reader('default')(etree.fromstring(self.xml))
        assert metadata

        data_dict = metadata['unified']

        # Shallow copy so 'agent' can be dropped for the dict comparison while
        # data_dict keeps it for the checks at the end.
        temp = copy.copy(data_dict)

        temp.pop('agent')   # TODO: Compare also agents directly

        testfixtures.compare(temp, EXPECTED_FIELDS)

        # Counters start at the expected number of funders/authors and are
        # decremented per matching agent; both must end at exactly zero.
        fail_agent = 1
        fail_author = 3
        for agent in data_dict.get('agent', []):
            if agent['role'] == 'funder':
                for key, value in ('URL', ''), ('id', ''), ('fundingid', ''), ('name', ''):
                    self.assertIn(key, agent, "Expected to find key %s" % key)
                    self.assertEqual(agent[key], value)
                fail_agent -= 1
            elif agent['role'] == 'author':
                self.assertIn(agent['name'], (u'Khriachtchev, Leonid', u'Lignell, Antti', u'R\xe4s\xe4nen, Markku'))
                fail_author -= 1

        self.assertEqual(fail_agent, 0, "Invalid agent data")
        self.assertEqual(fail_author, 0, "Invalid author data")