class TestGatherMethods(HarvestFixtureBase):
    """Tests for GeminiHarvester helper methods.

    Covers ``get_gemini_string_and_guid`` (called on a harvester instance
    that has a harvest job attached) and the helpers
    ``_process_responsible_organisation`` and ``_extract_first_licence_url``
    (called directly on the ``GeminiHarvester`` class).
    """

    def setup(self):
        # Run the shared fixture setup, then create a harvest source and job
        # from the fixture below and attach the job to a fresh harvester.
        HarvestFixtureBase.setup(self)

        # Create source
        source_fixture = {
            'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
            'type': u'gemini-single',
        }
        source, job = self._create_source_and_job(source_fixture)

        self.harvester = GeminiHarvester()
        self.harvester.harvest_job = job

    def teardown(self):
        # Rebuild the database after each test.
        model.repo.rebuild_db()

    # --- get_gemini_string_and_guid ---------------------------------------

    def test_get_gemini_string_and_guid(self):
        # the document is returned together with its GUID
        res = self.harvester.get_gemini_string_and_guid(BASIC_GEMINI, url=None)
        assert_equal(res, (BASIC_GEMINI, GUID))

    def test_get_gemini_string_and_guid__no_guid(self):
        # a document without a GUID yields an empty-string GUID
        res = self.harvester.get_gemini_string_and_guid(GEMINI_MISSING_GUID,
                                                        url=None)
        assert_equal(res, (GEMINI_MISSING_GUID, ''))

    def test_get_gemini_string_and_guid__non_parsing(self):
        # no closing tag
        content = ('<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" '
                   'xmlns:gco="http://www.isotc211.org/2005/gco">')
        assert_raises(lxml.etree.XMLSyntaxError,
                      self.harvester.get_gemini_string_and_guid, content)

    def test_get_gemini_string_and_guid__empty(self):
        # empty content is reported as an XML syntax error too
        content = ''
        assert_raises(lxml.etree.XMLSyntaxError,
                      self.harvester.get_gemini_string_and_guid, content)

    # --- _process_responsible_organisation --------------------------------

    def test_responsible_organisation_basic(self):
        responsible_organisation = [
            {'organisation-name': 'Ordnance Survey', 'role': 'owner'},
            {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
        ]
        assert_equal(
            GeminiHarvester._process_responsible_organisation(responsible_organisation),
            ('Ordnance Survey', ['Maps Ltd (distributor)',
                                 'Ordnance Survey (owner)']))

    def test_responsible_organisation_publisher(self):
        # no owner, so falls back to publisher
        responsible_organisation = [
            {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
            {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
        ]
        assert_equal(
            GeminiHarvester._process_responsible_organisation(responsible_organisation),
            ('Ordnance Survey', ['Maps Ltd (distributor)',
                                 'Ordnance Survey (publisher)']))

    def test_responsible_organisation_owner(self):
        # provider is the owner (ignores publisher)
        responsible_organisation = [
            {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
            {'organisation-name': 'Owner', 'role': 'owner'},
            {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
        ]
        assert_equal(
            GeminiHarvester._process_responsible_organisation(responsible_organisation),
            ('Owner', ['Owner (owner)',
                       'Maps Ltd (distributor)',
                       'Ordnance Survey (publisher)',
                       ]))

    def test_responsible_organisation_multiple_roles(self):
        # no owner in this fixture: the expected provider is the organisation
        # listed as publisher, and its two roles are expected to be combined
        # into a single "name (role, role)" entry
        responsible_organisation = [
            {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
            {'organisation-name': 'Ordnance Survey', 'role': 'custodian'},
            {'organisation-name': 'Distributor', 'role': 'distributor'},
        ]
        assert_equal(
            GeminiHarvester._process_responsible_organisation(responsible_organisation),
            ('Ordnance Survey', ['Distributor (distributor)',
                                 'Ordnance Survey (publisher, custodian)',
                                 ]))

    def test_responsible_organisation_blank_provider(self):
        # no owner or publisher, so blank provider
        responsible_organisation = [
            {'organisation-name': 'Ordnance Survey', 'role': 'resourceProvider'},
            {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
        ]
        assert_equal(
            GeminiHarvester._process_responsible_organisation(responsible_organisation),
            ('', ['Maps Ltd (distributor)',
                  'Ordnance Survey (resourceProvider)']))

    def test_responsible_organisation_blank(self):
        # no organisations at all, so blank provider and no entries
        responsible_organisation = []
        assert_equal(
            GeminiHarvester._process_responsible_organisation(responsible_organisation),
            ('', []))

    # --- _extract_first_licence_url ---------------------------------------

    def test_licence_url_normal(self):
        assert_equal(
            GeminiHarvester._extract_first_licence_url(
                ['Reference and PSMA Only',
                 'http://www.test.gov.uk/licenseurl']),
            'http://www.test.gov.uk/licenseurl')

    def test_licence_url_multiple_urls(self):
        # only the first URL is extracted
        assert_equal(
            GeminiHarvester._extract_first_licence_url(
                ['Reference and PSMA Only',
                 'http://www.test.gov.uk/licenseurl',
                 'http://www.test.gov.uk/2nd_licenseurl']),
            'http://www.test.gov.uk/licenseurl')

    def test_licence_url_embedded(self):
        # URL is embedded within the text field and not extracted
        assert_equal(
            GeminiHarvester._extract_first_licence_url(
                ['Reference and PSMA Only http://www.test.gov.uk/licenseurl']),
            None)

    def test_licence_url_embedded_at_start(self):
        # URL is embedded at the start of the text field and the
        # whole field is returned. Noting this unusual behaviour
        assert_equal(
            GeminiHarvester._extract_first_licence_url(
                ['http://www.test.gov.uk/licenseurl Reference and PSMA Only']),
            'http://www.test.gov.uk/licenseurl Reference and PSMA Only')