class TestGatherMethods(HarvestFixtureBase):
    """Exercise GeminiHarvester.get_gemini_string_and_guid.

    setup() builds a new GeminiHarvester and attaches the job returned by
    _create_source_and_job for a 'gemini-single' source fixture.
    """

    def setup(self):
        HarvestFixtureBase.setup(self)
        # Describe the source, then keep only the job that comes back with it.
        fixture = {
            'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
            'type': u'gemini-single'
        }
        _source, harvest_job = self._create_source_and_job(fixture)
        harvester = GeminiHarvester()
        harvester.harvest_job = harvest_job
        self.harvester = harvester

    def teardown(self):
        # Calls model.repo.rebuild_db() once the test has finished.
        model.repo.rebuild_db()

    def test_get_gemini_string_and_guid(self):
        # The basic fixture should come back unchanged, paired with GUID.
        outcome = self.harvester.get_gemini_string_and_guid(
            BASIC_GEMINI, url=None)
        expected = (BASIC_GEMINI, GUID)
        assert_equal(outcome, expected)

    def test_get_gemini_string_and_guid__no_guid(self):
        # For the missing-guid fixture the guid slot is an empty string.
        outcome = self.harvester.get_gemini_string_and_guid(
            GEMINI_MISSING_GUID, url=None)
        expected = (GEMINI_MISSING_GUID, '')
        assert_equal(outcome, expected)

    def test_get_gemini_string_and_guid__non_parsing(self):
        # The root element below is opened but never closed.
        unclosed_xml = '<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:gco="http://www.isotc211.org/2005/gco">'
        extract = self.harvester.get_gemini_string_and_guid
        assert_raises(lxml.etree.XMLSyntaxError, extract, unclosed_xml)

    def test_get_gemini_string_and_guid__empty(self):
        # An empty document is expected to raise the same syntax error.
        nothing = ''
        extract = self.harvester.get_gemini_string_and_guid
        assert_raises(lxml.etree.XMLSyntaxError, extract, nothing)
 def test_licence_url_multiple_urls(self):
     # Two URL entries follow the text entry; the first URL is expected back.
     licence_fields = [
         'Reference and PSMA Only',
         'http://www.test.gov.uk/licenseurl',
         'http://www.test.gov.uk/2nd_licenseurl',
     ]
     extracted = GeminiHarvester._extract_first_licence_url(licence_fields)
     assert_equal(extracted, 'http://www.test.gov.uk/licenseurl')
 def test_responsible_organisation_basic(self):
     # One owner and one distributor: the expected provider is the owner.
     parties = [
         {'organisation-name': 'Ordnance Survey', 'role': 'owner'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     actual = GeminiHarvester._process_responsible_organisation(parties)
     expected = (
         'Ordnance Survey',
         ['Maps Ltd (distributor)', 'Ordnance Survey (owner)'],
     )
     assert_equal(actual, expected)
 def test_responsible_organisation_blank_provider(self):
     # Neither an owner nor a publisher is listed, so the expected provider
     # is the empty string.
     parties = [
         {'organisation-name': 'Ordnance Survey', 'role': 'resourceProvider'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     actual = GeminiHarvester._process_responsible_organisation(parties)
     expected = (
         '',
         ['Maps Ltd (distributor)', 'Ordnance Survey (resourceProvider)'],
     )
     assert_equal(actual, expected)
 def test_responsible_organisation_publisher(self):
     # With no owner listed, the publisher is the expected provider.
     parties = [
         {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     actual = GeminiHarvester._process_responsible_organisation(parties)
     expected = (
         'Ordnance Survey',
         ['Maps Ltd (distributor)', 'Ordnance Survey (publisher)'],
     )
     assert_equal(actual, expected)
 def setup(self):
     # NOTE(review): this body matches TestGatherMethods.setup; it relies on
     # HarvestFixtureBase.setup and self._create_source_and_job being
     # available on the enclosing class -- confirm against the class header.
     HarvestFixtureBase.setup(self)
     # Describe the source, then keep only the job that comes back with it.
     fixture = {
         'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
         'type': u'gemini-single'
     }
     _source, harvest_job = self._create_source_and_job(fixture)
     harvester = GeminiHarvester()
     harvester.harvest_job = harvest_job
     self.harvester = harvester
 def test_responsible_organisation_multiple_roles(self):
     # The same organisation appears twice (publisher and custodian) and no
     # owner is listed: the expected provider is that organisation, and its
     # roles are expected to be merged into a single entry.
     parties = [
         {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
         {'organisation-name': 'Ordnance Survey', 'role': 'custodian'},
         {'organisation-name': 'Distributor', 'role': 'distributor'},
     ]
     actual = GeminiHarvester._process_responsible_organisation(parties)
     expected = (
         'Ordnance Survey',
         ['Distributor (distributor)',
          'Ordnance Survey (publisher, custodian)'],
     )
     assert_equal(actual, expected)
 def test_responsible_organisation_owner(self):
     # Both a publisher and an owner are listed; the owner is the expected
     # provider and the publisher is not used for it.
     parties = [
         {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
         {'organisation-name': 'Owner', 'role': 'owner'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     actual = GeminiHarvester._process_responsible_organisation(parties)
     expected = (
         'Owner',
         ['Owner (owner)',
          'Maps Ltd (distributor)',
          'Ordnance Survey (publisher)'],
     )
     assert_equal(actual, expected)
 def test_responsible_organisation_blank(self):
     # Empty input: expect a blank provider and an empty list.
     parties = []
     actual = GeminiHarvester._process_responsible_organisation(parties)
     expected = ('', [])
     assert_equal(actual, expected)
 def test_licence_url_embedded_at_start(self):
     # The field begins with a URL followed by free text. The whole field,
     # not just the URL, is expected back -- unusual behaviour, recorded
     # here deliberately.
     licence_fields = [
         'http://www.test.gov.uk/licenseurl Reference and PSMA Only',
     ]
     extracted = GeminiHarvester._extract_first_licence_url(licence_fields)
     assert_equal(
         extracted,
         'http://www.test.gov.uk/licenseurl Reference and PSMA Only')
 def test_licence_url_embedded(self):
     # The URL sits after free text inside the field, so nothing is
     # expected to be extracted.
     licence_fields = [
         'Reference and PSMA Only http://www.test.gov.uk/licenseurl',
     ]
     extracted = GeminiHarvester._extract_first_licence_url(licence_fields)
     assert_equal(extracted, None)
 def test_licence_url_normal(self):
     # One text entry followed by one URL entry: the URL is expected back.
     licence_fields = [
         'Reference and PSMA Only',
         'http://www.test.gov.uk/licenseurl',
     ]
     extracted = GeminiHarvester._extract_first_licence_url(licence_fields)
     assert_equal(extracted, 'http://www.test.gov.uk/licenseurl')