Ejemplo n.º 1
0
class TestGatherMethods(HarvestFixtureBase):
    """Tests for ``GeminiHarvester.get_gemini_string_and_guid``.

    Every test runs against a harvester whose ``harvest_job`` comes from a
    freshly created 'gemini-single' source.
    """

    def setup(self):
        """Run the base setup, then build a harvester bound to a new job."""
        HarvestFixtureBase.setup(self)
        # Create source
        source_fixture = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
            'source_type': u'gemini-single'
        }
        # Only the job is used below; the source object is not needed here.
        _source, job = self._create_source_and_job(source_fixture)
        self.harvester = GeminiHarvester()
        self.harvester.harvest_job = job

    def teardown(self):
        """Call ``model.repo.rebuild_db()`` after each test."""
        model.repo.rebuild_db()

    def test_get_gemini_string_and_guid(self):
        """BASIC_GEMINI is expected back unchanged, paired with GUID."""
        res = self.harvester.get_gemini_string_and_guid(BASIC_GEMINI, url=None)
        assert_equal(res, (BASIC_GEMINI, GUID))

    def test_get_gemini_string_and_guid__no_guid(self):
        """GEMINI_MISSING_GUID is expected back with an empty-string GUID."""
        res = self.harvester.get_gemini_string_and_guid(GEMINI_MISSING_GUID, url=None)
        assert_equal(res, (GEMINI_MISSING_GUID, ''))

    def test_get_gemini_string_and_guid__non_parsing(self):
        """An unclosed root element is expected to raise XMLSyntaxError."""
        # The opening tag is never closed, so the document is not well formed.
        content = (
            '<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" '
            'xmlns:gco="http://www.isotc211.org/2005/gco">'
        )
        assert_raises(lxml.etree.XMLSyntaxError, self.harvester.get_gemini_string_and_guid, content)

    def test_get_gemini_string_and_guid__empty(self):
        """An empty string is expected to raise XMLSyntaxError."""
        content = ''
        assert_raises(lxml.etree.XMLSyntaxError, self.harvester.get_gemini_string_and_guid, content)
Ejemplo n.º 2
0
def test_gen_new_name(mock_ckan_session_query):
    """Check the names ``GeminiHarvester.gen_new_name`` produces for a title.

    ``mock_ckan_session_query`` is the patched session query; its return
    value is replaced with a ``MockSessionQuery`` so each case controls which
    package name (if any) the first lookup appears to find.
    """
    class MockSessionQuery:
        """Query stand-in whose first ``filter()`` call differs from later ones."""

        def __init__(self, name):
            # Name reported by the first lookup; a falsy value means "no match".
            self.name = name
            self.first_query = True

        def filter(self, *arg):
            """Return a mock query result; positional arguments are ignored."""
            if not self.first_query:
                # Every call after the first reports no match via ``first()``.
                later_result = Mock()
                later_result.first.return_value = None
                return later_result

            self.first_query = False
            if self.name:
                existing_pkg = Mock()
                existing_pkg.name = self.name
            else:
                existing_pkg = None

            # The first call answers through ``order_by(...).first()``.
            first_result = Mock()
            first_result.order_by.return_value.first.return_value = existing_pkg
            return first_result

    harvester = GeminiHarvester()

    # (name reported by the first lookup, expected generated name)
    cases = [
        (None, 'some-test'),
        ('some-test', 'some-test1'),
        ('some-test100', 'some-test101'),
    ]
    for existing_name, expected_name in cases:
        mock_ckan_session_query.return_value = MockSessionQuery(existing_name)
        assert harvester.gen_new_name('Some test') == expected_name
Ejemplo n.º 3
0
class TestGatherMethods(HarvestFixtureBase):
    """Exercise ``GeminiHarvester.get_gemini_string_and_guid``.

    A harvester tied to a newly created 'gemini-single' job is prepared
    before each test.
    """

    def setup(self):
        """Prepare the base fixtures and a harvester with a job attached."""
        HarvestFixtureBase.setup(self)
        # Definition of the harvest source the job is created for.
        fixture = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
            'source_type': u'gemini-single'
        }
        _source, harvest_job = self._create_source_and_job(fixture)
        gemini_harvester = GeminiHarvester()
        gemini_harvester.harvest_job = harvest_job
        self.harvester = gemini_harvester

    def teardown(self):
        """Invoke ``model.repo.rebuild_db()`` once a test has finished."""
        model.repo.rebuild_db()

    def test_get_gemini_string_and_guid(self):
        """The input document and GUID are expected back as a pair."""
        expected = (BASIC_GEMINI, GUID)
        actual = self.harvester.get_gemini_string_and_guid(BASIC_GEMINI, url=None)
        assert_equal(actual, expected)

    def test_get_gemini_string_and_guid__no_guid(self):
        """With GEMINI_MISSING_GUID the GUID slot is expected to be ''."""
        expected = (GEMINI_MISSING_GUID, '')
        actual = self.harvester.get_gemini_string_and_guid(GEMINI_MISSING_GUID, url=None)
        assert_equal(actual, expected)

    def test_get_gemini_string_and_guid__non_parsing(self):
        """A document lacking its closing tag should raise XMLSyntaxError."""
        # no closing tag
        unclosed_xml = '<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:gco="http://www.isotc211.org/2005/gco">'
        method_under_test = self.harvester.get_gemini_string_and_guid
        assert_raises(lxml.etree.XMLSyntaxError, method_under_test, unclosed_xml)

    def test_get_gemini_string_and_guid__empty(self):
        """Empty content should raise XMLSyntaxError."""
        method_under_test = self.harvester.get_gemini_string_and_guid
        assert_raises(lxml.etree.XMLSyntaxError, method_under_test, '')
Ejemplo n.º 4
0
    def setup(self):
        """Run the base setup, then build a harvester bound to a new job."""
        HarvestFixtureBase.setup(self)
        # Create source
        source_fixture = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
            'source_type': u'gemini-single'
        }
        # Only the job is used below; the source object is not needed here.
        _source, job = self._create_source_and_job(source_fixture)
        self.harvester = GeminiHarvester()
        self.harvester.harvest_job = job
Ejemplo n.º 5
0
 def test_licence_url_embedded(self):
     """A URL that follows other text in the same field yields None."""
     # The only field begins with free text, with the URL trailing it.
     licence_fields = [
         u'Reference and PSMA Only http://www.test.gov.uk/licenseurl'
     ]
     extracted = GeminiHarvester._extract_first_licence_url(licence_fields)
     assert_equal(extracted, None)
Ejemplo n.º 6
0
 def test_licence_url_multiple_urls(self):
     """With two URL fields present, the earlier one is expected back."""
     licence_fields = [
         'Reference and PSMA Only',
         'http://www.test.gov.uk/licenseurl',
         'http://www.test.gov.uk/2nd_licenseurl',
     ]
     extracted = GeminiHarvester._extract_first_licence_url(licence_fields)
     # The second URL in the list must not be the one returned.
     assert_equal(extracted, 'http://www.test.gov.uk/licenseurl')
Ejemplo n.º 7
0
 def test_licence_url_multiple_urls(self):
     """With two URL fields present, the earlier one is expected back."""
     licence_fields = [
         'Reference and PSMA Only',
         'http://www.test.gov.uk/licenseurl',
         'http://www.test.gov.uk/2nd_licenseurl',
     ]
     extracted = GeminiHarvester._extract_first_licence_url(licence_fields)
     # The second URL in the list must not be the one returned.
     assert_equal(extracted, 'http://www.test.gov.uk/licenseurl')
Ejemplo n.º 8
0
 def test_licence_url_embedded_at_start(self):
     """A field that starts with a URL is expected back in its entirety."""
     # Unusual behaviour worth noting: the trailing text after the URL is
     # kept, so the expected value equals the whole input field.
     whole_field = 'http://www.test.gov.uk/licenseurl Reference and PSMA Only'
     extracted = GeminiHarvester._extract_first_licence_url([whole_field])
     assert_equal(
         extracted,
         'http://www.test.gov.uk/licenseurl Reference and PSMA Only')
Ejemplo n.º 9
0
 def test_responsible_organisation_blank(self):
     """An empty organisation list gives a blank provider and no entries."""
     # no owner or publisher, so blank provider
     organisations = []
     expected = (u'', [])
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)
Ejemplo n.º 10
0
 def test_licence_url_normal(self):
     """A URL in its own field, after a text field, is expected back."""
     licence_fields = [
         u'Reference and PSMA Only',
         u'http://www.test.gov.uk/licenseurl',
     ]
     extracted = GeminiHarvester._extract_first_licence_url(licence_fields)
     assert_equal(extracted, u'http://www.test.gov.uk/licenseurl')
Ejemplo n.º 11
0
 def test_responsible_organisation_basic(self):
     """The organisation with the 'owner' role is the expected provider."""
     organisations = [
         {'organisation-name': 'Ordnance Survey', 'role': 'owner'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     expected = (
         'Ordnance Survey',
         ['Maps Ltd (distributor)', 'Ordnance Survey (owner)'],
     )
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)
Ejemplo n.º 12
0
 def test_responsible_organisation_basic(self):
     """The organisation with the 'owner' role is the expected provider."""
     organisations = [
         {'organisation-name': 'Ordnance Survey', 'role': 'owner'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     expected = (
         'Ordnance Survey',
         ['Maps Ltd (distributor)', 'Ordnance Survey (owner)'],
     )
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)
Ejemplo n.º 13
0
 def test_responsible_organisation_publisher(self):
     """Without an owner, the publisher is the expected provider."""
     # No entry carries the 'owner' role in this fixture.
     organisations = [
         {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     expected = (
         'Ordnance Survey',
         ['Maps Ltd (distributor)', 'Ordnance Survey (publisher)'],
     )
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)
Ejemplo n.º 14
0
 def test_responsible_organisation_publisher(self):
     """Without an owner, the publisher is the expected provider."""
     # No entry carries the 'owner' role in this fixture.
     organisations = [
         {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     expected = (
         'Ordnance Survey',
         ['Maps Ltd (distributor)', 'Ordnance Survey (publisher)'],
     )
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)
Ejemplo n.º 15
0
 def test_responsible_organisation_blank_provider(self):
     """With neither owner nor publisher, the expected provider is blank."""
     # Only 'resourceProvider' and 'distributor' roles appear here.
     organisations = [
         {'organisation-name': 'Ordnance Survey', 'role': 'resourceProvider'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     expected = (
         '',
         ['Maps Ltd (distributor)', 'Ordnance Survey (resourceProvider)'],
     )
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)
Ejemplo n.º 16
0
 def test_responsible_organisation_blank_provider(self):
     """With neither owner nor publisher, the expected provider is blank."""
     # Only 'resourceProvider' and 'distributor' roles appear here.
     organisations = [
         {'organisation-name': 'Ordnance Survey', 'role': 'resourceProvider'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     expected = (
         '',
         ['Maps Ltd (distributor)', 'Ordnance Survey (resourceProvider)'],
     )
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)
Ejemplo n.º 17
0
 def test_responsible_organisation_multiple_roles(self):
     """Two roles held by one organisation are expected in a single entry."""
     # 'Ordnance Survey' appears twice (publisher, then custodian) and no
     # entry has the 'owner' role; it is still the expected provider.
     organisations = [
         {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
         {'organisation-name': 'Ordnance Survey', 'role': 'custodian'},
         {'organisation-name': 'Distributor', 'role': 'distributor'},
     ]
     expected = (
         'Ordnance Survey',
         ['Distributor (distributor)',
          'Ordnance Survey (publisher, custodian)'],
     )
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)
Ejemplo n.º 18
0
 def setup(self):
     """Prepare the base fixtures and a harvester with a job attached."""
     HarvestFixtureBase.setup(self)
     # Definition of the harvest source the job is created for.
     fixture = {
         'title': 'Test Source',
         'name': 'test-source',
         'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
         'source_type': u'gemini-single'
     }
     _source, harvest_job = self._create_source_and_job(fixture)
     gemini_harvester = GeminiHarvester()
     gemini_harvester.harvest_job = harvest_job
     self.harvester = gemini_harvester
Ejemplo n.º 19
0
 def test_responsible_organisation_multiple_roles(self):
     """Two roles held by one organisation are expected in a single entry."""
     # 'Ordnance Survey' appears twice (publisher, then custodian) and no
     # entry has the 'owner' role; it is still the expected provider.
     organisations = [
         {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
         {'organisation-name': 'Ordnance Survey', 'role': 'custodian'},
         {'organisation-name': 'Distributor', 'role': 'distributor'},
     ]
     expected = (
         'Ordnance Survey',
         ['Distributor (distributor)',
          'Ordnance Survey (publisher, custodian)'],
     )
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)
Ejemplo n.º 20
0
 def test_responsible_organisation_owner(self):
     """An owner is the expected provider even when a publisher is listed."""
     # The publisher comes first in the input, yet 'Owner' is expected.
     organisations = [
         {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
         {'organisation-name': 'Owner', 'role': 'owner'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     expected = (
         'Owner',
         ['Owner (owner)',
          'Maps Ltd (distributor)',
          'Ordnance Survey (publisher)'],
     )
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)
Ejemplo n.º 21
0
 def test_responsible_organisation_owner(self):
     """An owner is the expected provider even when a publisher is listed."""
     # The publisher comes first in the input, yet 'Owner' is expected.
     organisations = [
         {'organisation-name': 'Ordnance Survey', 'role': 'publisher'},
         {'organisation-name': 'Owner', 'role': 'owner'},
         {'organisation-name': 'Maps Ltd', 'role': 'distributor'},
     ]
     expected = (
         'Owner',
         ['Owner (owner)',
          'Maps Ltd (distributor)',
          'Ordnance Survey (publisher)'],
     )
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)
Ejemplo n.º 22
0
 def test_licence_url_embedded_at_start(self):
     """A field that starts with a URL is expected back in its entirety."""
     # Unusual behaviour worth noting: the trailing text after the URL is
     # kept, so the expected value equals the whole input field.
     whole_field = 'http://www.test.gov.uk/licenseurl Reference and PSMA Only'
     extracted = GeminiHarvester._extract_first_licence_url([whole_field])
     assert_equal(
         extracted,
         'http://www.test.gov.uk/licenseurl Reference and PSMA Only')
Ejemplo n.º 23
0
 def test_licence_url_embedded(self):
     """A URL that follows other text in the same field yields None."""
     # The only field begins with free text, with the URL trailing it.
     licence_fields = [
         'Reference and PSMA Only http://www.test.gov.uk/licenseurl'
     ]
     extracted = GeminiHarvester._extract_first_licence_url(licence_fields)
     assert_equal(extracted, None)
Ejemplo n.º 24
0
 def test_licence_url_normal(self):
     """A URL in its own field, after a text field, is expected back."""
     licence_fields = [
         'Reference and PSMA Only',
         'http://www.test.gov.uk/licenseurl',
     ]
     extracted = GeminiHarvester._extract_first_licence_url(licence_fields)
     assert_equal(extracted, 'http://www.test.gov.uk/licenseurl')
Ejemplo n.º 25
0
 def test_responsible_organisation_blank(self):
     """An empty organisation list gives a blank provider and no entries."""
     # no owner or publisher, so blank provider
     organisations = []
     expected = ('', [])
     actual = GeminiHarvester._process_responsible_organisation(organisations)
     assert_equal(actual, expected)