def test_gemini_waf_extract_urls_report_link_errors(mock_save_gather_error):
    """Check URL extraction from the sample WAF page and the errors it reports.

    Feeds ``data/sample-waf.html`` (resolved two directories above this test
    file) to ``GeminiWafHarvester._extract_urls`` with the base URL
    ``http://test.co.uk/xml``, then asserts both the returned URL list and
    the calls recorded on ``mock_save_gather_error``.

    NOTE(review): ``mock_save_gather_error`` is presumably a fixture that
    patches the harvester's gather-error reporting -- confirm against the
    fixture definition.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    sample_path = os.path.join(here, '..', '..', 'data', 'sample-waf.html')
    with open(sample_path) as sample_file:
        waf_html = sample_file.read()

    harvester = GeminiWafHarvester()
    # The same Mock is expected as the job argument of the recorded error
    # calls asserted below.
    harvester.harvest_job = Mock()
    extracted = harvester._extract_urls(waf_html, 'http://test.co.uk/xml')

    expected_urls = [
        'http://test.co.uk/xml/AddressBase.xml',
        'http://test.co.uk/xml/SourcePoint.xml',
        'http://test.co.uk/xml/RealImagery.xml',
    ]
    assert extracted == expected_urls

    # One error call is expected for each of these two links.
    expected_errors = [
        call('Ignoring link in WAF because it has "/": /xml/BoundaryLine.xml',
             harvester.harvest_job),
        call('Ignoring link in WAF because it has "/": /xml/SmallScale.xml',
             harvester.harvest_job),
    ]
    assert mock_save_gather_error.call_args_list == expected_errors
Exemple #2
0
    def test_harvest_basic(self):
        """Run the gather, fetch and import stages for a gemini-waf source.

        Creates a source and job from a fixture dict, runs
        ``GeminiWafHarvester`` through its three stages and checks that two
        harvest objects were gathered, that two packages whose type is not
        ``harvest`` exist afterwards, and that every harvest object is
        current and linked to one of those packages.

        NOTE(review): the fixture URL points at 127.0.0.1:8999 -- presumably
        a local mock server started by the test setup; confirm.
        """
        # Definition of the harvest source under test
        waf_source = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': u'http://127.0.0.1:8999/gemini2.1-waf/index.html',
            'source_type': u'gemini-waf'
        }
        _source, harvest_job = self._create_source_and_job(waf_source)

        waf_harvester = GeminiWafHarvester()

        # gather_stage is handed the job object itself rather than a dict
        gathered_ids = waf_harvester.gather_stage(harvest_job)
        assert len(gathered_ids) == 2

        # WAF harvesters are expected to always report True from fetch_stage
        fetch_result = waf_harvester.fetch_stage(gathered_ids)
        assert fetch_result == True

        # Load and import one object at a time, preserving gather order
        imported = []
        for gathered_id in gathered_ids:
            harvest_object = HarvestObject.get(gathered_id)
            assert harvest_object
            imported.append(harvest_object)
            waf_harvester.import_stage(harvest_object)

        package_query = Session.query(Package)
        pkgs = package_query.filter(Package.type != u'harvest').all()
        assert_equal(len(pkgs), 2)

        known_package_ids = set(pkg.id for pkg in pkgs)
        for harvest_object in imported:
            assert harvest_object.current == True
            assert harvest_object.package_id in known_package_ids
Exemple #3
0
    def test_harvest_basic(self):
        """Run the gather, fetch and import stages for a gemini-waf source.

        Creates a source and job from a fixture dict, runs
        ``GeminiWafHarvester`` through its three stages and checks that two
        harvest objects were gathered, that two packages whose type is not
        ``harvest_source`` exist afterwards, and that every harvest object is
        current and linked to one of those packages.

        NOTE(review): the fixture URL points at 127.0.0.1:8999 -- presumably
        a local mock server started by the test setup; confirm.
        """
        # Definition of the harvest source under test
        waf_source = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': u'http://127.0.0.1:8999/gemini2.1-waf/index.html',
            'source_type': u'gemini-waf'
        }
        _source, harvest_job = self._create_source_and_job(waf_source)

        waf_harvester = GeminiWafHarvester()

        # gather_stage is handed the job object itself rather than a dict
        gathered_ids = waf_harvester.gather_stage(harvest_job)
        assert len(gathered_ids) == 2

        # WAF harvesters are expected to always report True from fetch_stage
        fetch_result = waf_harvester.fetch_stage(gathered_ids)
        assert fetch_result == True

        # Load and import one object at a time, preserving gather order
        imported = []
        for gathered_id in gathered_ids:
            harvest_object = HarvestObject.get(gathered_id)
            assert harvest_object
            imported.append(harvest_object)
            waf_harvester.import_stage(harvest_object)

        # Leave out the packages that represent harvest sources
        package_query = Session.query(Package)
        not_a_source = Package.type != u'harvest_source'
        pkgs = package_query.filter(not_a_source).all()
        assert_equal(len(pkgs), 2)

        known_package_ids = set(pkg.id for pkg in pkgs)
        for harvest_object in imported:
            assert harvest_object.current == True
            assert harvest_object.package_id in known_package_ids