Beispiel #1
0
 def test_write_package_and_delete_source(self):
     """Check that a harvested package outlives the source it came from.

     The test is skipped unconditionally: the ``raise SkipTest`` below
     fires before any other statement, so the remainder of the body is
     unreachable until that line is removed.
     """
     raise SkipTest('This needs fixing, but JG is going to refactor this. 2011-2-10.')
     # Everything below is unreachable while the skip above is in place.
     harvest_source = HarvestSource(
         url=self.gemini_example.url_for(file_index=0)
     )
     packages_before = self.count_packages()
     harvest_job = HarvestingJob(source=harvest_source, user_ref="me")
     HarvestingJobController(harvest_job).harvest_documents()
     # Harvesting the example document is expected to add one package.
     packages_after_harvest = self.count_packages()
     self.assert_equal(packages_after_harvest, packages_before + 1)
     # Deleting the source must leave the package count untouched.
     self.delete_commit(harvest_source)
     packages_after_delete = self.count_packages()
     self.assert_equal(packages_after_delete, packages_after_harvest)
Beispiel #2
0
 def setup(self):
     """Build the source, saved job and controller used by the tests.

     The source points at the first gemini example document
     (``file_index=0``). ``job2`` and ``source2`` start out as ``None``.
     """
     super(TestHarvestingJob, self).setup()
     example_url = self.gemini_example.url_for(file_index=0)
     self.source = HarvestSource(url=example_url)
     self.job = HarvestingJob(source=self.source,
                              user_ref=self.fixture_user_ref)
     self.job.save()
     self.controller = HarvestingJobController(self.job)
     # Placeholders for an optional second job/source.
     self.job2 = None
     self.source2 = None
Beispiel #3
0
    def run_harvester(self, *args, **kwds):
        """Run every harvesting job whose status is ``New``.

        A translator for the configured ``lang`` is pushed onto
        ``pylons.translator`` first. Each new job is then announced on
        stdout, printed via ``self.print_harvesting_job`` and harvested
        through a ``HarvestingJobController``.

        ``args`` and ``kwds`` are accepted but not used by this method.
        """
        from pylons.i18n.translation import _get_translator
        import pylons

        pylons.translator._push_object(_get_translator(pylons.config.get("lang")))

        from ckan.model import HarvestingJob
        from ckan.controllers.harvesting import HarvestingJobController

        jobs = HarvestingJob.filter(status=u"New").all()
        jobs_len = len(jobs)
        # Single-argument print(...) calls behave identically as a Python 2
        # print statement and as the Python 3 print function.
        if jobs_len:
            print("Running %s harvesting jobs..." % jobs_len)
        else:
            print("There are no new harvesting jobs.")
        print("")
        # enumerate(..., 1) yields the 1-based position shown in the progress line.
        for jobs_count, job in enumerate(jobs, 1):
            print("Running job %s/%s: %s" % (jobs_count, jobs_len, job.id))
            self.print_harvesting_job(job)
            job_controller = HarvestingJobController(job)
            job_controller.harvest_documents()
Beispiel #4
0
class TestHarvestingJob(HarvesterTestCase):
    """Tests for harvesting jobs driven through ``HarvestingJobController``.

    Each test starts from the source, saved job and controller that
    ``setup`` builds for the first gemini example document.
    """

    fixture_user_ref = u'publisheruser1'

    def setup(self):
        """Build the source, saved job and controller shared by the tests."""
        super(TestHarvestingJob, self).setup()
        example_url = self.gemini_example.url_for(file_index=0)
        self.source = HarvestSource(url=example_url)
        self.job = HarvestingJob(source=self.source,
                                 user_ref=self.fixture_user_ref)
        self.job.save()
        self.controller = HarvestingJobController(self.job)
        # Placeholders for an optional second job/source; teardown deletes
        # whichever of them has been set.
        self.job2 = None
        self.source2 = None

    def teardown(self):
        """Delete the optional second job and source, then tear down the base."""
        # NOTE(review): no test in this class assigns job2/source2, so this
        # cleanup is currently never exercised - confirm whether tests that
        # create a second job or source were meant to register it here.
        for extra_fixture in (self.job2, self.source2):
            if extra_fixture:
                self.delete(extra_fixture)
        super(TestHarvestingJob, self).teardown()

    def _harvest_with_new_job(self, source):
        """Create a fresh job for *source*, harvest it and return the result."""
        new_job = HarvestingJob(source=source,
                                user_ref=self.fixture_user_ref)
        return HarvestingJobController(new_job).harvest_documents()

    def test_create_and_delete_job(self):
        """Deleting a job removes the job but leaves its source in place."""
        self.assert_equal(self.job.source_id, self.source.id)
        self.delete_commit(self.job)
        self.assert_raises(Exception, HarvestingJob.get, self.job.id)
        # The source must still be retrievable after the job is gone.
        HarvestSource.get(self.source.id)

    def test_harvest_documents(self):
        """One harvest adds one package and reports it without errors."""
        packages_before = self.count_packages()
        harvested = self.controller.harvest_documents()
        packages_after = self.count_packages()
        self.assert_equal(packages_after, packages_before + 1)
        # The package attached to the first document is the reported one.
        self.assert_equal(
            harvested.source.documents[0].package.id,
            harvested.report['packages'][0],
        )
        self.assert_true(harvested.report)
        self.assert_len(harvested.report['errors'], 0)
        self.assert_len(harvested.report['packages'], 1)

    def test_harvest_documents_twice_unchanged(self):
        """Re-harvesting an unchanged source reports no packages."""
        first = self.controller.harvest_documents()
        self.assert_len(first.report['errors'], 0)
        self.assert_len(first.report['packages'], 1)
        second = self._harvest_with_new_job(self.source)
        self.assert_len(second.report['errors'], 0)
        self.assert_len(second.report['packages'], 0)

    def test_harvest_documents_twice_changed(self):
        """Re-harvesting after the source URL changes reports one package."""
        first = self.controller.harvest_documents()
        self.assert_len(first.report['errors'], 0)
        self.assert_len(first.report['packages'], 1)
        # Point the same source at a different example document.
        self.source.url = self.gemini_example.url_for(file_index=2)
        self.source.save()
        second = self._harvest_with_new_job(self.source)
        self.assert_len(second.report['errors'], 0)
        self.assert_len(second.report['packages'], 1)

    def test_harvest_documents_source_guid_contention(self):
        """Harvesting a second source after the first reports a GUID clash."""
        self.controller.harvest_documents()
        rival_source = HarvestSource(
            url=self.gemini_example.url_for(file_index=2)
        )
        rival = self._harvest_with_new_job(rival_source)
        first_error = rival.report['errors'][0]
        self.assert_contains(first_error,
                             "Another source is using metadata GUID")

    def test_harvest_bad_source_url(self):
        """A bad source URL adds no package and reports exactly one error."""
        bad_source = HarvestSource(
            url=self.gemini_example.url_for_bad(0)
        )
        bad_job = HarvestingJob(source=bad_source,
                                user_ref=self.fixture_user_ref)
        packages_before = self.count_packages()
        # The report is expected to be empty before the job has run.
        self.assert_false(bad_job.report)
        bad_job = HarvestingJobController(bad_job).harvest_documents()
        packages_after = self.count_packages()
        self.assert_equal(packages_after, packages_before)
        self.assert_len(bad_job.report['packages'], 0)
        self.assert_len(bad_job.report['errors'], 1)
        first_error = bad_job.report['errors'][0]
        self.assert_contains(first_error,
                             'Unable to detect source type from content')