def test_write_package_and_delete_source(self):
    """Check that a harvested package outlives the deletion of its source.

    The test is skipped unconditionally: the ``SkipTest`` raised by the
    first statement means none of the code below it currently runs.
    """
    raise SkipTest('This needs fixing, but JG is going to refactor this. 2011-2-10.')
    # Everything from here on is unreachable until the skip above is
    # removed; it is kept as the intended body of the test.
    source = HarvestSource(url=self.gemini_example.url_for(file_index=0))
    packages_at_start = self.count_packages()
    controller = HarvestingJobController(
        HarvestingJob(source=source, user_ref="me")
    )
    controller.harvest_documents()
    packages_after_harvest = self.count_packages()
    # Harvesting the example document is expected to add exactly one package.
    self.assert_equal(packages_after_harvest, packages_at_start + 1)
    self.delete_commit(source)
    packages_after_delete = self.count_packages()
    # Deleting the source must leave the package count untouched.
    self.assert_equal(packages_after_delete, packages_after_harvest)
def setup(self):
    """Create the source, job and controller that the tests start from.

    After the parent ``setup`` has run, a ``HarvestSource`` is built for
    the first gemini example URL, a ``HarvestingJob`` for that source is
    created and saved, and a ``HarvestingJobController`` is wrapped
    around the job.
    """
    super(TestHarvestingJob, self).setup()
    example_url = self.gemini_example.url_for(file_index=0)
    self.source = HarvestSource(url=example_url)
    self.job = HarvestingJob(
        source=self.source,
        user_ref=self.fixture_user_ref,
    )
    self.job.save()
    self.controller = HarvestingJobController(self.job)
    # Optional second job/source slots; they start out empty.
    self.job2 = None
    self.source2 = None
def run_harvester(self, *args, **kwds):
    """Run every harvesting job whose status is ``u"New"``.

    Prints a summary line, then for each job prints its position and id,
    calls ``self.print_harvesting_job(job)`` and harvests its documents
    through a ``HarvestingJobController``.

    ``*args`` and ``**kwds`` are accepted but not used by this method.
    """
    # Imports stay function-scoped and in their original order.
    from pylons.i18n.translation import _get_translator
    import pylons

    # Push a translator for the configured "lang" onto pylons.translator.
    # NOTE(review): presumably needed because this runs outside a web
    # request, where no translator has been set up -- confirm.
    pylons.translator._push_object(_get_translator(pylons.config.get("lang")))

    from ckan.model import HarvestingJob
    from ckan.controllers.harvesting import HarvestingJobController

    jobs = HarvestingJob.filter(status=u"New").all()
    total = len(jobs)

    # Single-argument print(...) emits the same text whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    if total:
        print("Running %s harvesting jobs..." % total)
    else:
        print("There are no new harvesting jobs.")
    print("")

    for index, job in enumerate(jobs):
        # Positions are reported 1-based.
        print("Running job %s/%s: %s" % (index + 1, total, job.id))
        self.print_harvesting_job(job)
        HarvestingJobController(job).harvest_documents()
class TestHarvestingJob(HarvesterTestCase):
    """Tests for ``HarvestingJob`` and ``HarvestingJobController``."""

    fixture_user_ref = u'publisheruser1'

    def setup(self):
        """Create the source, saved job and controller used by the tests."""
        super(TestHarvestingJob, self).setup()
        example_url = self.gemini_example.url_for(file_index=0)
        self.source = HarvestSource(url=example_url)
        self.job = HarvestingJob(
            source=self.source,
            user_ref=self.fixture_user_ref,
        )
        self.job.save()
        self.controller = HarvestingJobController(self.job)
        # Optional second job/source slots, cleaned up by teardown if set.
        self.job2 = None
        self.source2 = None

    def teardown(self):
        """Delete the optional second job and source, then tear down."""
        # The job is handled before the source, as two sequential checks.
        for slot in ('job2', 'source2'):
            leftover = getattr(self, slot)
            if leftover:
                self.delete(leftover)
        super(TestHarvestingJob, self).teardown()

    def _run_new_job(self, source):
        """Harvest ``source`` through a brand-new job and return the result.

        Builds a ``HarvestingJob`` for ``source`` with the fixture user,
        wraps it in a ``HarvestingJobController`` and returns whatever
        ``harvest_documents()`` returns.
        """
        new_job = HarvestingJob(
            source=source,
            user_ref=self.fixture_user_ref,
        )
        return HarvestingJobController(new_job).harvest_documents()

    def test_create_and_delete_job(self):
        """Deleting a job makes it unfetchable but keeps its source."""
        self.assert_equal(self.job.source_id, self.source.id)
        self.delete_commit(self.job)
        self.assert_raises(Exception, HarvestingJob.get, self.job.id)
        # - check source has not been deleted!
        HarvestSource.get(self.source.id)

    def test_harvest_documents(self):
        """Harvesting the example source adds one package, with no errors."""
        packages_before = self.count_packages()
        harvested = self.controller.harvest_documents()
        packages_after = self.count_packages()
        self.assert_equal(packages_after, packages_before + 1)
        # The first harvested document's package is the one in the report.
        self.assert_equal(
            harvested.source.documents[0].package.id,
            (harvested.report['packages'][0]),
        )
        self.assert_true(harvested.report)
        self.assert_len(harvested.report['errors'], 0)
        self.assert_len(harvested.report['packages'], 1)

    def test_harvest_documents_twice_unchanged(self):
        """A second harvest of the same source reports no packages."""
        first = self.controller.harvest_documents()
        self.assert_len(first.report['errors'], 0)
        self.assert_len(first.report['packages'], 1)
        second = self._run_new_job(self.source)
        self.assert_len(second.report['errors'], 0)
        self.assert_len(second.report['packages'], 0)

    def test_harvest_documents_twice_changed(self):
        """After the source URL changes, a second harvest reports a package."""
        first = self.controller.harvest_documents()
        self.assert_len(first.report['errors'], 0)
        self.assert_len(first.report['packages'], 1)
        # Point the same source at a different example file and save it.
        self.source.url = self.gemini_example.url_for(file_index=2)
        self.source.save()
        second = self._run_new_job(self.source)
        self.assert_len(second.report['errors'], 0)
        self.assert_len(second.report['packages'], 1)

    def test_harvest_documents_source_guid_contention(self):
        """A second source harvested after the first reports a GUID clash."""
        self.controller.harvest_documents()
        rival_source = HarvestSource(
            url=self.gemini_example.url_for(file_index=2)
        )
        rival_job = self._run_new_job(rival_source)
        first_error = rival_job.report['errors'][0]
        self.assert_contains(first_error, "Another source is using metadata GUID")

    def test_harvest_bad_source_url(self):
        """A bad source URL yields one error and no new packages."""
        bad_source = HarvestSource(
            url=self.gemini_example.url_for_bad(0)
        )
        pending = HarvestingJob(
            source=bad_source,
            user_ref=self.fixture_user_ref,
        )
        packages_before = self.count_packages()
        # The job has no report until it has been harvested.
        self.assert_false(pending.report)
        harvested = HarvestingJobController(pending).harvest_documents()
        packages_after = self.count_packages()
        self.assert_equal(packages_after, packages_before)
        self.assert_len(harvested.report['packages'], 0)
        self.assert_len(harvested.report['errors'], 1)
        first_error = harvested.report['errors'][0]
        self.assert_contains(first_error, 'Unable to detect source type from content')