def test_pointless(self):
    # A newer raw datum for the same suid makes the older job pointless.
    job = factories.IngestJobFactory(raw__datestamp=pendulum.now().subtract(hours=2))
    factories.IngestJobFactory(suid=job.suid, raw__datestamp=pendulum.now().subtract(hours=1))

    ingest(job_id=job.id)

    job.refresh_from_db()
    assert job.status == job.STATUS.skipped
    assert job.error_context == job.SkipReasons.pointless.value
def test_schedule(self, raw_ages, selected_raw, claim, prior_status, superfluous, expected_status):
    suid = factories.SourceUniqueIdentifierFactory()
    raws = [
        factories.RawDatumFactory(
            suid=suid,
            datestamp=pendulum.now().subtract(days=days_ago),
        )
        for days_ago in raw_ages
    ]
    expected_raw = raws[selected_raw]

    expected_job = None
    if prior_status:
        expected_job = factories.IngestJobFactory(
            raw=expected_raw,
            status=getattr(IngestJob.STATUS, prior_status),
        )

    job = IngestScheduler().schedule(suid, claim=claim, superfluous=superfluous)

    if expected_job:
        assert job.id == expected_job.id
    assert job.suid_id == suid.id
    assert job.raw_id == expected_raw.id
    assert job.status == getattr(IngestJob.STATUS, expected_status)
    assert job.claimed == claim
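# The @pytest.mark.parametrize decorator that supplies test_schedule's
# arguments sits outside this excerpt. A hypothetical row, purely to show how
# the arguments line up (values are illustrative, not the suite's real cases):
#
#     @pytest.mark.parametrize('raw_ages, selected_raw, claim, prior_status, superfluous, expected_status', [
#         # three raws aged 0-2 days: the newest (index 0) should be selected
#         # and, with no prior job, a fresh 'created' job expected
#         ([0, 1, 2], 0, True, None, False, 'created'),
#     ])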
def test_no_output(self):
    raw = factories.RawDatumFactory(datum=json.dumps({'@graph': []}))
    job = factories.IngestJobFactory(raw=raw)
    assert not raw.no_output

    ingest(job_id=job.id)

    raw.refresh_from_db()
    assert raw.no_output
def test_bulk_reingest(self, mock_ingest):
    with mock.patch('share.ingest.scheduler.IngestScheduler.bulk_schedule') as mock_bulk_schedule:
        jobs = [factories.IngestJobFactory() for _ in range(10)]
        mock_bulk_schedule.return_value = jobs

        actual_jobs = IngestScheduler().bulk_reingest(mock.sentinel.suid_qs)

        mock_bulk_schedule.assert_called_once_with(mock.sentinel.suid_qs, superfluous=True, claim=True)
        assert actual_jobs is jobs
        # Each scheduled job should have been dispatched exactly once,
        # marked superfluous and non-exhaustive.
        assert mock_ingest.delay.call_args_list == [
            mock.call(job_id=j.id, exhaust=False, superfluous=True)
            for j in actual_jobs
        ]
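# The mock_ingest fixture used above is not shown in this excerpt. A minimal
# sketch, assuming bulk_reingest dispatches a celery task importable as
# share.tasks.ingest via .delay (the patch target is an assumption, not
# confirmed by this file):
#
#     @pytest.fixture
#     def mock_ingest():
#         with mock.patch('share.tasks.ingest') as mocked_task:
#             yield mocked_task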
def test_legacy_pipeline(self, legacy, monkeypatch):
    mock_apply_changes = mock.Mock(return_value=[])
    monkeypatch.setattr('share.tasks.jobs.IngestJobConsumer._apply_changes', mock_apply_changes)
    monkeypatch.setattr('django.conf.settings.SHARE_LEGACY_PIPELINE', legacy)

    g = MutableGraph()
    g.add_node('_:id', 'creativework', title='This is a title')
    job = factories.IngestJobFactory(raw__datum=json.dumps({'@graph': g.to_jsonld(in_edges=False)}))

    ingest.apply(kwargs={'job_id': job.id}, throw=True)

    if legacy:
        assert NormalizedData.objects.count() == 1
        assert mock_apply_changes.call_count == 1
    else:
        assert NormalizedData.objects.count() == 0
        assert not mock_apply_changes.called
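# Since test_legacy_pipeline asserts both branches, `legacy` is presumably
# parametrized over both pipeline modes; a plausible sketch of the decorator
# omitted from this excerpt:
#
#     @pytest.mark.parametrize('legacy', [True, False])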
def test_bulk_schedule(self, claim, superfluous):
    suid_specs = [
        # (raw_ages, selected_raw, prior_job_status)
        ([0, 1, 2], 0, 'created'),
        ([5, 4, 2, 3], 2, 'failed'),
        ([2, 1], 1, 'succeeded'),
        ([4, 2], 1, None),
    ]
    suids = set()
    expected_jobs = set()
    for raw_ages, selected_raw, job_status in suid_specs:
        suid = factories.SourceUniqueIdentifierFactory()
        raws = [
            factories.RawDatumFactory(
                suid=suid,
                datestamp=pendulum.now().subtract(days=days_ago),
            )
            for days_ago in raw_ages
        ]
        if job_status:
            job = factories.IngestJobFactory(
                raw=raws[selected_raw],
                status=getattr(IngestJob.STATUS, job_status),
            )
            expected_jobs.add(job)
        suids.add(suid)

    actual_jobs = IngestScheduler().bulk_schedule(
        SourceUniqueIdentifier.objects.all(),
        claim=claim,
        superfluous=superfluous,
    )

    assert len(actual_jobs) == len(suids)
    assert expected_jobs.issubset(actual_jobs)
    for job in actual_jobs:
        assert bool(job.claimed) == claim
        if superfluous:
            assert job.status == IngestJob.STATUS.created
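# As with test_schedule, the claim/superfluous arguments imply parametrize
# decorators outside this excerpt; a plausible sketch covering all four
# combinations:
#
#     @pytest.mark.parametrize('claim', [True, False])
#     @pytest.mark.parametrize('superfluous', [True, False])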