Esempio n. 1
0
 def test_reingest_async(self, mock_ingest):
     """Check that ``reingest_async`` returns a claimed job and queues it once.

     ``mock_ingest`` is presumably a patched stand-in for the ingest task
     (TODO confirm against the decorator/fixture outside this view); the
     test only inspects its ``delay`` attribute.
     """
     datum = factories.RawDatumFactory()
     scheduled = IngestScheduler().reingest_async(datum.suid)

     assert scheduled.claimed

     # The task must be queued exactly once, with these keyword arguments.
     expected_kwargs = {
         'job_id': scheduled.id,
         'exhaust': False,
         'superfluous': True,
     }
     mock_ingest.delay.assert_called_once_with(**expected_kwargs)
Esempio n. 2
0
    def test_schedule(self, raw_ages, selected_raw, claim, prior_status,
                      superfluous, expected_status):
        """Check the job returned by ``IngestScheduler.schedule`` for one suid.

        One raw datum is created per entry of ``raw_ages`` (each entry is a
        number of days subtracted from "now"). ``selected_raw`` is the index
        of the raw the returned job is expected to point at. When
        ``prior_status`` is truthy, a job with that status is created for
        that raw beforehand and the scheduler is expected to return that
        same job. ``expected_status`` names the ``IngestJob.STATUS`` member
        the returned job must end up with.
        """
        suid = factories.SourceUniqueIdentifierFactory()

        raws = []
        for age_in_days in raw_ages:
            stamp = pendulum.now().subtract(days=age_in_days)
            raws.append(factories.RawDatumFactory(suid=suid, datestamp=stamp))
        target_raw = raws[selected_raw]

        # Optionally seed a job that already exists before scheduling.
        existing_job = None
        if prior_status:
            seeded_status = getattr(IngestJob.STATUS, prior_status)
            existing_job = factories.IngestJobFactory(
                raw=target_raw,
                status=seeded_status,
            )

        scheduler = IngestScheduler()
        job = scheduler.schedule(suid, claim=claim, superfluous=superfluous)

        if existing_job:
            # The pre-existing job must be reused rather than duplicated.
            assert job.id == existing_job.id
        assert job.suid_id == suid.id
        assert job.raw_id == target_raw.id
        wanted_status = getattr(IngestJob.STATUS, expected_status)
        assert job.status == wanted_status
        assert job.claimed == claim
Esempio n. 3
0
    def test_set_no_output(self):
        """Transforming a raw datum with an empty ``@graph`` sets ``no_output``."""
        empty_graph = json.dumps({'@graph': []})
        datum = factories.RawDatumFactory(datum=empty_graph)

        tasks.transform(datum.id)

        # Reload from the database to observe what the task persisted.
        datum.refresh_from_db()
        assert datum.no_output is True
Esempio n. 4
0
    def test_does_not_set_no_output(self):
        """``no_output`` stays ``None`` when normalized data exists for the raw.

        Same empty-``@graph`` datum as the positive case, but a normalized
        data record is attached to the raw before the transform runs.
        """
        empty_graph = json.dumps({'@graph': []})
        datum = factories.RawDatumFactory(datum=empty_graph)
        factories.NormalizedDataFactory(raw=datum)

        tasks.transform(datum.id)

        # Reload from the database to observe what the task persisted.
        datum.refresh_from_db()
        assert datum.no_output is None
Esempio n. 5
0
    def test_no_output(self):
        """Running ``ingest`` on a job whose raw has an empty ``@graph`` flags it.

        ``no_output`` must be falsy before the ingest call and truthy after
        the raw datum is reloaded from the database.
        """
        empty_graph = json.dumps({'@graph': []})
        datum = factories.RawDatumFactory(datum=empty_graph)
        ingest_job = factories.IngestJobFactory(raw=datum)

        # Precondition: the flag is not set on a freshly created raw.
        assert not datum.no_output

        ingest(job_id=ingest_job.id)
        datum.refresh_from_db()

        assert datum.no_output
Esempio n. 6
0
    def test_bulk_schedule(self, claim, superfluous):
        """Check ``IngestScheduler.bulk_schedule`` over several suids at once.

        Four suids are created, each with its own raw data; three of them
        also get a pre-existing job. The scheduler is run over every
        ``SourceUniqueIdentifier`` in the database and must return one job
        per created suid, including all pre-existing jobs. Every returned
        job must have a claimed state matching ``claim``, and when
        ``superfluous`` is truthy every job must have status ``created``.
        """
        # Each spec is (raw ages in days, index of the raw that receives the
        # pre-existing job, status name of that job or None for no job).
        specs = (
            ([0, 1, 2], 0, 'created'),
            ([5, 4, 2, 3], 2, 'failed'),
            ([2, 1], 1, 'succeeded'),
            ([4, 2], 1, None),
        )

        created_suids = set()
        preexisting_jobs = set()
        for ages, raw_index, status_name in specs:
            suid = factories.SourceUniqueIdentifierFactory()
            raws = []
            for age_in_days in ages:
                stamp = pendulum.now().subtract(days=age_in_days)
                raws.append(
                    factories.RawDatumFactory(suid=suid, datestamp=stamp))
            created_suids.add(suid)

            if not status_name:
                # This suid starts out with no job at all.
                continue
            seeded_status = getattr(IngestJob.STATUS, status_name)
            preexisting_jobs.add(
                factories.IngestJobFactory(raw=raws[raw_index],
                                           status=seeded_status))

        scheduler = IngestScheduler()
        actual_jobs = scheduler.bulk_schedule(
            SourceUniqueIdentifier.objects.all(),
            claim=claim,
            superfluous=superfluous,
        )

        # One job per suid, and existing jobs are reused rather than replaced.
        assert len(actual_jobs) == len(created_suids)
        assert preexisting_jobs.issubset(actual_jobs)
        for scheduled in actual_jobs:
            assert bool(scheduled.claimed) == claim
            if superfluous:
                assert scheduled.status == IngestJob.STATUS.created