Beispiel #1
0
    def test_autoupdate(self, completions, status, new_version, updated):
        """Check how ``tasks.harvest`` treats a job after the harvester VERSION changes.

        The job is created while the harvester class reports ``VERSION == 1``;
        the class is then switched to ``new_version`` before the task runs.

        Args:
            completions: Passed to ``HarvestJobFactory`` as ``completions``.
            status: Passed to ``HarvestJobFactory`` as ``status``.
            new_version: VERSION set on the harvester class before harvesting.
            updated: Whether the job is expected to end up recorded at
                ``new_version``.
        """
        source_config = factories.SourceConfigFactory()
        harvester_class = source_config.harvester.get_class()

        # VERSION lives on the class, so the assignments below would outlive
        # this test unless the previous value is put back afterwards.
        original_version = harvester_class.VERSION
        try:
            harvester_class.VERSION = 1

            hl = factories.HarvestJobFactory(
                status=status,
                completions=completions,
                harvester_version=source_config.harvester.version,
                source_config=source_config,
                start_date=pendulum.parse('2017-01-01').date(),
            )

            harvester_class.VERSION = new_version

            tasks.harvest(job_id=hl.id)

            hl.refresh_from_db()

            if updated:
                assert hl.status == HarvestJob.STATUS.succeeded
            elif new_version > 1:
                assert hl.status == HarvestJob.STATUS.skipped
                assert hl.error_context == HarvestJob.SkipReasons.obsolete.value

            assert (hl.harvester_version == new_version) == updated
        finally:
            # Restore the class attribute even when an assertion above fails.
            harvester_class.VERSION = original_version
Beispiel #2
0
 def test_latest_date_null(self):
     """Expect 365 scheduled items for a full harvest starting 2017-01-01.

     This test creates no harvest jobs itself; ``HarvestScheduler.all`` is
     called with a cutoff of 2018-01-01.
     """
     earliest = pendulum.parse('2017-01-01').date()
     source_config = factories.SourceConfigFactory(
         full_harvest=True,
         earliest_date=earliest,
     )

     scheduler = HarvestScheduler(source_config)
     cutoff = pendulum.parse('2018-01-01').date()
     scheduled = scheduler.all(cutoff=cutoff)

     assert len(scheduled) == 365
Beispiel #3
0
    def test_obsolete(self):
        """A job recorded at an older harvester version is skipped as obsolete.

        Two jobs share the same source config and start date: ``hlv1`` is
        created at the current VERSION, ``hlv2`` after VERSION is bumped by
        one. The newer job is harvested first and must succeed; the older one
        must then be skipped with the ``obsolete`` skip reason and keep its
        original version.
        """
        source_config = factories.SourceConfigFactory()
        harvester_class = source_config.harvester.get_class()

        hlv1 = factories.HarvestJobFactory(
            harvester_version=source_config.harvester.version,
            source_config=source_config,
            start_date=pendulum.parse('2017-01-01').date(),
        )

        old_version = harvester_class.VERSION
        try:
            # VERSION is a class attribute; the bump is undone in ``finally``
            # so it does not leak beyond this test.
            harvester_class.VERSION += 1
            new_version = harvester_class.VERSION

            hlv2 = factories.HarvestJobFactory(
                harvester_version=source_config.harvester.version,
                source_config=source_config,
                start_date=pendulum.parse('2017-01-01').date(),
            )

            tasks.harvest(job_id=hlv2.id)
            tasks.harvest(job_id=hlv1.id)

            hlv1.refresh_from_db()
            hlv2.refresh_from_db()

            assert hlv2.status == HarvestJob.STATUS.succeeded
            assert hlv2.harvester_version == new_version

            assert hlv1.status == HarvestJob.STATUS.skipped
            assert hlv1.harvester_version == old_version
            assert hlv1.error_context == HarvestJob.SkipReasons.obsolete.value
        finally:
            harvester_class.VERSION = old_version
Beispiel #4
0
    def test_resumption_tokens(self, monkeypatch):
        """``fetch_records`` yields two records when ``fetch_page`` returns two pages.

        ``fetch_page`` is replaced by a mock whose first call returns one
        record with the token ``'token'`` and whose second call returns one
        record with ``None`` as the token.
        """
        config = factories.SourceConfigFactory(harvester_kwargs={'metadata_prefix': 'oai_dc'})
        harvester = OAIHarvester(config)

        pages = (
            ([self.OAI_DC_RECORD], 'token'),
            ([self.OAI_DC_RECORD], None),
        )
        fake_fetch_page = mock.Mock(side_effect=pages)
        monkeypatch.setattr(harvester, 'fetch_page', fake_fetch_page)

        fetched = list(harvester.fetch_records(''))
        assert len(fetched) == 2
Beispiel #5
0
    def test_duplicate_resumption_tokens(self, monkeypatch):
        """``fetch_records`` raises ``ValueError`` when the same token comes back twice.

        ``fetch_page`` is mocked to return the same record and the same
        ``'token'`` on every call; exactly one record is expected to be
        yielded before the error.
        """
        config = factories.SourceConfigFactory(harvester_kwargs={'metadata_prefix': 'oai_dc'})
        harvester = OAIHarvester(config)

        repeating_page = ([self.OAI_DC_RECORD], 'token')
        monkeypatch.setattr(harvester, 'fetch_page', mock.Mock(return_value=repeating_page))

        seen = []
        with pytest.raises(ValueError) as excinfo:
            # Collect one at a time so records yielded before the error are kept.
            for record in harvester.fetch_records(''):
                seen.append(record)

        assert len(seen) == 1

        expected_message = 'Found duplicate resumption token "token" from {!r}'.format(harvester)
        assert excinfo.value.args == (expected_message, )
Beispiel #6
0
    def test_overrides(self, source_config_kwargs, task_kwargs, lock_config):
        """Run ``tasks.harvest`` with the given task kwargs and expect no exception.

        Args:
            source_config_kwargs: Keyword arguments for ``SourceConfigFactory``.
            task_kwargs: Extra keyword arguments forwarded to ``tasks.harvest``.
            lock_config: When truthy, a ``SyncedThread`` wrapping
                ``source_config.acquire_lock`` is started before the task runs
                and joined afterwards.
        """
        source_config = factories.SourceConfigFactory(**source_config_kwargs)
        job = factories.HarvestJobFactory(source_config=source_config)

        lock_thread = None
        if lock_config:
            lock_thread = SyncedThread(source_config.acquire_lock)
            lock_thread.start()

        try:
            tasks.harvest(job_id=job.id, **task_kwargs)
        finally:
            # Always wait for the helper thread, even if the task raised.
            if lock_thread is not None:
                lock_thread.join()
Beispiel #7
0
    def test_failure_cases(self, source_config_kwargs, task_kwargs, lock_config, exception):
        """Run ``tasks.harvest`` with the given task kwargs and expect ``exception``.

        Args:
            source_config_kwargs: Keyword arguments for ``SourceConfigFactory``.
            task_kwargs: Extra keyword arguments forwarded to ``tasks.harvest``.
            lock_config: When truthy, a ``SyncedThread`` wrapping
                ``source_config.acquire_lock`` is started before the task runs
                and joined afterwards.
            exception: Exception type the task call must raise.
        """
        source_config = factories.SourceConfigFactory(**source_config_kwargs)
        job = factories.HarvestJobFactory(source_config=source_config)

        lock_thread = None
        if lock_config:
            lock_thread = SyncedThread(source_config.acquire_lock)
            lock_thread.start()

        try:
            with pytest.raises(exception):
                tasks.harvest(job_id=job.id, **task_kwargs)
        finally:
            # Always wait for the helper thread, whatever the outcome above.
            if lock_thread is not None:
                lock_thread.join()
Beispiel #8
0
    def test_latest_date(self):
        """Expect 365 scheduled items despite an unrelated job on the earliest date.

        A harvest job is created without passing ``source_config``, with a
        start date equal to this config's ``earliest_date``; the schedule for
        this config up to 2018-01-01 must still contain 365 items.
        """
        earliest = pendulum.parse('2017-01-01').date()
        source_config = factories.SourceConfigFactory(
            full_harvest=True,
            earliest_date=earliest,
        )

        # This job starts on earliest_date but is tied to a different
        # source_config than the one being scheduled.
        factories.HarvestJobFactory(
            start_date=pendulum.parse('2017-01-01').date(),
            end_date=pendulum.parse('2017-01-02').date(),
        )

        scheduler = HarvestScheduler(source_config)
        scheduled = scheduler.all(cutoff=pendulum.parse('2018-01-01').date())
        assert len(scheduled) == 365
Beispiel #9
0
    def test_caught_up(self):
        """Expect nothing to schedule when a job already exists at the cutoff date.

        Two jobs are created for the same source config: one starting on
        ``earliest_date`` (2017-01-01) and one starting on the cutoff
        (2018-01-01). ``HarvestScheduler.all`` must then return no items.
        """
        source_config = factories.SourceConfigFactory(
            full_harvest=True,
            earliest_date=pendulum.parse('2017-01-01').date(),
        )

        job_ranges = (
            ('2017-01-01', '2017-01-02'),
            ('2018-01-01', '2018-01-02'),
        )
        for start, end in job_ranges:
            factories.HarvestJobFactory(
                source_config=source_config,
                start_date=pendulum.parse(start).date(),
                end_date=pendulum.parse(end).date(),
            )

        scheduler = HarvestScheduler(source_config)
        scheduled = scheduler.all(cutoff=pendulum.parse('2018-01-01').date())
        assert len(scheduled) == 0
Beispiel #10
0
def make_source_config(context, label, name=None, interval=None, time=None):
    """Create a source config through ``SourceConfigFactory``.

    Args:
        context: Not used by this function.
        label: Passed to the factory as ``label``.
        name: Name of an existing ``Source`` to look up; when ``None`` a new
            source is created with ``SourceFactory`` instead.
        interval: Optional keyword, one of ``'daily'``, ``'weekly'``,
            ``'fortnightly'``, ``'yearly'`` or ``'monthly'``, translated to
            the ``harvest_interval`` string. Any other non-``None`` value
            raises ``KeyError``.
        time: Optional value passed to the factory as ``harvest_after``.

    Returns:
        None. The created object is not returned.
    """
    if name is None:
        source = factories.SourceFactory()
    else:
        source = models.Source.objects.get(name=name)

    factory_kwargs = {'label': label, 'source': source}

    if interval is not None:
        interval_strings = {
            'daily': '1 day',
            'weekly': '1 week',
            'fortnightly': '2 weeks',
            'yearly': '1 year',
            'monthly': '1 month',
        }
        factory_kwargs['harvest_interval'] = interval_strings[interval]

    if time is not None:
        factory_kwargs['harvest_after'] = time

    factories.SourceConfigFactory(**factory_kwargs)
Beispiel #11
0
def source_config():
    """Return a new object built by ``SourceConfigFactory`` with default arguments.

    NOTE(review): presumably registered as a pytest fixture; the decorator is
    not visible here, so confirm against the full file.
    """
    config = factories.SourceConfigFactory()
    return config
Beispiel #12
0
 def test_ignores_deleted(self):
     """With ``ignore_disabled=True``, harvesting a deleted source yields nothing."""
     deleted_config = factories.SourceConfigFactory(source__is_deleted=True)
     harvester = deleted_config.get_harvester()

     harvested = list(harvester.harvest(ignore_disabled=True))
     assert harvested == []
Beispiel #13
0
 def test_deleted_source(self):
     """Harvesting a config whose source is deleted raises ``HarvesterDisabledError``."""
     deleted_config = factories.SourceConfigFactory(source__is_deleted=True)

     with pytest.raises(HarvesterDisabledError):
         # Both the harvester lookup and the consumed harvest stay inside the
         # block, so an error from either one satisfies the expectation.
         harvester = deleted_config.get_harvester()
         list(harvester.harvest())
Beispiel #14
0
 def source_config(self, request):
     """Build a source config from the two flags carried in ``request.param``.

     ``request.param`` is unpacked as ``(config_disabled, source_deleted)``;
     the first is passed to the factory as ``disabled`` and the second as
     ``source__is_deleted``.
     """
     config_disabled, source_deleted = request.param
     factory_kwargs = {
         'disabled': config_disabled,
         'source__is_deleted': source_deleted,
     }
     return factories.SourceConfigFactory(**factory_kwargs)