Beispiel #1
0
def run_test_harvester(source_id_or_name, force_import):
    """Pick a harvest job for a source and run it via ``lib.run_harvest_job``.

    A new job is created for the source when possible.  If a job already
    exists, a "Running" job is aborted after an interactive confirmation
    (declining exits the process with status 1); when nothing is running,
    the single "New" job is reused instead.

    :param source_id_or_name: value passed as ``id`` to the
        ``harvest_source_show`` action.
    :param force_import: when truthy, stored on the job object as
        ``force_import`` before the job is run.
    """
    from ckanext.harvest import queue
    from ckanext.harvest.tests import lib
    from ckanext.harvest.logic import HarvestJobExists
    from ckanext.harvest.model import HarvestJob

    context = {
        "model": model,
        "session": model.Session,
        "user": _admin_user()["name"],
    }

    def call_action(name, data_dict):
        # Every action below is invoked with the same admin context.
        return tk.get_action(name)(context, data_dict)

    source = call_action("harvest_source_show", {"id": source_id_or_name})
    source_id = source["id"]

    # Determine the job
    try:
        job_dict = call_action("harvest_job_create", {"source_id": source_id})
    except HarvestJobExists:
        running_jobs = call_action(
            "harvest_job_list", {"source_id": source_id, "status": "Running"}
        )
        if not running_jobs:
            print("Reusing existing harvest job")
            new_jobs = call_action(
                "harvest_job_list", {"source_id": source_id, "status": "New"}
            )
            assert len(new_jobs) == 1, \
                'Multiple "New" jobs for this source! {0}'.format(new_jobs)
            job_dict = new_jobs[0]
        else:
            label = source.get("name") or source_id
            print('\nSource "{0}" apparently has a "Running" job:\n{1}'.format(
                label, running_jobs))

            # Python 2 only has raw_input for reading an unevaluated line.
            prompt = raw_input if six.PY2 else input
            answer = prompt("Abort it? (y/n)")
            if not answer.lower().startswith("y"):
                sys.exit(1)
            job_dict = call_action(
                "harvest_job_abort", {"source_id": source_id}
            )

    job_obj = HarvestJob.get(job_dict["id"])
    if force_import:
        job_obj.force_import = force_import

    source_type = source["source_type"]
    harvester = queue.get_harvester(source_type)
    assert harvester, "No harvester found for type: {0}".format(source_type)
    lib.run_harvest_job(job_obj, harvester)
Beispiel #2
0
    def run_test_harvest(self):
        """Run a harvest job for the source named on the command line.

        ``self.args[1]`` is used as the source id or name; when it is
        missing a hint is printed and the process exits with status 1.
        A new job is created when possible.  If one already exists, a
        "Running" job is aborted after interactive confirmation
        (declining exits with status 1); otherwise the single "New" job
        is reused.  The job is finally passed to ``lib.run_harvest_job``
        with the harvester that ``queue.get_harvester`` returns for the
        source's ``source_type``.
        """
        from ckanext.harvest import queue
        from ckanext.harvest.tests import lib
        from ckanext.harvest.logic import HarvestJobExists
        from ckanext.harvest.model import HarvestJob

        # A source id (or name) is mandatory.
        if len(self.args) < 2:
            print('Please provide a source id')
            sys.exit(1)
        source_id_or_name = unicode(self.args[1])

        context = {
            'model': model,
            'session': model.Session,
            'user': self.admin_user['name'],
        }

        def call(action_name, **data_dict):
            # All actions below share the same admin context.
            return get_action(action_name)(context, data_dict)

        source = call('harvest_source_show', id=source_id_or_name)
        source_id = source['id']

        # Get hold of a job: a fresh one if possible, else an existing one.
        try:
            job_dict = call('harvest_job_create', source_id=source_id)
        except HarvestJobExists:
            running_jobs = call(
                'harvest_job_list', source_id=source_id, status='Running')
            if running_jobs:
                print('\nSource "%s" apparently has a "Running" job:\n%r'
                      % (source.get('name') or source_id, running_jobs))
                answer = raw_input('Abort it? (y/n)')
                if not answer.lower().startswith('y'):
                    sys.exit(1)
                job_dict = call('harvest_job_abort', source_id=source_id)
            else:
                print('Reusing existing harvest job')
                new_jobs = call(
                    'harvest_job_list', source_id=source_id, status='New')
                assert len(new_jobs) == 1, \
                    'Multiple "New" jobs for this source! %r' % new_jobs
                job_dict = new_jobs[0]

        job_obj = HarvestJob.get(job_dict['id'])
        source_type = source['source_type']
        harvester = queue.get_harvester(source_type)
        assert harvester, 'No harvester found for type: %s' % source_type
        lib.run_harvest_job(job_obj, harvester)
Beispiel #3
0
    def run_test_harvest(self):
        """Create (or pick up) a harvest job for a source and run it.

        The source id or name is read from ``self.args[1]``; if it is
        absent, a hint is printed and the process exits with status 1.
        When ``harvest_job_create`` raises ``HarvestJobExists``, either
        the "Running" job is aborted (after the user confirms at the
        prompt; anything not starting with "y" exits with status 1) or
        the one "New" job is reused.
        """
        from ckanext.harvest import queue
        from ckanext.harvest.tests import lib
        from ckanext.harvest.logic import HarvestJobExists
        from ckanext.harvest.model import HarvestJob

        # Determine the source
        has_source_arg = len(self.args) >= 2
        if not has_source_arg:
            print('Please provide a source id')
            sys.exit(1)
        source_id_or_name = unicode(self.args[1])

        context = dict(
            model=model,
            session=model.Session,
            user=self.admin_user['name'],
        )
        show_source = get_action('harvest_source_show')
        source = show_source(context, dict(id=source_id_or_name))
        source_id = source['id']

        # Determine the job
        try:
            create_job = get_action('harvest_job_create')
            job_dict = create_job(context, dict(source_id=source_id))
        except HarvestJobExists:
            running_query = dict(source_id=source_id, status='Running')
            running_jobs = get_action('harvest_job_list')(
                context, running_query)
            if not running_jobs:
                print('Reusing existing harvest job')
                new_query = dict(source_id=source_id, status='New')
                jobs = get_action('harvest_job_list')(context, new_query)
                assert len(jobs) == 1, \
                    'Multiple "New" jobs for this source! %r' % jobs
                job_dict = jobs[0]
            else:
                display_name = source.get('name') or source_id
                message = '\nSource "%s" apparently has a "Running" job:\n%r'
                print(message % (display_name, running_jobs))
                resp = raw_input('Abort it? (y/n)')
                confirmed = resp.lower().startswith('y')
                if not confirmed:
                    sys.exit(1)
                abort_job = get_action('harvest_job_abort')
                job_dict = abort_job(context, dict(source_id=source_id))

        job_obj = HarvestJob.get(job_dict['id'])

        harvester = queue.get_harvester(source['source_type'])
        assert harvester, \
            'No harvester found for type: %s' % source['source_type']
        lib.run_harvest_job(job_obj, harvester)
Beispiel #4
0
    def run_test_harvest(self):
        """Run one harvest job for the source given as ``self.args[1]``.

        Exits with status 1 (after printing a hint) when no source id is
        supplied.  A fresh job is created when possible.  On
        ``HarvestJobExists`` a "Running" job is aborted once the user
        confirms at the prompt (declining exits with status 1); with no
        running job, the single "New" job is reused.
        """
        from ckanext.harvest import queue
        from ckanext.harvest.tests import lib
        from ckanext.harvest.logic import HarvestJobExists
        from ckanext.harvest.model import HarvestJob

        # Determine the source
        if len(self.args) < 2:
            print("Please provide a source id")
            sys.exit(1)
        source_id_or_name = unicode(self.args[1])

        context = {
            "model": model,
            "session": model.Session,
            "user": self.admin_user["name"],
        }
        source = get_action("harvest_source_show")(
            context, {"id": source_id_or_name}
        )

        def jobs_with_status(status):
            # List this source's jobs that are in the given state.
            return get_action("harvest_job_list")(
                context, {"source_id": source["id"], "status": status}
            )

        # Determine the job
        try:
            job_dict = get_action("harvest_job_create")(
                context, {"source_id": source["id"]}
            )
        except HarvestJobExists:
            running_jobs = jobs_with_status("Running")
            if running_jobs:
                source_label = source.get("name") or source["id"]
                print(
                    '\nSource "%s" apparently has a "Running" job:\n%r'
                    % (source_label, running_jobs)
                )
                reply = raw_input("Abort it? (y/n)")
                if not reply.lower().startswith("y"):
                    sys.exit(1)
                job_dict = get_action("harvest_job_abort")(
                    context, {"source_id": source["id"]}
                )
            else:
                print("Reusing existing harvest job")
                pending = jobs_with_status("New")
                assert len(pending) == 1, (
                    'Multiple "New" jobs for this source! %r' % pending
                )
                job_dict = pending[0]

        job_obj = HarvestJob.get(job_dict["id"])

        source_type = source["source_type"]
        harvester = queue.get_harvester(source_type)
        assert harvester, "No harvester found for type: %s" % source_type
        lib.run_harvest_job(job_obj, harvester)
def test_harvester(test_config, expected_count):
    """
    Test the harvester by running it for real with mocked requests.

    The harvester is run twice against the same mocked responses: the
    first run must add ``expected_count`` datasets and the second run
    must neither add nor update anything.

    :param test_config: harvester configuration; serialised with
        ``json.dumps`` and stored as the harvest source's ``config``.
    :param expected_count: number of datasets the first run must add,
        and the organization's expected ``package_count`` afterwards.
    """
    helpers.reset_db()
    context = {
        'user': 'test_user',
        'ignore_auth': True,
        'model': model,
        'session': model.Session,
    }
    user = {
        'name': 'test_user',
        'email': '*****@*****.**',
        'password': '******',
    }
    helpers.call_action('user_create', context, **user)

    org = {'name': 'gome2_test_org', 'url': 'http://example.com/gome2'}
    owner_org = helpers.call_action('organization_create', context, **org)

    source = {
        'url': 'http://example.com/gome2_test_harvester',
        'name': 'gome2_test_harvester',
        'owner_org': owner_org['id'],
        'source_type': 'gome2',
        'config': json.dumps(test_config),
    }
    harvest_source_create(context, source)
    source = harvest_source_show(context, {'id': source['name']})

    # First run: every dataset is new.
    source = _run_gome2_harvest_job(context, source)
    last_job = source['status']['last_job']
    assert last_job['status'] == 'Finished'
    assert last_job['stats']['added'] == expected_count

    # Re-run the harvester without forcing updates
    source = _run_gome2_harvest_job(context, source)
    last_job = source['status']['last_job']
    assert last_job['status'] == 'Finished'
    assert last_job['stats']['added'] == 0
    assert last_job['stats']['updated'] == 0

    # Verify that the org has the expected number of datasets now
    org_response = helpers.call_action('organization_show', context,
                                       **{'id': org['name']})
    assert org_response['package_count'] == expected_count


def _register_gome2_mocks(mocker):
    """Register the canned response for every mocked product/date pair."""
    url_template = (
        'https://wdc.dlr.de/data_products/VIEWER/missing_days.php'
        '?start_date={date}&wpid={wpid}'
    )
    dates = ('2018-01-01', '2018-01-02', '2018-01-03')
    # Built at call time so the module-level response fixtures are only
    # looked up when a test actually runs.
    responses = (
        ('GOME2_O3', o3_response),
        ('GOME2_NO2', no2_response),
        ('GOME2_TropNO2', tropno2_response),
        ('GOME2_SO2', so2_response),
        ('GOME2_SO2mass', so2mass_response),
    )
    for wpid, body in responses:
        for date in dates:
            mocker.register_uri(
                'GET',
                url_template.format(date=date, wpid=wpid),
                text=body)


def _run_gome2_harvest_job(context, source):
    """Create and run one harvest job; return the refreshed source dict."""
    job_dict = get_action('harvest_job_create')(context, {
        'source_id': source['id']
    })
    job_obj = HarvestJob.get(job_dict['id'])
    harvester = queue.get_harvester(source['source_type'])

    # real_http=True: requests that match none of the registered URIs
    # are not blocked by the mocker.
    with requests_mock.Mocker(real_http=True) as m:
        _register_gome2_mocks(m)
        lib.run_harvest_job(job_obj, harvester)

    return harvest_source_show(context, {'id': source['name']})
Beispiel #6
0
    def test_harvester(self):
        """
        Run the harvester for real against mocked search results.

        Three consecutive jobs are run for the same source: the first
        must add 10 datasets, the second must change nothing, and the
        third -- after ``update_all`` is switched on in the source
        config -- must update all 10 without adding any.
        """
        helpers.reset_db()
        context = {
            'user': 'test_user',
            'ignore_auth': True,
            'model': model,
            'session': model.Session,
        }
        helpers.call_action(
            'user_create', context,
            name='test_user',
            email='*****@*****.**',
            password='******')

        org = {'name': 'test_org', 'url': 'https://www.example.com'}
        owner_org = helpers.call_action('organization_create', context, **org)

        source_name = 'scihub_test_harvester'

        def harvest_once(current_source):
            # Create a job, run it against the mocked search endpoint and
            # hand back the freshly loaded source (with its job status).
            job_dict = get_action('harvest_job_create')(context, {
                'source_id': current_source['id']
            })
            job_obj = HarvestJob.get(job_dict['id'])
            harvester = queue.get_harvester(current_source['source_type'])
            with requests_mock.Mocker(real_http=True) as m:
                m.register_uri('GET', '/dhus/search?q', text=self.raw_results)
                lib.run_harvest_job(job_obj, harvester)
            return harvest_source_show(context, {'id': source_name})

        config_dict = {
            'source': 'esa_scihub',
            'update_all': False,
            'datasets_per_job': 10,
            'timeout': 10,
            'skip_raw': False
        }
        source = {
            'url': 'http://www.scihub.org',
            'name': 'scihub_test_harvester',
            'owner_org': owner_org['id'],
            'source_type': 'esasentinel',
            'config': json.dumps(config_dict)
        }
        harvest_source_create(context, source)
        source = harvest_source_show(context, {'id': source_name})

        # First run: all 10 datasets are new.
        source = harvest_once(source)
        last_job = source['status']['last_job']
        assert last_job['status'] == 'Finished'
        assert last_job['stats']['added'] == 10

        # Re-run the harvester
        source = harvest_once(source)
        last_job = source['status']['last_job']
        assert last_job['status'] == 'Finished'
        assert last_job['stats']['added'] == 0
        assert last_job['stats']['updated'] == 0

        # Re-run the harvester but force updates
        config_dict['update_all'] = True
        source['config'] = json.dumps(config_dict)
        harvest_source_update(context, source)
        source = harvest_once(source)
        last_job = source['status']['last_job']
        assert last_job['status'] == 'Finished'
        assert last_job['stats']['added'] == 0
        assert last_job['stats']['updated'] == 10

        # Verify that the org has 10 datasets now
        org_after = helpers.call_action('organization_show', context,
                                        id='test_org')
        assert org_after['package_count'] == 10