Ejemplo n.º 1
0
    def test_01_basic_harvester(self):
        """Take a 'test' harvest source through job creation and the gather step.

        A harvest source and a job are created through the logic layer, the
        job is started with ``harvest_jobs_run`` and one message from the
        gather queue is handed to ``queue.gather_callback``. The fetch step
        is not exercised here.
        """
        gather_queue = 'ckan.harvest.gather'
        fetch_queue = 'ckan.harvest.fetch'

        # Obtain both consumers before purging, so the queues start out empty
        gather_consumer = queue.get_consumer(gather_queue, 'harvest_job_id')
        fetch_consumer = queue.get_consumer(fetch_queue, 'harvest_object_id')
        gather_consumer.queue_purge(queue=gather_queue)
        fetch_consumer.queue_purge(queue=fetch_queue)

        site_user = logic.get_action('get_site_user')(
            {'model': model, 'ignore_auth': True}, {})

        context = {
            'model': model,
            'session': model.Session,
            'user': site_user['name'],
            'api_version': 3,
            'ignore_auth': True,
        }

        source = logic.get_action('harvest_source_create')(context, {
            'title': 'Test Source',
            'name': 'test-source',
            'url': 'http://www.lidata.eu/oaiprovider/',
            'source_type': 'test',
        })
        source_id = source['id']

        job = logic.get_action('harvest_job_create')(
            context, {'source_id': source_id})
        job_id = job['id']

        # A freshly created job belongs to the source and has not started yet
        assert job['source_id'] == source_id, job
        assert job['status'] == u'New'

        logic.get_action('harvest_jobs_run')(context, {'source_id': source_id})

        shown_job = logic.get_action('harvest_job_show')(context,
                                                         {'id': job_id})
        assert shown_job['status'] == u'Running'

        # Take one message off the gather queue and process it
        message = gather_consumer.basic_get(queue=gather_queue)
        queue.gather_callback(gather_consumer, *message)

        # Fetch step is currently disabled:
        # message = fetch_consumer.basic_get(queue=fetch_queue)
        # queue.fetch_callback(fetch_consumer, *message)

        # NOTE(review): unconditional failure; looks like a work-in-progress
        # placeholder -- confirm before relying on this test.
        assert 1 == 2
Ejemplo n.º 2
0
    def test_01_basic_harvester(self):
        """Take a 'test' harvest source through job creation and the gather step.

        A harvest source and a job are created through the logic layer, the
        job is started with ``harvest_jobs_run`` and one message from the
        gather queue is handed to ``queue.gather_callback``. The fetch step
        is not exercised here.
        """
        gather_queue = 'ckan.harvest.gather'
        fetch_queue = 'ckan.harvest.fetch'

        # Obtain both consumers before purging, so the queues start out empty
        gather_consumer = queue.get_consumer(gather_queue, 'harvest_job_id')
        fetch_consumer = queue.get_consumer(fetch_queue, 'harvest_object_id')
        gather_consumer.queue_purge(queue=gather_queue)
        fetch_consumer.queue_purge(queue=fetch_queue)

        site_user = logic.get_action('get_site_user')(
            {'model': model, 'ignore_auth': True}, {})

        context = {
            'model': model,
            'session': model.Session,
            'user': site_user['name'],
            'api_version': 3,
            'ignore_auth': True,
        }

        source = logic.get_action('harvest_source_create')(context, {
            'title': 'Test Source',
            'name': 'test-source',
            'url': 'http://www.lidata.eu/oaiprovider/',
            'source_type': 'test',
        })
        source_id = source['id']

        job = logic.get_action('harvest_job_create')(
            context, {'source_id': source_id})
        job_id = job['id']

        # A freshly created job belongs to the source and has not started yet
        assert job['source_id'] == source_id, job
        assert job['status'] == u'New'

        logic.get_action('harvest_jobs_run')(context, {'source_id': source_id})

        shown_job = logic.get_action('harvest_job_show')(context,
                                                         {'id': job_id})
        assert shown_job['status'] == u'Running'

        # Take one message off the gather queue and process it
        message = gather_consumer.basic_get(queue=gather_queue)
        queue.gather_callback(gather_consumer, *message)

        # Fetch step is currently disabled:
        # message = fetch_consumer.basic_get(queue=fetch_queue)
        # queue.fetch_callback(fetch_consumer, *message)

        # NOTE(review): unconditional failure; looks like a work-in-progress
        # placeholder -- confirm before relying on this test.
        assert 1 == 2
Ejemplo n.º 3
0
    def setup_class(cls):

        cls.gather_consumer = queue.get_consumer('ckan.harvest.gather.test',
                                                 'harvest_job_id')
        cls.fetch_consumer = queue.get_consumer('ckan.harvest.fetch.test',
                                                'harvest_object_id')

        cls.mock_url = 'http://some.dcat.file.rdf'

        # Minimal remote RDF file
        cls.remote_file = '''<?xml version="1.0" encoding="utf-8" ?>
Ejemplo n.º 4
0
    def setup_class(cls):

        cls.gather_consumer = queue.get_consumer('ckan.harvest.gather.test',
                                                 'harvest_job_id')
        cls.fetch_consumer = queue.get_consumer('ckan.harvest.fetch.test',
                                                'harvest_object_id')

        # Minimal remote RDF file
        cls.rdf_mock_url = 'http://some.dcat.file.rdf'
        cls.rdf_content_type = 'application/rdf+xml'
        cls.rdf_content = '''<?xml version="1.0" encoding="utf-8" ?>
        <rdf:RDF
         xmlns:dct="http://purl.org/dc/terms/"
         xmlns:dcat="http://www.w3.org/ns/dcat#"
         xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dcat:Catalog rdf:about="https://data.some.org/catalog">
          <dcat:dataset>
            <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/1">
              <dct:title>Example dataset 1</dct:title>
            </dcat:Dataset>
          </dcat:dataset>
          <dcat:dataset>
            <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/2">
              <dct:title>Example dataset 2</dct:title>
            </dcat:Dataset>
          </dcat:dataset>
        </dcat:Catalog>
        </rdf:RDF>
        '''
        cls.rdf_remote_file_small = '''<?xml version="1.0" encoding="utf-8" ?>
        <rdf:RDF
         xmlns:dct="http://purl.org/dc/terms/"
         xmlns:dcat="http://www.w3.org/ns/dcat#"
         xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dcat:Catalog rdf:about="https://data.some.org/catalog">
          <dcat:dataset>
            <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/1">
              <dct:title>Example dataset 1</dct:title>
            </dcat:Dataset>
          </dcat:dataset>
        </dcat:Catalog>
        </rdf:RDF>
        '''
        cls.rdf_remote_file_invalid = '''<?xml version="1.0" encoding="utf-8" ?>
        <rdf:RDF
         xmlns:dcat="http://www.w3.org/ns/dcat#"
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dcat:Catalog
        </rdf:RDF>
        '''

        #Minimal remote turtle file
        cls.ttl_mock_url = 'http://some.dcat.file.ttl'
        cls.ttl_content_type = 'text/turtle'
        cls.ttl_content = '''@prefix dcat: <http://www.w3.org/ns/dcat#> .
        @prefix dc: <http://purl.org/dc/terms/> .
        <https://data.some.org/catalog>
          a dcat:Catalog ;
          dcat:dataset <https://data.some.org/catalog/datasets/1>, <https://data.some.org/catalog/datasets/2> .
        <https://data.some.org/catalog/datasets/1>
          a dcat:Dataset ;
          dc:title "Example dataset 1" .
        <https://data.some.org/catalog/datasets/2>
          a dcat:Dataset ;
          dc:title "Example dataset 2" .
          '''
        cls.ttl_remote_file_small = '''@prefix dcat: <http://www.w3.org/ns/dcat#> .
        @prefix dc: <http://purl.org/dc/terms/> .
        <https://data.some.org/catalog>
          a dcat:Catalog ;
          dcat:dataset <https://data.some.org/catalog/datasets/1>, <https://data.some.org/catalog/datasets/2> .
        <https://data.some.org/catalog/datasets/1>
          a dcat:Dataset ;
          dc:title "Example dataset 1" .
          '''
        cls.ttl_remote_file_invalid = '''@prefix dcat: <http://www.w3.org/ns/dcat#> .
Ejemplo n.º 5
0
    def test_01_basic_harvester(self):
        """Run the 'test' harvester twice and check objects, datasets and stats.

        First run: a job is created with ``run=True``, the gather and fetch
        callbacks are driven by hand, and the test expects three harvest
        objects reported as 'added'. Second run: the same source is harvested
        again and the test expects two 'updated' and one 'deleted' object.
        """
        # Obtain both consumers before purging, so the queues start out empty
        gather_consumer = queue.get_consumer('ckan.harvest.test.gather',
                                             queue.get_gather_routing_key())
        fetch_consumer = queue.get_consumer('ckan.harvest.test.fetch',
                                            queue.get_fetch_routing_key())
        gather_consumer.queue_purge(queue='ckan.harvest.test.gather')
        fetch_consumer.queue_purge(queue='ckan.harvest.test.fetch')

        site_user = logic.get_action('get_site_user')(
            {'model': model, 'ignore_auth': True}, {})

        context = {
            'model': model,
            'session': model.Session,
            'user': site_user['name'],
            'api_version': 3,
            'ignore_auth': True,
        }

        source = logic.get_action('harvest_source_create')(context, {
            'title': 'Test Source',
            'name': 'test-source',
            'url': 'basic_test',
            'source_type': 'test',
        })

        assert source['source_type'] == 'test', source
        assert source['url'] == 'basic_test', source

        source_id = source['id']

        # ---------------- First run ----------------
        job = logic.get_action('harvest_job_create')(
            context, {'source_id': source_id, 'run': True})
        job_id = job['id']

        assert job['source_id'] == source_id, job
        assert job['status'] == u'Running'

        shown_job = logic.get_action('harvest_job_show')(context,
                                                         {'id': job_id})
        assert shown_job['status'] == u'Running'

        # Take one message off the gather queue and process it.
        # NOTE(review): the queue names used for basic_get differ from the
        # 'ckan.harvest.test.*' names purged above -- confirm this is intended.
        message = gather_consumer.basic_get(queue='ckan.harvest.gather')
        queue.gather_callback(gather_consumer, *message)

        harvest_objects = model.Session.query(HarvestObject).all()
        assert len(harvest_objects) == 3
        for harvest_object in harvest_objects:
            assert harvest_object.state == 'WAITING'

        assert len(model.Session.query(HarvestObject).all()) == 3
        assert len(model.Session.query(HarvestObjectExtra).all()) == 1

        # One fetch per harvest object
        for _ in range(3):
            message = fetch_consumer.basic_get(queue='ckan.harvest.fetch')
            queue.fetch_callback(fetch_consumer, *message)

        dataset_count = model.Session.query(model.Package).filter(
            model.Package.type == 'dataset').count()
        assert dataset_count == 3

        current_objects = model.Session.query(HarvestObject).filter_by(
            current=True).all()
        assert_equal(len(current_objects), 3)
        for harvest_object in current_objects:
            assert_equal(harvest_object.state, 'COMPLETE')
            assert_equal(harvest_object.report_status, 'added')

        # Running the jobs again should mark the job as Finished
        logic.get_action('harvest_jobs_run')(context, {'source_id': source_id})

        job = logic.get_action('harvest_job_show')(context, {'id': job_id})

        first_run_stats = {'added': 3, 'updated': 0, 'not modified': 0,
                           'errored': 0, 'deleted': 0}
        assert_equal(job['status'], u'Finished')
        assert_equal(job['stats'], first_run_stats)

        source_status = logic.get_action('harvest_source_show')(
            context, {'id': source_id})['status']

        assert_equal(source_status['last_job']['stats'], first_run_stats)
        assert_equal(source_status['total_datasets'], 3)
        assert_equal(source_status['job_count'], 1)

        # ---------------- Second run ----------------
        job = logic.get_action('harvest_job_create')(
            context, {'source_id': source_id, 'run': True})
        job_id = job['id']

        shown_job = logic.get_action('harvest_job_show')(context,
                                                         {'id': job_id})
        assert shown_job['status'] == u'Running'

        # Take one message off the gather queue and process it
        message = gather_consumer.basic_get(queue='ckan.harvest.gather')
        queue.gather_callback(gather_consumer, *message)

        harvest_objects = model.Session.query(HarvestObject).all()
        assert len(harvest_objects) == 6

        for _ in range(3):
            message = fetch_consumer.basic_get(queue='ckan.harvest.fetch')
            queue.fetch_callback(fetch_consumer, *message)

        dataset_count = model.Session.query(model.Package).filter(
            model.Package.type == 'dataset').count()
        assert_equal(dataset_count, 3)

        # Number of harvest objects expected per report status after both runs
        for report_status, expected in (('added', 3),
                                        ('updated', 2),
                                        ('deleted', 1)):
            matching = model.Session.query(HarvestObject).filter_by(
                report_status=report_status).all()
            assert_equal(len(matching), expected)

        # Run again so the job gets marked as finished
        logic.get_action('harvest_jobs_run')(context, {'source_id': source_id})

        job = logic.get_action('harvest_job_show')(context, {'id': job_id})

        second_run_stats = {'added': 0, 'updated': 2, 'not modified': 0,
                            'errored': 0, 'deleted': 1}
        assert_equal(job['stats'], second_run_stats)

        context['detailed'] = True
        source_status = logic.get_action('harvest_source_show')(
            context, {'id': source_id})['status']

        assert_equal(source_status['last_job']['stats'], second_run_stats)
        assert_equal(source_status['total_datasets'], 2)
        assert_equal(source_status['job_count'], 2)
Ejemplo n.º 6
0
    def test_01_basic_harvester(self):
        """Run the 'test' harvester twice and check objects, datasets and stats.

        First run: a job is created with ``run=True``, the gather and fetch
        callbacks are driven by hand, and the test expects three harvest
        objects reported as 'added'. Second run: the same source is harvested
        again and the test expects two 'updated' and one 'deleted' object.
        """
        # Obtain both consumers before purging, so the queues start out empty
        gather_consumer = queue.get_consumer('ckan.harvest.test.gather',
                                             queue.get_gather_routing_key())
        fetch_consumer = queue.get_consumer('ckan.harvest.test.fetch',
                                            queue.get_fetch_routing_key())
        gather_consumer.queue_purge(queue='ckan.harvest.test.gather')
        fetch_consumer.queue_purge(queue='ckan.harvest.test.fetch')

        site_user = logic.get_action('get_site_user')(
            {'model': model, 'ignore_auth': True}, {})

        context = {'model': model, 'session': model.Session,
                   'user': site_user['name'], 'api_version': 3,
                   'ignore_auth': True}

        source = logic.get_action('harvest_source_create')(context, {
            'title': 'Test Source',
            'name': 'test-source',
            'url': 'basic_test',
            'source_type': 'test',
        })

        assert source['source_type'] == 'test', source
        assert source['url'] == 'basic_test', source

        source_id = source['id']

        # ---------------- First run ----------------
        job = logic.get_action('harvest_job_create')(
            context, {'source_id': source_id, 'run': True})
        job_id = job['id']

        assert job['source_id'] == source_id, job
        assert job['status'] == u'Running'

        shown_job = logic.get_action('harvest_job_show')(context,
                                                         {'id': job_id})
        assert shown_job['status'] == u'Running'

        # Take one message off the gather queue and process it.
        # NOTE(review): the queue names used for basic_get differ from the
        # 'ckan.harvest.test.*' names purged above -- confirm this is intended.
        message = gather_consumer.basic_get(queue='ckan.harvest.gather')
        queue.gather_callback(gather_consumer, *message)

        harvest_objects = model.Session.query(HarvestObject).all()
        assert len(harvest_objects) == 3
        for harvest_object in harvest_objects:
            assert harvest_object.state == 'WAITING'

        assert len(model.Session.query(HarvestObject).all()) == 3
        assert len(model.Session.query(HarvestObjectExtra).all()) == 1

        # One fetch per harvest object
        for _ in range(3):
            message = fetch_consumer.basic_get(queue='ckan.harvest.fetch')
            queue.fetch_callback(fetch_consumer, *message)

        dataset_count = model.Session.query(model.Package).filter(
            model.Package.type == 'dataset').count()
        assert dataset_count == 3

        current_objects = model.Session.query(HarvestObject).filter_by(
            current=True).all()
        assert_equal(len(current_objects), 3)
        for harvest_object in current_objects:
            assert_equal(harvest_object.state, 'COMPLETE')
            assert_equal(harvest_object.report_status, 'added')

        # Running the jobs again should mark the job as Finished
        logic.get_action('harvest_jobs_run')(context, {'source_id': source_id})

        job = logic.get_action('harvest_job_show')(context, {'id': job_id})

        first_run_stats = {'added': 3, 'updated': 0, 'not modified': 0,
                           'errors': 0, 'deleted': 0}
        assert_equal(job['status'], u'Finished')
        assert_equal(job['stats'], first_run_stats)

        source_status = logic.get_action('harvest_source_show')(
            context, {'id': source_id})['status']

        assert_equal(source_status['last_job']['stats'], first_run_stats)
        assert_equal(source_status['total_datasets'], 3)
        assert_equal(source_status['job_count'], 1)

        # ---------------- Second run ----------------
        job = logic.get_action('harvest_job_create')(
            context, {'source_id': source_id, 'run': True})
        job_id = job['id']

        shown_job = logic.get_action('harvest_job_show')(context,
                                                         {'id': job_id})
        assert shown_job['status'] == u'Running'

        # Take one message off the gather queue and process it
        message = gather_consumer.basic_get(queue='ckan.harvest.gather')
        queue.gather_callback(gather_consumer, *message)

        harvest_objects = model.Session.query(HarvestObject).all()
        assert len(harvest_objects) == 6

        for _ in range(3):
            message = fetch_consumer.basic_get(queue='ckan.harvest.fetch')
            queue.fetch_callback(fetch_consumer, *message)

        dataset_count = model.Session.query(model.Package).filter(
            model.Package.type == 'dataset').count()
        assert_equal(dataset_count, 3)

        # Number of harvest objects expected per report status after both runs
        for report_status, expected in (('added', 3),
                                        ('updated', 2),
                                        ('deleted', 1)):
            matching = model.Session.query(HarvestObject).filter_by(
                report_status=report_status).all()
            assert_equal(len(matching), expected)

        # Run again so the job gets marked as finished
        logic.get_action('harvest_jobs_run')(context, {'source_id': source_id})

        job = logic.get_action('harvest_job_show')(context, {'id': job_id})

        second_run_stats = {'added': 0, 'updated': 2, 'not modified': 0,
                            'errors': 0, 'deleted': 1}
        assert_equal(job['stats'], second_run_stats)

        context['detailed'] = True
        source_status = logic.get_action('harvest_source_show')(
            context, {'id': source_id})['status']

        assert_equal(source_status['last_job']['stats'], second_run_stats)
        assert_equal(source_status['total_datasets'], 2)
        assert_equal(source_status['job_count'], 2)
Ejemplo n.º 7
0
    def test_01_basic_harvester(self):
        """Drive one full harvest (gather + fetch) of a 'test' source by hand.

        Creates a harvest source and job through the logic layer, starts the
        job with ``harvest_jobs_run``, then feeds one gather message and three
        fetch messages to the queue callbacks. The test expects three harvest
        objects that end up COMPLETE and reported as 'added', and three
        packages of type 'dataset'.
        """
        ### make sure queues/exchanges are created first and are empty
        consumer = queue.get_consumer('ckan.harvest.gather', 'harvest_job_id')
        consumer_fetch = queue.get_consumer('ckan.harvest.fetch',
                                            'harvest_object_id')
        consumer.queue_purge(queue='ckan.harvest.gather')
        consumer_fetch.queue_purge(queue='ckan.harvest.fetch')

        user = logic.get_action('get_site_user')(
            {'model': model, 'ignore_auth': True}, {}
        )['name']

        context = {'model': model, 'session': model.Session,
                   'user': user, 'api_version': 3}

        source_dict = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': 'basic_test',
            'source_type': 'test',
        }

        harvest_source = logic.get_action('harvest_source_create')(
            context,
            source_dict
        )

        assert harvest_source['source_type'] == 'test', harvest_source
        assert harvest_source['url'] == 'basic_test', harvest_source

        harvest_job = logic.get_action('harvest_job_create')(
            context,
            {'source_id': harvest_source['id']}
        )

        job_id = harvest_job['id']

        assert harvest_job['source_id'] == harvest_source['id'], harvest_job

        # The job has been created but not started yet
        assert harvest_job['status'] == u'New'

        logic.get_action('harvest_jobs_run')(
            context,
            {'source_id': harvest_source['id']}
        )

        assert logic.get_action('harvest_job_show')(
            context,
            {'id': job_id}
        )['status'] == u'Running'

        ## pop one item off the queue and run the callback
        reply = consumer.basic_get(queue='ckan.harvest.gather')

        queue.gather_callback(consumer, *reply)

        all_objects = model.Session.query(HarvestObject).all()

        assert len(all_objects) == 3
        for harvest_object in all_objects:
            assert harvest_object.state == 'WAITING'

        assert len(model.Session.query(HarvestObject).all()) == 3
        assert len(model.Session.query(HarvestObjectExtra).all()) == 1

        ## do three times as three harvest objects
        for _ in range(3):
            reply = consumer_fetch.basic_get(queue='ckan.harvest.fetch')
            queue.fetch_callback(consumer_fetch, *reply)

        count = model.Session.query(model.Package) \
                .filter(model.Package.type == 'dataset') \
                .count()
        assert count == 3
        all_objects = model.Session.query(HarvestObject).filter_by(
            current=True).all()

        assert len(all_objects) == 3
        for harvest_object in all_objects:
            assert harvest_object.state == 'COMPLETE'
            assert harvest_object.report_status == 'added'

        ## fire run again to check if job is set to Finished
        try:
            logic.get_action('harvest_jobs_run')(
                context,
                {'source_id': harvest_source['id']}
            )
        except Exception as e:
            # "except X as e" is valid on Python 2.6+ and Python 3, unlike the
            # Python-2-only "except X, e" form. Any exception raised here must
            # be the "no new jobs" complaint; a call that does not raise is
            # accepted as well.
            assert 'There are no new harvesting jobs' in str(e)
Ejemplo n.º 8
0
    def test_01_basic_harvester(self):
        """Drive one full harvest (gather + fetch) of a 'test' source by hand.

        Creates a harvest source and job through the logic layer, starts the
        job with ``harvest_jobs_run``, then feeds one gather message and three
        fetch messages to the queue callbacks. The test expects three harvest
        objects that end up COMPLETE and reported as 'added', and three
        packages of type 'dataset'.
        """
        ### make sure queues/exchanges are created first and are empty
        consumer = queue.get_consumer('ckan.harvest.gather', 'harvest_job_id')
        consumer_fetch = queue.get_consumer('ckan.harvest.fetch',
                                            'harvest_object_id')
        consumer.queue_purge(queue='ckan.harvest.gather')
        consumer_fetch.queue_purge(queue='ckan.harvest.fetch')

        user = logic.get_action('get_site_user')({
            'model': model,
            'ignore_auth': True
        }, {})['name']

        context = {
            'model': model,
            'session': model.Session,
            'user': user,
            'api_version': 3
        }

        source_dict = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': 'basic_test',
            'source_type': 'test',
        }

        harvest_source = logic.get_action('harvest_source_create')(context,
                                                                   source_dict)

        assert harvest_source['source_type'] == 'test', harvest_source
        assert harvest_source['url'] == 'basic_test', harvest_source

        harvest_job = logic.get_action('harvest_job_create')(
            context, {
                'source_id': harvest_source['id']
            })

        job_id = harvest_job['id']

        assert harvest_job['source_id'] == harvest_source['id'], harvest_job

        # The job has been created but not started yet
        assert harvest_job['status'] == u'New'

        logic.get_action('harvest_jobs_run')(context, {
            'source_id': harvest_source['id']
        })

        assert logic.get_action('harvest_job_show')(context, {
            'id': job_id
        })['status'] == u'Running'

        ## pop one item off the queue and run the callback
        reply = consumer.basic_get(queue='ckan.harvest.gather')

        queue.gather_callback(consumer, *reply)

        all_objects = model.Session.query(HarvestObject).all()

        assert len(all_objects) == 3
        for harvest_object in all_objects:
            assert harvest_object.state == 'WAITING'

        assert len(model.Session.query(HarvestObject).all()) == 3
        assert len(model.Session.query(HarvestObjectExtra).all()) == 1

        ## do three times as three harvest objects
        for _ in range(3):
            reply = consumer_fetch.basic_get(queue='ckan.harvest.fetch')
            queue.fetch_callback(consumer_fetch, *reply)

        count = model.Session.query(model.Package) \
                .filter(model.Package.type == 'dataset') \
                .count()
        assert count == 3
        all_objects = model.Session.query(HarvestObject).filter_by(
            current=True).all()

        assert len(all_objects) == 3
        for harvest_object in all_objects:
            assert harvest_object.state == 'COMPLETE'
            assert harvest_object.report_status == 'added'

        ## fire run again to check if job is set to Finished
        try:
            logic.get_action('harvest_jobs_run')(
                context, {
                    'source_id': harvest_source['id']
                })
        except Exception as e:
            # "except X as e" is valid on Python 2.6+ and Python 3, unlike the
            # Python-2-only "except X, e" form. Any exception raised here must
            # be the "no new jobs" complaint; a call that does not raise is
            # accepted as well.
            assert 'There are no new harvesting jobs' in str(e)
Ejemplo n.º 9
0
    def setup_class(cls):

        cls.gather_consumer = queue.get_consumer('ckan.harvest.gather.test',
                                                 'harvest_job_id')
        cls.fetch_consumer = queue.get_consumer('ckan.harvest.fetch.test',
                                                'harvest_object_id')

        # Minimal remote RDF file
        cls.rdf_mock_url = 'http://some.dcat.file.rdf'
        cls.rdf_content_type = 'application/rdf+xml'
        cls.rdf_content = '''<?xml version="1.0" encoding="utf-8" ?>
        <rdf:RDF
         xmlns:dct="http://purl.org/dc/terms/"
         xmlns:dcat="http://www.w3.org/ns/dcat#"
         xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dcat:Catalog rdf:about="https://data.some.org/catalog">
          <dcat:dataset>
            <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/1">
              <dct:title>Example dataset 1</dct:title>
            </dcat:Dataset>
          </dcat:dataset>
          <dcat:dataset>
            <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/2">
              <dct:title>Example dataset 2</dct:title>
            </dcat:Dataset>
          </dcat:dataset>
        </dcat:Catalog>
        </rdf:RDF>
        '''
        cls.rdf_remote_file_small = '''<?xml version="1.0" encoding="utf-8" ?>
        <rdf:RDF
         xmlns:dct="http://purl.org/dc/terms/"
         xmlns:dcat="http://www.w3.org/ns/dcat#"
         xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dcat:Catalog rdf:about="https://data.some.org/catalog">
          <dcat:dataset>
            <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/1">
              <dct:title>Example dataset 1</dct:title>
            </dcat:Dataset>
          </dcat:dataset>
        </dcat:Catalog>
        </rdf:RDF>
        '''
        cls.rdf_remote_file_invalid = '''<?xml version="1.0" encoding="utf-8" ?>
        <rdf:RDF
         xmlns:dcat="http://www.w3.org/ns/dcat#"
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dcat:Catalog
        </rdf:RDF>
        '''

        #Minimal remote turtle file
        cls.ttl_mock_url = 'http://some.dcat.file.ttl'
        cls.ttl_content_type = 'text/turtle'
        cls.ttl_content = '''@prefix dcat: <http://www.w3.org/ns/dcat#> .
        @prefix dc: <http://purl.org/dc/terms/> .
        <https://data.some.org/catalog>
          a dcat:Catalog ;
          dcat:dataset <https://data.some.org/catalog/datasets/1>, <https://data.some.org/catalog/datasets/2> .
        <https://data.some.org/catalog/datasets/1>
          a dcat:Dataset ;
          dc:title "Example dataset 1" .
        <https://data.some.org/catalog/datasets/2>
          a dcat:Dataset ;
          dc:title "Example dataset 2" .
          '''
        cls.ttl_remote_file_small = '''@prefix dcat: <http://www.w3.org/ns/dcat#> .
        @prefix dc: <http://purl.org/dc/terms/> .
        <https://data.some.org/catalog>
          a dcat:Catalog ;
          dcat:dataset <https://data.some.org/catalog/datasets/1>, <https://data.some.org/catalog/datasets/2> .
        <https://data.some.org/catalog/datasets/1>
          a dcat:Dataset ;
          dc:title "Example dataset 1" .
          '''
        cls.ttl_remote_file_invalid =  '''@prefix dcat: <http://www.w3.org/ns/dcat#> .