def test_redis_queue_purging(self):
    '''
    Test that Redis queue purging doesn't purge the wrong keys.
    '''
    if config.get('ckan.harvest.mq.type') != 'redis':
        pytest.skip()

    redis = queue.get_connection()
    try:
        redis.set('ckanext-harvest:some-random-key', 'foobar')

        # Create some fake jobs
        gather_publisher = queue.get_gather_publisher()
        gather_publisher.send({'harvest_job_id': str(uuid.uuid4())})
        gather_publisher.send({'harvest_job_id': str(uuid.uuid4())})
        fetch_publisher = queue.get_fetch_publisher()
        fetch_publisher.send({'harvest_object_id': str(uuid.uuid4())})
        fetch_publisher.send({'harvest_object_id': str(uuid.uuid4())})
        num_keys = redis.dbsize()

        # Create some fake objects
        gather_consumer = queue.get_gather_consumer()
        next(gather_consumer.consume(queue.get_gather_queue_name()))
        fetch_consumer = queue.get_fetch_consumer()
        next(fetch_consumer.consume(queue.get_fetch_queue_name()))

        assert redis.dbsize() > num_keys

        queue.purge_queues()

        assert redis.get('ckanext-harvest:some-random-key') == 'foobar'
        assert redis.dbsize() == num_keys
        assert redis.llen(queue.get_gather_routing_key()) == 0
        assert redis.llen(queue.get_fetch_routing_key()) == 0
    finally:
        redis.delete('ckanext-harvest:some-random-key')
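The test above pins down the contract for the Redis branch of queue.purge_queues(): both routing-key lists are emptied and any per-message bookkeeping keys are dropped, while unrelated keys (such as 'ckanext-harvest:some-random-key') survive. A minimal sketch of that behaviour, reusing the same `queue` module the test imports; the helper name and the assumption that processing keys are namespaced under the routing key are illustrative, not the actual ckanext.harvest.queue implementation:

def purge_redis_queues(redis):
    # Empty each harvest queue and its processing keys, but nothing else.
    for routing_key in (queue.get_gather_routing_key(),
                        queue.get_fetch_routing_key()):
        # Delete the queue list itself ...
        redis.delete(routing_key)
        # ... and any keys created while its messages were being consumed
        # (assumed layout, e.g. 'ckan.harvest.gather:<message>').
        for key in redis.keys(routing_key + ':*'):
            redis.delete(key)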
def command(self):
    self._load_config()

    # We'll need a sysadmin user to perform most of the actions
    # We will use the sysadmin site user (named as the site_id)
    context = {'model': model, 'session': model.Session,
               'ignore_auth': True}
    self.admin_user = get_action('get_site_user')(context, {})

    print ''

    if len(self.args) == 0:
        self.parser.print_usage()
        sys.exit(1)
    cmd = self.args[0]
    if cmd == 'source':
        self.create_harvest_source()
    elif cmd == 'rmsource':
        self.remove_harvest_source()
    elif cmd == 'sources':
        self.list_harvest_sources()
    elif cmd == 'job':
        self.create_harvest_job()
    elif cmd == 'jobs':
        self.list_harvest_jobs()
    elif cmd == 'run':
        self.run_harvester()
    elif cmd == 'gather_consumer':
        import logging
        from ckanext.harvest.queue import get_gather_consumer, gather_callback
        logging.getLogger('amqplib').setLevel(logging.INFO)
        consumer = get_gather_consumer()
        for method, header, body in consumer.consume(queue='ckan.harvest.gather'):
            gather_callback(consumer, method, header, body)
    elif cmd == 'fetch_consumer':
        import logging
        logging.getLogger('amqplib').setLevel(logging.INFO)
        from ckanext.harvest.queue import get_fetch_consumer, fetch_callback
        consumer = get_fetch_consumer()
        for method, header, body in consumer.consume(queue='ckan.harvest.fetch'):
            fetch_callback(consumer, method, header, body)
    elif cmd == 'purge_queues':
        from ckanext.harvest.queue import purge_queues
        purge_queues()
    elif cmd == 'initdb':
        self.initdb()
    elif cmd == 'import':
        self.initdb()
        self.import_stage()
    elif cmd == 'job-all':
        self.create_harvest_job_all()
    elif cmd == 'harvesters-info':
        harvesters_info = get_action('harvesters_info_show')()
        pprint(harvesters_info)
    elif cmd == 'reindex':
        self.reindex()
    else:
        print 'Command %s not recognized' % cmd
def setup(self):
    harvest_model.setup()
    queue.purge_queues()

    # create required tag vocabularies
    theme.create_updateInterval()
    theme.create_dataType()

    # create temp dir for this test
    self.temp_dir = tempfile.mkdtemp()
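For context, helpers like theme.create_updateInterval() are assumed to create CKAN tag vocabularies through the action API. A hedged sketch of that pattern; the vocabulary name matches the helper, but the tag values and function name below are illustrative assumptions:

from ckan.plugins import toolkit

def create_update_interval_vocab():
    # Set up the vocabulary and its allowed tags as a sysadmin-style
    # bootstrap step. The tag values are placeholders.
    context = {'ignore_auth': True}
    vocab = toolkit.get_action('vocabulary_create')(
        context, {'name': 'updateInterval'})
    for tag_name in ('continual', 'daily', 'weekly'):
        toolkit.get_action('tag_create')(
            context, {'name': tag_name, 'vocabulary_id': vocab['id']})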
def setup(self):
    harvest_model.setup()
    queue.purge_queues()
    requests.get(clear_solr_url)

    user_dict = h.call_action('user_create', name='testuser',
                              email='*****@*****.**',
                              password='******')
    org_context = {'user': user_dict['name'], 'return_id_only': True}
    org_data_dict = {'name': 'geocat_org'}
    self.org_id = h.call_action('organization_create',
                                org_context, **org_data_dict)
def test_redis_queue_purging(self):
    '''
    Test that Redis queue purging doesn't purge the wrong keys.
    '''
    if config.get('ckan.harvest.mq.type') != 'redis':
        raise SkipTest()

    redis = queue.get_connection()
    try:
        redis.set('ckanext-harvest:some-random-key', 'foobar')

        # Create some fake jobs
        gather_publisher = queue.get_gather_publisher()
        gather_publisher.send({'harvest_job_id': str(uuid.uuid4())})
        gather_publisher.send({'harvest_job_id': str(uuid.uuid4())})
        fetch_publisher = queue.get_fetch_publisher()
        fetch_publisher.send({'harvest_object_id': str(uuid.uuid4())})
        fetch_publisher.send({'harvest_object_id': str(uuid.uuid4())})
        num_keys = redis.dbsize()

        # Create some fake objects
        gather_consumer = queue.get_gather_consumer()
        next(gather_consumer.consume(queue.get_gather_queue_name()))
        fetch_consumer = queue.get_fetch_consumer()
        next(fetch_consumer.consume(queue.get_fetch_queue_name()))

        ok_(redis.dbsize() > num_keys)

        queue.purge_queues()

        assert_equal(redis.get('ckanext-harvest:some-random-key'),
                     'foobar')
        assert_equal(redis.dbsize(), num_keys)
        assert_equal(redis.llen(queue.get_gather_routing_key()), 0)
        assert_equal(redis.llen(queue.get_fetch_routing_key()), 0)
    finally:
        redis.delete('ckanext-harvest:some-random-key')
def command(self):
    self._load_config()

    # We'll need a sysadmin user to perform most of the actions
    # We will use the sysadmin site user (named as the site_id)
    context = {
        'model': model,
        'session': model.Session,
        'ignore_auth': True
    }
    self.admin_user = get_action('get_site_user')(context, {})

    print ''

    if len(self.args) == 0:
        self.parser.print_usage()
        sys.exit(1)
    cmd = self.args[0]
    if cmd == 'source':
        if len(self.args) > 2:
            self.create_harvest_source()
        else:
            self.show_harvest_source()
    elif cmd == 'rmsource':
        self.remove_harvest_source()
    elif cmd == 'clearsource':
        self.clear_harvest_source()
    elif cmd == 'sources':
        self.list_harvest_sources()
    elif cmd == 'job':
        self.create_harvest_job()
    elif cmd == 'jobs':
        self.list_harvest_jobs()
    elif cmd == 'job_abort':
        self.job_abort()
    elif cmd == 'run':
        self.run_harvester()
    elif cmd == 'run_test':
        self.run_test_harvest()
    elif cmd == 'gather_consumer':
        import logging
        from ckanext.harvest.queue import (get_gather_consumer,
                                           gather_callback,
                                           get_gather_queue_name)
        logging.getLogger('amqplib').setLevel(logging.INFO)
        consumer = get_gather_consumer()
        for method, header, body in consumer.consume(
                queue=get_gather_queue_name()):
            gather_callback(consumer, method, header, body)
    elif cmd == 'fetch_consumer':
        import logging
        logging.getLogger('amqplib').setLevel(logging.INFO)
        from ckanext.harvest.queue import (get_fetch_consumer,
                                           fetch_callback,
                                           get_fetch_queue_name)
        consumer = get_fetch_consumer()
        for method, header, body in consumer.consume(
                queue=get_fetch_queue_name()):
            fetch_callback(consumer, method, header, body)
    elif cmd == 'purge_queues':
        from ckanext.harvest.queue import purge_queues
        purge_queues()
    elif cmd == 'initdb':
        self.initdb()
    elif cmd == 'import':
        self.initdb()
        self.import_stage()
    elif cmd == 'job-all':
        self.create_harvest_job_all()
    elif cmd == 'harvesters-info':
        harvesters_info = get_action('harvesters_info_show')()
        pprint(harvesters_info)
    elif cmd == 'reindex':
        self.reindex()
    elif cmd == 'clean_harvest_log':
        self.clean_harvest_log()
    else:
        print 'Command %s not recognized' % cmd
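Both consumer loops above rely on the same generator contract: consume() yields (method, header, body) triples that are handed to the callback. A minimal sketch of a Redis-backed consumer satisfying that contract; the class and attribute names are illustrative assumptions, not the real ckanext.harvest.queue classes:

class FakeMethod(object):
    # Stand-in for the AMQP method object the callbacks expect; for Redis
    # the raw message doubles as the delivery tag used for acknowledgement.
    def __init__(self, message):
        self.delivery_tag = message

class RedisConsumer(object):
    def __init__(self, redis, routing_key):
        self.redis = redis
        self.routing_key = routing_key

    def consume(self, queue):
        # Block until a message arrives on the routing-key list, then
        # yield the (method, header, body) triple the loops above unpack.
        while True:
            _key, body = self.redis.blpop(self.routing_key)
            yield FakeMethod(body), None, body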
def setup(self):
    harvest_model.setup()
    queue.purge_queues()
def purge_queues(self):
    from ckanext.harvest.queue import purge_queues
    purge_queues()
def harvest_job_abort(context, data_dict):
    '''
    Aborts a harvest job. Given a harvest source_id, it looks for the
    latest job for that source and (assuming it is not already Finished)
    marks it as Finished. It also marks any of that job's harvest objects
    that are not COMPLETE or ERROR as "ERROR", so any left in limbo are
    cleaned up. It does not actually stop any queued harvest
    fetches/objects from running.

    Specify either id or source_id.

    :param id: the job id to abort, or the id or name of the harvest
               source with a job to abort
    :type id: string
    :param source_id: the name or id of the harvest source with a job to
                      abort
    :type source_id: string
    '''
    check_access('harvest_job_abort', context, data_dict)

    model = context['model']

    source_or_job_id = data_dict.get('source_id') or data_dict.get('id')
    if source_or_job_id:
        try:
            source = harvest_source_show(context, {'id': source_or_job_id})
        except NotFound:
            job = get_action('harvest_job_show')(
                context, {'id': source_or_job_id})
        else:
            # HarvestJob set status to 'Aborted'
            # Do not use harvest_job_list since it can use a lot of memory
            # Get the most recent job for the source
            job = model.Session.query(HarvestJob) \
                       .filter_by(source_id=source['id']) \
                       .order_by(HarvestJob.created.desc()).first()
            if not job:
                raise NotFound('Error: source has no jobs')
            job_id = job.id
            job = get_action('harvest_job_show')(
                context, {'id': job_id})

    j_id = job['id']
    if job['status'] != 'Finished':
        # i.e. New or Running
        job_obj = HarvestJob.get(job['id'])
        job_obj.status = new_status = 'Finished'
        model.repo.commit_and_remove()
        log.info('Harvest job changed status from "%s" to "%s"',
                 job['status'], new_status)
    else:
        log.info('Harvest job unchanged. Source %s status is: "%s"',
                 job['id'], job['status'])

    # HarvestObjects set to ERROR
    job_obj = HarvestJob.get(job['id'])
    objs = job_obj.objects
    for obj in objs:
        if obj.state not in ('COMPLETE', 'ERROR'):
            old_state = obj.state
            obj.state = 'ERROR'
            log.info('Harvest object changed state from "%s" to "%s": %s',
                     old_state, obj.state, obj.id)
        else:
            log.info('Harvest object not changed from "%s": %s',
                     obj.state, obj.id)
    model.repo.commit_and_remove()

    # Purge the queues for this job
    from ckanext.harvest.queue import purge_queues
    log.info('Harvest queue purge start...')
    purge_queues(j_id)
    log.info('Harvest queue purged!')

    job_obj = HarvestJob.get(job['id'])
    return harvest_job_dictize(job_obj, context)
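A usage sketch for the action above, called through CKAN's action interface; 'my-harvest-source' is a placeholder source name:

from ckan.plugins import toolkit

result = toolkit.get_action('harvest_job_abort')(
    {'ignore_auth': True}, {'source_id': 'my-harvest-source'})
# The returned dict is the dictized job; after a successful abort its
# status should be 'Finished'.
print(result['status'])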
def clean_queues():
    queue.purge_queues()
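clean_queues() reads like a test fixture body; a minimal sketch of wiring it up as an autouse pytest fixture, assuming the pytest-based tests above:

import pytest
from ckanext.harvest import queue

@pytest.fixture(autouse=True)
def clean_queues():
    # Make sure no stale gather/fetch messages leak between tests.
    queue.purge_queues()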
def teardown(self):
    h.reset_db()
    queue.purge_queues()
    requests.get(clear_solr_url)