def test_redis_queue_purging(self):
    '''
    Check that purging the harvest queues leaves unrelated Redis keys alone.

    A sentinel key is written next to the queue data. Messages are then
    published and one is consumed from each queue (which the test expects
    to add keys). After ``queue.purge_queues()`` the sentinel must still
    hold its value, the key count must equal the count taken before
    consuming, and both routing-key lists must be empty.

    Skipped unless ``ckan.harvest.mq.type`` is ``redis``.
    '''
    mq_type = config.get('ckan.harvest.mq.type')
    if mq_type != 'redis':
        pytest.skip()

    sentinel_key = 'ckanext-harvest:some-random-key'
    conn = queue.get_connection()
    try:
        conn.set(sentinel_key, 'foobar')

        # Publish two fake gather jobs followed by two fake fetch objects
        gather_pub = queue.get_gather_publisher()
        for _ in range(2):
            gather_pub.send({'harvest_job_id': str(uuid.uuid4())})
        fetch_pub = queue.get_fetch_publisher()
        for _ in range(2):
            fetch_pub.send({'harvest_object_id': str(uuid.uuid4())})
        keys_before_consuming = conn.dbsize()

        # Pull one message off each queue
        gather_messages = queue.get_gather_consumer().consume(
            queue.get_gather_queue_name())
        next(gather_messages)
        fetch_messages = queue.get_fetch_consumer().consume(
            queue.get_fetch_queue_name())
        next(fetch_messages)

        assert conn.dbsize() > keys_before_consuming

        queue.purge_queues()

        # The sentinel survives; everything the consumers added is gone
        assert conn.get(sentinel_key) == 'foobar'
        assert conn.dbsize() == keys_before_consuming
        assert conn.llen(queue.get_gather_routing_key()) == 0
        assert conn.llen(queue.get_fetch_routing_key()) == 0
    finally:
        # Always remove the sentinel so later tests start clean
        conn.delete(sentinel_key)
def test_redis_corrupt(self, mock_log_error):
    '''
    Test that corrupt Redis doesn't stop harvest process and still
    processes other jobs.

    A fetch message whose ``harvest_object_id`` is ``None`` is queued ahead
    of a valid one. The test expects the fetch consumer to hand back the
    valid message and the error logger to have been called exactly once
    with a string-concatenation ``TypeError``.

    Skipped unless ``ckan.harvest.mq.type`` is ``redis``.

    :param mock_log_error: mock whose ``call_count`` and
        ``call_args_list`` are inspected (presumably injected by a patch
        decorator outside this block -- confirm against the decorator).
    '''
    if config.get('ckan.harvest.mq.type') != 'redis':
        pytest.skip()
    redis = queue.get_connection()
    try:
        redis.set('ckanext-harvest:some-random-key-2', 'foobar')

        # make sure queues/exchanges are created first and are empty
        gather_consumer = queue.get_gather_consumer()
        fetch_consumer = queue.get_fetch_consumer()
        gather_consumer.queue_purge(queue=queue.get_gather_queue_name())
        fetch_consumer.queue_purge(queue=queue.get_fetch_queue_name())

        # Create one fake gather job and two fake fetch objects; the first
        # fetch object is the corrupt one (its harvest_object_id is None)
        gather_publisher = queue.get_gather_publisher()
        gather_publisher.send({'harvest_job_id': str(uuid.uuid4())})
        fetch_publisher = queue.get_fetch_publisher()
        fetch_publisher.send({'harvest_object_id': None})
        h_obj_id = str(uuid.uuid4())
        fetch_publisher.send({'harvest_object_id': h_obj_id})

        # Consume one message from each queue; the fetch consumer is
        # expected to return the valid object, not the corrupt one
        next(gather_consumer.consume(queue.get_gather_queue_name()))
        _, _, body = next(
            fetch_consumer.consume(queue.get_fetch_queue_name()))

        json_obj = json.loads(body)
        assert json_obj['harvest_object_id'] == h_obj_id
        assert mock_log_error.call_count == 1
        args, _ = mock_log_error.call_args_list[0]
        if six.PY2:
            assert "cannot concatenate 'str' and 'NoneType' objects" in args[
                1]
        else:
            # The TypeError wording for ``str + None`` differs between
            # Python 3 releases, so accept every known variant instead of
            # pinning the test to a single interpreter version.
            error_text = str(args[1])
            known_messages = (
                "must be str, not NoneType",  # Python 3.6 and earlier
                'can only concatenate str (not "NoneType") to str',  # 3.7+
            )
            assert any(msg in error_text for msg in known_messages), \
                error_text
    finally:
        redis.delete('ckanext-harvest:some-random-key-2')
def test_redis_queue_purging(self):
    '''
    Check that purging the harvest queues leaves unrelated Redis keys alone.

    A sentinel key is written next to the queue data. Messages are then
    published and one is consumed from each queue (which the test expects
    to add keys). After ``queue.purge_queues()`` the sentinel must still
    hold its value, the key count must equal the count taken before
    consuming, and both routing-key lists must be empty.

    Raises ``SkipTest`` unless ``ckan.harvest.mq.type`` is ``redis``.
    '''
    mq_type = config.get('ckan.harvest.mq.type')
    if mq_type != 'redis':
        raise SkipTest()

    sentinel_key = 'ckanext-harvest:some-random-key'
    conn = queue.get_connection()
    try:
        conn.set(sentinel_key, 'foobar')

        # Publish two fake gather jobs followed by two fake fetch objects
        gather_pub = queue.get_gather_publisher()
        for _ in range(2):
            gather_pub.send({'harvest_job_id': str(uuid.uuid4())})
        fetch_pub = queue.get_fetch_publisher()
        for _ in range(2):
            fetch_pub.send({'harvest_object_id': str(uuid.uuid4())})
        keys_before_consuming = conn.dbsize()

        # Pull one message off each queue
        gather_messages = queue.get_gather_consumer().consume(
            queue.get_gather_queue_name())
        next(gather_messages)
        fetch_messages = queue.get_fetch_consumer().consume(
            queue.get_fetch_queue_name())
        next(fetch_messages)

        ok_(conn.dbsize() > keys_before_consuming)

        queue.purge_queues()

        # The sentinel survives; everything the consumers added is gone
        assert_equal(conn.get(sentinel_key), 'foobar')
        assert_equal(conn.dbsize(), keys_before_consuming)
        assert_equal(conn.llen(queue.get_gather_routing_key()), 0)
        assert_equal(conn.llen(queue.get_fetch_routing_key()), 0)
    finally:
        # Always remove the sentinel so later tests start clean
        conn.delete(sentinel_key)
def test_raise_child_error_and_retry(self):
    """ A child harvested before its parent must fail and be retried.

        The test gathers one parent with two children, sends all three
        harvest objects to the fetch queue, then runs the fetch callback
        for a child first (expecting ParentNotHarvestedException) and for
        the parent afterwards. After resubmitting jobs and objects, both
        the errored child and the still-waiting child are expected to be
        back in the fetch queue.
        Just for CKAN 2.8 env"""

    # Gather first so the harvest objects exist
    url = 'http://127.0.0.1:%s/collection-1-parent-2-children.data.json' % self.mock_port
    self.run_gather(url=url)
    assert_equal(len(self.harvest_objects), 3)

    # Push every gathered object onto the fetch queue
    fetch_publisher = queue.get_fetch_publisher()
    for gathered in self.harvest_objects:
        fresh = harvest_model.HarvestObject.get(gathered.id)  # refresh
        payload = json.loads(fresh.content)
        assert_equal(fresh.state, 'WAITING')
        log.info('HO: {}\n\tCurrent: {}'.format(
            payload['identifier'], fresh.current))
        assert_equal(fresh.retry_times, 0)
        fetch_publisher.send({'harvest_object_id': fresh.id})
        log.info('Harvest object sent to the fetch queue {} as {}'.format(
            payload['identifier'], fresh.id))
    fetch_publisher.close()

    # Fetch in the wrong order: a child first, then the parent

    class FakeMethod(object):
        ''' Stand-in for the method object returned by AMQP'''

        def __init__(self, message):
            self.delivery_tag = message

    consumer_fetch = queue.get_fetch_consumer()
    qname = queue.get_fetch_queue_name()

    # The child goes first and is expected to raise
    child = self.harvest_objects[1]
    child_body = json.dumps({"harvest_object_id": child.id})
    child_method = FakeMethod(child_body)
    with assert_raises(ParentNotHarvestedException):
        queue.fetch_callback(consumer_fetch, child_method, None, child_body)
    assert_equal(self.harvest_objects[1].retry_times, 1)
    assert_equal(self.harvest_objects[1].state, "ERROR")

    # The parent comes later, as if it had been in a different queue
    parent = self.harvest_objects[0]
    parent_body = json.dumps({"harvest_object_id": parent.id})
    parent_method = FakeMethod(parent_body)
    queue.fetch_callback(consumer_fetch, parent_method, None, parent_body)
    assert_equal(self.harvest_objects[0].retry_times, 1)
    assert_equal(self.harvest_objects[0].state, "COMPLETE")

    # Check status on harvest objects: one child errored, the parent is
    # complete and the second child is still waiting
    for gathered in self.harvest_objects:
        fresh = harvest_model.HarvestObject.get(gathered.id)  # refresh
        payload = json.loads(fresh.content)
        idf = payload['identifier']
        status_line = '\nHO2: {}\n\tState: {}\n\tCurrent: {}\n\tGathered {}'.format(
            idf, fresh.state, fresh.current, fresh.gathered)
        log.info(status_line)

        if idf == 'OPM-ERround-0001':
            assert_equal(fresh.state, 'COMPLETE')
        elif idf == 'OPM-ERround-0001-AWOL':
            assert_equal(fresh.state, 'ERROR')
            ho_awol_id = fresh.id
        elif idf == 'OPM-ERround-0001-Retire':
            assert_equal(fresh.state, 'WAITING')
            ho_retire_id = fresh.id
        else:
            raise Exception('Unexpected identifier: "{}"'.format(idf))

    # Resubmit jobs and objects the way harvest_jobs_run does; the
    # errored harvest object is expected to land in the fetch queue
    queue.resubmit_jobs()
    queue.resubmit_objects()

    # Drain the fetch queue and collect the harvest objects found in it
    requeued_objects = []
    while True:
        _method, _header, raw_body = consumer_fetch.basic_get(queue=qname)
        if raw_body is None:
            break

        message = json.loads(raw_body)
        queued_id = message.get('harvest_object_id', None)
        log.info('Adding ho_id {}'.format(queued_id))
        if queued_id is None:
            continue

        found = harvest_model.HarvestObject.get(queued_id)
        if found is None:
            log.info('Harvest object not found {}'.format(queued_id))
            continue

        requeued_objects.append(found)
        found_content = json.loads(found.content)
        log.info('Harvest object found {}: {} '.format(
            found_content['identifier'], found.state))

    ho_ids = [requeued.id for requeued in requeued_objects]

    # Both the waiting child and the errored child should be queued again
    log.info('Searching wainting object "Retire ID"')
    assert_in(ho_retire_id, ho_ids)
    log.info('Searching errored object "Awol ID"')
    assert_in(ho_awol_id, ho_ids)