Esempio n. 1
0
    def test_redis_queue_purging(self):
        '''
        Test that Redis queue purging doesn't purge the wrong keys.
        '''
        if config.get('ckan.harvest.mq.type') != 'redis':
            pytest.skip()
        redis = queue.get_connection()
        try:
            redis.set('ckanext-harvest:some-random-key', 'foobar')

            # Create some fake jobs
            gather_publisher = queue.get_gather_publisher()
            gather_publisher.send({'harvest_job_id': str(uuid.uuid4())})
            gather_publisher.send({'harvest_job_id': str(uuid.uuid4())})
            fetch_publisher = queue.get_fetch_publisher()
            fetch_publisher.send({'harvest_object_id': str(uuid.uuid4())})
            fetch_publisher.send({'harvest_object_id': str(uuid.uuid4())})
            num_keys = redis.dbsize()

            # Create some fake objects
            gather_consumer = queue.get_gather_consumer()
            next(gather_consumer.consume(queue.get_gather_queue_name()))
            fetch_consumer = queue.get_fetch_consumer()
            next(fetch_consumer.consume(queue.get_fetch_queue_name()))

            assert redis.dbsize() > num_keys

            queue.purge_queues()

            assert redis.get('ckanext-harvest:some-random-key') == 'foobar'
            assert redis.dbsize() == num_keys
            assert redis.llen(queue.get_gather_routing_key()) == 0
            assert redis.llen(queue.get_fetch_routing_key()) == 0
        finally:
            redis.delete('ckanext-harvest:some-random-key')
Esempio n. 2
0
    def test_redis_corrupt(self, mock_log_error):
        '''
        Test that corrupt Redis doesn't stop harvest process and still processes other jobs.
        '''
        if config.get('ckan.harvest.mq.type') != 'redis':
            pytest.skip()
        redis = queue.get_connection()
        try:
            redis.set('ckanext-harvest:some-random-key-2', 'foobar')

            # make sure queues/exchanges are created first and are empty
            gather_consumer = queue.get_gather_consumer()
            fetch_consumer = queue.get_fetch_consumer()
            gather_consumer.queue_purge(queue=queue.get_gather_queue_name())
            fetch_consumer.queue_purge(queue=queue.get_fetch_queue_name())

            # Create some fake jobs and objects with no harvest_job_id
            gather_publisher = queue.get_gather_publisher()
            gather_publisher.send({'harvest_job_id': str(uuid.uuid4())})
            fetch_publisher = queue.get_fetch_publisher()
            fetch_publisher.send({'harvest_object_id': None})
            h_obj_id = str(uuid.uuid4())
            fetch_publisher.send({'harvest_object_id': h_obj_id})

            # Create some fake objects
            next(gather_consumer.consume(queue.get_gather_queue_name()))
            _, _, body = next(
                fetch_consumer.consume(queue.get_fetch_queue_name()))

            json_obj = json.loads(body)
            assert json_obj['harvest_object_id'] == h_obj_id

            assert mock_log_error.call_count == 1
            args, _ = mock_log_error.call_args_list[0]
            if six.PY2:
                assert "cannot concatenate 'str' and 'NoneType' objects" in args[
                    1]
            else:
                assert "must be str, not NoneType" in str(args[1])

        finally:
            redis.delete('ckanext-harvest:some-random-key-2')
Esempio n. 3
0
    def test_redis_queue_purging(self):
        '''
        Test that Redis queue purging doesn't purge the wrong keys.
        '''
        if config.get('ckan.harvest.mq.type') != 'redis':
            raise SkipTest()
        redis = queue.get_connection()
        try:
            redis.set('ckanext-harvest:some-random-key', 'foobar')

            # Create some fake jobs
            gather_publisher = queue.get_gather_publisher()
            gather_publisher.send({'harvest_job_id': str(uuid.uuid4())})
            gather_publisher.send({'harvest_job_id': str(uuid.uuid4())})
            fetch_publisher = queue.get_fetch_publisher()
            fetch_publisher.send({'harvest_object_id': str(uuid.uuid4())})
            fetch_publisher.send({'harvest_object_id': str(uuid.uuid4())})
            num_keys = redis.dbsize()

            # Create some fake objects
            gather_consumer = queue.get_gather_consumer()
            next(gather_consumer.consume(queue.get_gather_queue_name()))
            fetch_consumer = queue.get_fetch_consumer()
            next(fetch_consumer.consume(queue.get_fetch_queue_name()))

            ok_(redis.dbsize() > num_keys)

            queue.purge_queues()

            assert_equal(redis.get('ckanext-harvest:some-random-key'),
                         'foobar')
            assert_equal(redis.dbsize(), num_keys)
            assert_equal(redis.llen(queue.get_gather_routing_key()), 0)
            assert_equal(redis.llen(queue.get_fetch_routing_key()), 0)
        finally:
            redis.delete('ckanext-harvest:some-random-key')
Esempio n. 4
0
    def test_raise_child_error_and_retry(self):
        """ if a harvest job for a child fails because 
            parent still not exists we need to ensure
            this job will be retried. 
            This test emulate the case we harvest children first
            (e.g. if we have several active queues).
            Just for CKAN 2.8 env"""

        # start harvest process with gather to create harvest objects
        url = 'http://127.0.0.1:%s/collection-1-parent-2-children.data.json' % self.mock_port
        self.run_gather(url=url)
        assert_equal(len(self.harvest_objects), 3)

        # create a publisher to send this objects to the fetch queue
        publisher = queue.get_fetch_publisher()

        for ho in self.harvest_objects:
            ho = harvest_model.HarvestObject.get(ho.id)  # refresh
            ho_data = json.loads(ho.content)
            assert_equal(ho.state, 'WAITING')
            log.info('HO: {}\n\tCurrent: {}'.format(ho_data['identifier'],
                                                    ho.current))
            assert_equal(ho.retry_times, 0)
            publisher.send({'harvest_object_id': ho.id})
            log.info('Harvest object sent to the fetch queue {} as {}'.format(
                ho_data['identifier'], ho.id))

        publisher.close()

        # run fetch for elements in the wrong order (first a child, the a parent)

        class FakeMethod(object):
            ''' This is to act like the method returned by AMQP'''
            def __init__(self, message):
                self.delivery_tag = message

        # get the fetch
        consumer_fetch = queue.get_fetch_consumer()
        qname = queue.get_fetch_queue_name()

        # first a child and assert to get an error
        r2 = json.dumps({"harvest_object_id": self.harvest_objects[1].id})
        r0 = FakeMethod(r2)
        with assert_raises(ParentNotHarvestedException):
            queue.fetch_callback(consumer_fetch, r0, None, r2)
        assert_equal(self.harvest_objects[1].retry_times, 1)
        assert_equal(self.harvest_objects[1].state, "ERROR")

        # run the parent later, like in a different queue
        r2 = json.dumps({"harvest_object_id": self.harvest_objects[0].id})
        r0 = FakeMethod(r2)
        queue.fetch_callback(consumer_fetch, r0, None, r2)
        assert_equal(self.harvest_objects[0].retry_times, 1)
        assert_equal(self.harvest_objects[0].state, "COMPLETE")

        # Check status on harvest objects
        # We expect one child with error, parent ok and second child still waiting
        for ho in self.harvest_objects:
            ho = harvest_model.HarvestObject.get(ho.id)  # refresh
            ho_data = json.loads(ho.content)
            idf = ho_data['identifier']
            log.info(
                '\nHO2: {}\n\tState: {}\n\tCurrent: {}\n\tGathered {}'.format(
                    idf, ho.state, ho.current, ho.gathered))
            if idf == 'OPM-ERround-0001':
                assert_equal(ho.state, 'COMPLETE')
            elif idf == 'OPM-ERround-0001-AWOL':
                assert_equal(ho.state, 'ERROR')
                ho_awol_id = ho.id
            elif idf == 'OPM-ERround-0001-Retire':
                assert_equal(ho.state, 'WAITING')
                ho_retire_id = ho.id
            else:
                raise Exception('Unexpected identifier: "{}"'.format(idf))

        # resubmit jobs and objects as harvest_jobs_run does
        # we expect the errored harvest object is in this queue
        queue.resubmit_jobs()
        queue.resubmit_objects()

        # iterate over the fetch consumer queue again and check pending harvest objects
        harvest_objects = []
        while True:
            method, header, body = consumer_fetch.basic_get(queue=qname)
            if body is None:
                break

            body_data = json.loads(body)
            ho_id = body_data.get('harvest_object_id', None)
            log.info('Adding ho_id {}'.format(ho_id))
            if ho_id is not None:
                ho = harvest_model.HarvestObject.get(ho_id)
                if ho is not None:
                    harvest_objects.append(ho)
                    content = json.loads(ho.content)
                    log.info('Harvest object found {}: {} '.format(
                        content['identifier'], ho.state))
                else:
                    log.info('Harvest object not found {}'.format(ho_id))

        ho_ids = [ho.id for ho in harvest_objects]

        # Now, we expect the waiting child and the errored one to be in the fetch queue

        log.info('Searching wainting object "Retire ID"')
        assert_in(ho_retire_id, ho_ids)

        log.info('Searching errored object "Awol ID"')
        assert_in(ho_awol_id, ho_ids)