Beispiel #1
0
def test_init(logger, callback):
    """Check what constructing a Worker does and does not trigger.

    The consumer class mock must be called exactly once with the expected
    arguments, the worker must expose the values it was given, and neither
    the callback nor ``logger.info`` may have been invoked.
    """
    worker_kwargs = {
        'hosts': 'host:7000,host:8000',
        'topic': 'foo',
        'timeout': 1000,
        'callback': callback,
        'job_size': 10000000,
        'cafile': '/test/files/cafile',
        'certfile': '/test/files/certfile',
        'keyfile': '/test/files/keyfile',
        'crlfile': '/test/files/crlfile',
    }
    worker = Worker(**worker_kwargs)

    # NOTE(review): the expected fetch size is twice the job_size passed
    # above -- presumably derived inside Worker; confirm against kq.
    expected_consumer_kwargs = {
        'group_id': 'foo',
        'bootstrap_servers': 'host:7000,host:8000',
        'max_partition_fetch_bytes': 20000000,
        'ssl_cafile': '/test/files/cafile',
        'ssl_certfile': '/test/files/certfile',
        'ssl_keyfile': '/test/files/keyfile',
        'ssl_crlfile': '/test/files/crlfile',
        'consumer_timeout_ms': -1,
        'enable_auto_commit': False,
        'auto_offset_reset': 'latest',
    }
    mock_consumer_cls.assert_called_once_with(
        'foo', **expected_consumer_kwargs)

    # Attributes exposed by the freshly built worker.
    assert repr(worker) == 'Worker(topic=foo)'
    assert worker.hosts == ['host:7000', 'host:8000']
    assert worker.timeout == 1000
    assert worker.topic == 'foo'
    assert worker.consumer == mock_consumer

    # Construction alone must not fire the callback or log at info level.
    assert not callback.called
    assert not logger.info.called
Beispiel #2
0
def test_start_job_malformed(logger, callback):
    """Check that a malformed record is logged as skipped and never called back."""
    # The mocked consumer yields exactly one record: rec34.
    mock_consumer.__iter__ = lambda x: iter([rec34])

    worker = Worker(hosts='localhost', topic='foo', timeout=1000,
                    callback=callback)
    worker.start()

    expected_info_calls = [
        mock.call('Starting Worker(topic=foo) ...'),
        mock.call('Processing {} ...'.format(rec34_repr)),
    ]
    logger.info.assert_has_calls(expected_info_calls)

    skip_message = '{} malformed. Skipping ...'.format(rec34_repr)
    logger.warning.assert_called_once_with(skip_message)

    assert not callback.called
Beispiel #3
0
def test_start_job_success(logger, callback):
    """Check the info log sequence and callback arguments for a successful job."""
    # The mocked consumer yields exactly one record: rec11.
    mock_consumer.__iter__ = lambda x: iter([rec11])

    worker = Worker(hosts='localhost', topic='foo', callback=callback)
    worker.start()

    expected_info_calls = [
        mock.call('Starting Worker(topic=foo) ...'),
        mock.call('Processing {} ...'.format(rec11_repr)),
        mock.call('Running Job 100: tests.utils.success_func(1, 2, c=3) ...'),
        mock.call('Job 100 returned: (1, 2, 3)'),
        mock.call('Executing callback ...'),
    ]
    logger.info.assert_has_calls(expected_info_calls)

    # Callback receives the status, the job, its result, and two None values.
    callback.assert_called_once_with(
        'success', success_job, (1, 2, 3), None, None)
Beispiel #4
0
def test_start_job_timeout(logger, callback):
    """Check logging and callback arguments when a job times out."""
    # The mocked consumer yields exactly one record: rec21.
    mock_consumer.__iter__ = lambda x: iter([rec21])

    worker = Worker(hosts='localhost', topic='foo', timeout=1000,
                    callback=callback)
    worker.start()

    expected_info_calls = [
        mock.call('Starting Worker(topic=foo) ...'),
        mock.call('Processing {} ...'.format(rec21_repr)),
        mock.call('Running Job 300: tests.utils.timeout_func(2, 3, 4) ...'),
        mock.call('Executing callback ...'),
    ]
    logger.info.assert_has_calls(expected_info_calls)

    # NOTE(review): the message reports 100 seconds although the worker was
    # built with timeout=1000 -- presumably the job carries its own timeout;
    # confirm against the rec21 fixture.
    timeout_message = 'Job 300 timed out after 100 seconds.'
    logger.error.assert_called_once_with(timeout_message)

    callback.assert_called_once_with('timeout', timeout_job, None, None, None)
Beispiel #5
0
def test_start_job_callback_fail(logger, callback):
    """Check that an exception raised by the callback is logged via logger.exception."""
    # The mocked consumer yields exactly one record: rec11.
    mock_consumer.__iter__ = lambda x: iter([rec11])

    # Make the callback raise when it is invoked.
    expected_error = KeyError('foo')
    callback.side_effect = expected_error

    worker = Worker(hosts='localhost', topic='foo', callback=callback)
    worker.start()

    expected_info_calls = [
        mock.call('Starting Worker(topic=foo) ...'),
        mock.call('Processing {} ...'.format(rec11_repr)),
        mock.call('Running Job 100: tests.utils.success_func(1, 2, c=3) ...'),
        mock.call('Job 100 returned: (1, 2, 3)'),
        mock.call('Executing callback ...'),
    ]
    logger.info.assert_has_calls(expected_info_calls)

    failure_message = 'Callback failed: {}'.format(expected_error)
    logger.exception.assert_called_once_with(failure_message)
Beispiel #6
0
def test_worker_initialization_with_bad_args(hosts, consumer):
    """Check that Worker rejects each invalid argument with an AssertionError.

    Every case passes one bad argument and compares the text of the raised
    AssertionError with the expected message.
    """
    with pytest.raises(AssertionError) as e:
        Worker(topic=True, consumer=consumer)
    assert str(e.value) == 'topic must be a str'

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer='bar')
    assert str(e.value) == 'bad consumer instance'

    # Build the group-less consumer outside the ``raises`` block so that only
    # the Worker call can satisfy the expectation, and close the consumer
    # afterwards so the test does not leave it open.
    bad_consumer = KafkaConsumer(bootstrap_servers=hosts)
    try:
        with pytest.raises(AssertionError) as e:
            Worker(topic='topic', consumer=bad_consumer)
        assert str(e.value) == 'consumer must have group_id'
    finally:
        bad_consumer.close()

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer=consumer, callback=1)
    assert str(e.value) == 'callback must be a callable'

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer=consumer, deserializer=1)
    assert str(e.value) == 'deserializer must be a callable'

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer=consumer, logger=1)
    assert str(e.value) == 'bad logger instance'
Beispiel #7
0
def test_start_job_failure(logger, callback):
    """Check logging and callback arguments when a job raises."""
    # The mocked consumer yields exactly one record: rec12.
    mock_consumer.__iter__ = lambda x: iter([rec12])

    worker = Worker(hosts='localhost', topic='foo', timeout=1000,
                    callback=callback)
    worker.start()

    expected_info_calls = [
        mock.call('Starting Worker(topic=foo) ...'),
        mock.call('Processing {} ...'.format(rec12_repr)),
        mock.call('Running Job 200: tests.utils.failure_func(1, 2, 3) ...'),
        mock.call('Executing callback ...'),
    ]
    logger.info.assert_has_calls(expected_info_calls)
    logger.exception.assert_called_with('Job 200 failed: failed!')

    # The callback must have been invoked exactly once.
    recorded_calls = callback.call_args_list
    assert len(recorded_calls) == 1

    # Inspect the positional arguments of that single call one by one.
    callback_args = recorded_calls[0][0]
    assert callback_args[0] == 'failure'
    assert callback_args[1] == failure_job
    assert callback_args[2] is None
    assert isinstance(callback_args[3], ValueError)
    assert isinstance(callback_args[4], str)
import logging

from kafka import KafkaConsumer
from kq import Worker

# Configure the 'kq.worker' logger: DEBUG level, one stream handler that
# prefixes each message with its level name.
logger = logging.getLogger('kq.worker')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('[%(levelname)s] %(message)s')
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

# Create the Kafka consumer that the worker will use.
consumer = KafkaConsumer(
    bootstrap_servers='34.73.142.27:9092',
    group_id='group',
    auto_offset_reset='latest',
)

# Build a worker for 'topic_queue' on top of that consumer and start it.
worker = Worker(topic='topic_queue', consumer=consumer)
worker.start()
Beispiel #9
0
def worker(topic, consumer, callback, deserializer):
    """Build and return a Worker from the given topic, consumer, callback and deserializer."""
    instance = Worker(topic, consumer, callback, deserializer)
    return instance
Beispiel #10
0
    elif status == 'failure':
        assert isinstance(job, Job)
        assert result is None
        assert exception is not None
        assert stacktrace is not None


def deserializer(serialized):
    """Example deserializer function with extra sanity checking.

    :param serialized: Serialized byte string.
    :type serialized: bytes
    :return: Deserialized job object.
    :rtype: kq.Job
    :raises AssertionError: If ``serialized`` is not a bytes object.
    """
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # stripped when Python runs with -O, which would silently disable this
    # check. The exception type and message are unchanged.
    if not isinstance(serialized, bytes):
        raise AssertionError('Expecting a bytes')
    # SECURITY NOTE(review): dill.loads can execute arbitrary code while
    # unpickling. Only use this with topics whose producers are trusted.
    return dill.loads(serialized)


if __name__ == '__main__':
    # Consumer for the broker on the loopback address, with auto-commit
    # switched off.
    consumer_settings = {
        'bootstrap_servers': '127.0.0.1:9092',
        'group_id': 'group',
        'enable_auto_commit': False,
        'auto_offset_reset': 'latest',
    }
    consumer = KafkaConsumer(**consumer_settings)

    # Hand the consumer, callback and deserializer to the worker and start it.
    worker = Worker(
        topic='topic',
        consumer=consumer,
        callback=callback,
        deserializer=deserializer,
    )
    worker.start()
Beispiel #11
0
        return ""
    return BeautifulSoup(content[0].extract().replace("</p><p>", "</p>&nbsp;<p>").encode("UTF-8"),
    "lxml").get_text()

def deserializer(serialized):
    """Turn a raw message payload into a Job that runs ``processMessage``.

    The payload is parsed into a ``keywee_pb2.Item`` via ``Parse`` and passed
    to ``processMessage`` as its only positional argument.

    :param serialized: Serialized message payload.
    :type serialized: bytes
    :return: Job wrapping ``processMessage`` and the parsed message.
    :raises AssertionError: If ``serialized`` is not a bytes object.
    """
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # stripped when Python runs with -O, which would silently disable this
    # check. The exception type and message are unchanged.
    if not isinstance(serialized, bytes):
        raise AssertionError('Expecting a bytes')
    msg = Parse(serialized, keywee_pb2.Item())
    return Job(
        func=processMessage,
        args=[msg],
        kwargs={},
    )


if __name__ == '__main__':
    # Log the connection parameters before creating the consumer.
    logger.info(
        "Connecting to kafka [%s] with consumer group [%s] to topic [%s]",
        kafka_address,
        kafka_group_id,
        kafka_topic,
    )

    # Consumer with auto-commit switched off.
    consumer_settings = {
        'bootstrap_servers': kafka_address,
        'group_id': kafka_group_id,
        'enable_auto_commit': False,
        'auto_offset_reset': 'latest',
    }
    consumer = KafkaConsumer(**consumer_settings)

    # Start a worker on the topic, using this module's deserializer.
    worker = Worker(topic=kafka_topic, consumer=consumer,
                    deserializer=deserializer)
    worker.start()