def test_init(logger, callback):
    """Check how ``Worker`` configures its consumer and exposes its settings.

    A worker is created with two comma-separated hosts and a set of SSL file
    paths. The test then verifies that the consumer class was instantiated
    exactly once with the expected arguments, that the worker's attributes
    reflect the constructor input, and that neither the callback nor
    ``logger.info`` was touched during construction.
    """
    worker = Worker(
        hosts='host:7000,host:8000',
        topic='foo',
        timeout=1000,
        callback=callback,
        job_size=10000000,
        cafile='/test/files/cafile',
        certfile='/test/files/certfile',
        keyfile='/test/files/keyfile',
        crlfile='/test/files/crlfile',
    )

    # Keyword arguments the consumer class is expected to receive. Note that
    # the expected fetch size is twice the ``job_size`` given above.
    expected_consumer_kwargs = dict(
        group_id='foo',
        bootstrap_servers='host:7000,host:8000',
        max_partition_fetch_bytes=20000000,
        ssl_cafile='/test/files/cafile',
        ssl_certfile='/test/files/certfile',
        ssl_keyfile='/test/files/keyfile',
        ssl_crlfile='/test/files/crlfile',
        consumer_timeout_ms=-1,
        enable_auto_commit=False,
        auto_offset_reset='latest',
    )
    mock_consumer_cls.assert_called_once_with(
        'foo', **expected_consumer_kwargs)

    assert repr(worker) == 'Worker(topic=foo)'
    # The comma-separated host string is exposed as a list.
    assert worker.hosts == ['host:7000', 'host:8000']
    assert worker.timeout == 1000
    assert worker.topic == 'foo'
    assert worker.consumer == mock_consumer

    # Construction alone must not trigger the callback or any info logging.
    assert not callback.called
    assert not logger.info.called
def test_start_job_malformed(logger, callback):
    """Check that a malformed record is skipped with a warning.

    The mocked consumer yields a single record (``rec34``). The worker is
    expected to log that it is starting and processing the record, emit
    exactly one "malformed" warning, and never invoke the callback.
    """
    # Make iteration over the mocked consumer yield just the one record.
    mock_consumer.__iter__ = lambda _consumer: iter([rec34])

    Worker(
        hosts='localhost',
        topic='foo',
        timeout=1000,
        callback=callback,
    ).start()

    expected_info = [
        mock.call('Starting Worker(topic=foo) ...'),
        mock.call('Processing {} ...'.format(rec34_repr)),
    ]
    logger.info.assert_has_calls(expected_info)

    expected_warning = '{} malformed. Skipping ...'.format(rec34_repr)
    logger.warning.assert_called_once_with(expected_warning)

    assert not callback.called
def test_start_job_success(logger, callback):
    """Check the log output and callback for a job that succeeds.

    The mocked consumer yields a single record (``rec11``). The worker is
    expected to log each processing stage and to invoke the callback exactly
    once with the ``'success'`` status, the job, its return value ``(1, 2,
    3)``, and ``None`` for the two remaining arguments.
    """
    # Make iteration over the mocked consumer yield just the one record.
    mock_consumer.__iter__ = lambda _consumer: iter([rec11])

    Worker(
        hosts='localhost',
        topic='foo',
        callback=callback,
    ).start()

    expected_info = [
        mock.call('Starting Worker(topic=foo) ...'),
        mock.call('Processing {} ...'.format(rec11_repr)),
        mock.call('Running Job 100: tests.utils.success_func(1, 2, c=3) ...'),
        mock.call('Job 100 returned: (1, 2, 3)'),
        mock.call('Executing callback ...'),
    ]
    logger.info.assert_has_calls(expected_info)

    callback.assert_called_once_with(
        'success', success_job, (1, 2, 3), None, None)
def test_start_job_timeout(logger, callback):
    """Check the log output and callback for a job that times out.

    The mocked consumer yields a single record (``rec21``). The worker is
    expected to log the processing stages, log exactly one timeout error,
    and invoke the callback exactly once with the ``'timeout'`` status, the
    job, and ``None`` for the three remaining arguments.
    """
    # Make iteration over the mocked consumer yield just the one record.
    mock_consumer.__iter__ = lambda _consumer: iter([rec21])

    Worker(
        hosts='localhost',
        topic='foo',
        timeout=1000,
        callback=callback,
    ).start()

    expected_info = [
        mock.call('Starting Worker(topic=foo) ...'),
        mock.call('Processing {} ...'.format(rec21_repr)),
        mock.call('Running Job 300: tests.utils.timeout_func(2, 3, 4) ...'),
        mock.call('Executing callback ...'),
    ]
    logger.info.assert_has_calls(expected_info)

    # The expected message reports 100 seconds even though the worker was
    # given timeout=1000.
    logger.error.assert_called_once_with(
        'Job 300 timed out after 100 seconds.')

    callback.assert_called_once_with(
        'timeout', timeout_job, None, None, None)
def test_start_job_callback_fail(logger, callback):
    """Check that an exception raised by the callback is logged.

    The mocked consumer yields a single record (``rec11``) and the callback
    is configured to raise ``KeyError('foo')``. The worker is expected to log
    the usual processing stages and then report the callback failure exactly
    once through ``logger.exception``.
    """
    # Make iteration over the mocked consumer yield just the one record.
    mock_consumer.__iter__ = lambda _consumer: iter([rec11])

    # Every invocation of the callback raises this error.
    expected_error = KeyError('foo')
    callback.side_effect = expected_error

    Worker(
        hosts='localhost',
        topic='foo',
        callback=callback,
    ).start()

    expected_info = [
        mock.call('Starting Worker(topic=foo) ...'),
        mock.call('Processing {} ...'.format(rec11_repr)),
        mock.call('Running Job 100: tests.utils.success_func(1, 2, c=3) ...'),
        mock.call('Job 100 returned: (1, 2, 3)'),
        mock.call('Executing callback ...'),
    ]
    logger.info.assert_has_calls(expected_info)

    expected_message = 'Callback failed: {}'.format(expected_error)
    logger.exception.assert_called_once_with(expected_message)
def test_worker_initialization_with_bad_args(hosts, consumer):
    """Verify that ``Worker`` rejects each kind of invalid argument.

    Every case constructs a ``Worker`` with exactly one bad argument and
    checks both that an ``AssertionError`` is raised and that its message
    names the offending argument.

    :param hosts: Kafka bootstrap servers, used to build a consumer that has
        no ``group_id``.
    :param consumer: A valid consumer, used for the cases where some other
        argument is the invalid one.
    """
    with pytest.raises(AssertionError) as e:
        Worker(topic=True, consumer=consumer)
    assert str(e.value) == 'topic must be a str'

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer='bar')
    assert str(e.value) == 'bad consumer instance'

    # Build the group-less consumer outside the ``raises`` block so that only
    # the Worker constructor can satisfy the expectation, and close it
    # afterwards so the test does not leave a consumer open.
    bad_consumer = KafkaConsumer(bootstrap_servers=hosts)
    try:
        with pytest.raises(AssertionError) as e:
            Worker(topic='topic', consumer=bad_consumer)
    finally:
        bad_consumer.close()
    assert str(e.value) == 'consumer must have group_id'

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer=consumer, callback=1)
    assert str(e.value) == 'callback must be a callable'

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer=consumer, deserializer=1)
    assert str(e.value) == 'deserializer must be a callable'

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer=consumer, logger=1)
    assert str(e.value) == 'bad logger instance'
def test_start_job_failure(logger, callback):
    """Check the log output and callback for a job that raises.

    The mocked consumer yields a single record (``rec12``). The worker is
    expected to log the processing stages, report the failure through
    ``logger.exception``, and invoke the callback exactly once with the
    ``'failure'`` status, the job, ``None``, a ``ValueError`` instance, and a
    string as its five positional arguments.
    """
    # Make iteration over the mocked consumer yield just the one record.
    mock_consumer.__iter__ = lambda _consumer: iter([rec12])

    Worker(
        hosts='localhost',
        topic='foo',
        timeout=1000,
        callback=callback,
    ).start()

    expected_info = [
        mock.call('Starting Worker(topic=foo) ...'),
        mock.call('Processing {} ...'.format(rec12_repr)),
        mock.call('Running Job 200: tests.utils.failure_func(1, 2, 3) ...'),
        mock.call('Executing callback ...'),
    ]
    logger.info.assert_has_calls(expected_info)
    logger.exception.assert_called_with('Job 200 failed: failed!')

    recorded_calls = callback.call_args_list
    assert len(recorded_calls) == 1

    # Element 0 of a recorded call is its tuple of positional arguments.
    # They are checked one by one because the last two are only verified by
    # type, not by exact value.
    positional = recorded_calls[0][0]
    assert positional[0] == 'failure'
    assert positional[1] == failure_job
    assert positional[2] is None
    assert isinstance(positional[3], ValueError)
    assert isinstance(positional[4], str)
import logging

from kafka import KafkaConsumer
from kq import Worker

# Send records from the 'kq.worker' logger, at DEBUG level and above, through
# a stream handler that prefixes each message with its level name.
formatter = logging.Formatter('[%(levelname)s] %(message)s')
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)

logger = logging.getLogger('kq.worker')
logger.addHandler(stream_handler)
logger.setLevel(logging.DEBUG)

# Kafka consumer the worker reads from.
consumer = KafkaConsumer(
    bootstrap_servers='34.73.142.27:9092',
    group_id='group',
    auto_offset_reset='latest',
)

# Create the worker for the 'topic_queue' topic and start it.
worker = Worker(topic='topic_queue', consumer=consumer)
worker.start()
def worker(topic, consumer, callback, deserializer):
    """Build a ``Worker`` from the given arguments.

    The four arguments are forwarded positionally, in the order received.

    :param topic: Forwarded as the first positional argument.
    :param consumer: Forwarded as the second positional argument.
    :param callback: Forwarded as the third positional argument.
    :param deserializer: Forwarded as the fourth positional argument.
    :return: The newly constructed worker.
    """
    instance = Worker(topic, consumer, callback, deserializer)
    return instance
elif status == 'failure':
    assert isinstance(job, Job)
    assert result is None
    assert exception is not None
    assert stacktrace is not None


def deserializer(serialized):
    """Example deserializer function with extra sanity checking.

    Checks that the input is a byte string and then loads it with
    ``dill.loads``.

    :param serialized: Serialized byte string.
    :type serialized: bytes
    :return: Deserialized job object.
    :rtype: kq.Job
    :raises AssertionError: If ``serialized`` is not a ``bytes`` instance.
    """
    # NOTE: ``assert`` statements are removed when Python runs with -O, so
    # this check is a sanity aid rather than guaranteed validation.
    assert isinstance(serialized, bytes), 'Expecting a bytes'
    # NOTE(review): dill builds on pickle, and unpickling can execute
    # arbitrary code; confirm that only trusted producers write to the topic.
    return dill.loads(serialized)


if __name__ == '__main__':
    # Connect to a broker at 127.0.0.1:9092 as consumer group 'group', with
    # auto-commit turned off and auto_offset_reset set to 'latest'.
    consumer = KafkaConsumer(bootstrap_servers='127.0.0.1:9092', group_id='group', enable_auto_commit=False, auto_offset_reset='latest')
    # Start a worker on 'topic' using the callback and deserializer from
    # this module.
    worker = Worker(topic='topic', consumer=consumer, callback=callback, deserializer=deserializer)
    worker.start()
return ""
return BeautifulSoup(content[0].extract().replace("</p><p>", "</p> <p>").encode("UTF-8"), "lxml").get_text()


def deserializer(serialized):
    """Turn a serialized message into a job that runs ``processMessage``.

    The byte string is parsed into a ``keywee_pb2.Item`` with ``Parse`` and
    the resulting message becomes the job's only positional argument.

    :param serialized: Serialized message; must be a ``bytes`` instance.
    :return: A ``Job`` whose ``func`` is ``processMessage``, whose ``args``
        is a one-element list holding the parsed message, and whose
        ``kwargs`` is empty.
    :raises AssertionError: If ``serialized`` is not a ``bytes`` instance.
    """
    # NOTE: ``assert`` statements are removed when Python runs with -O, so
    # this check is a sanity aid rather than guaranteed validation.
    assert isinstance(serialized, bytes), 'Expecting a bytes'
    # NOTE(review): ``Parse`` is presumably protobuf's json_format.Parse,
    # which fills in the message passed as its second argument; confirm
    # against this file's imports.
    msg = Parse(serialized, keywee_pb2.Item())
    args = [msg]
    kwargs = {}
    return Job(func=processMessage, args=args, kwargs=kwargs, )


if __name__ == '__main__':
    logger.info("Connecting to kafka [%s] with consumer group [%s] to topic [%s]", kafka_address, kafka_group_id, kafka_topic)
    # The consumer is created with auto-commit turned off and
    # auto_offset_reset set to 'latest'.
    consumer = KafkaConsumer(bootstrap_servers=kafka_address, group_id=kafka_group_id, enable_auto_commit=False, auto_offset_reset='latest')
    # No callback is passed to the worker; only the custom deserializer.
    worker = Worker(topic=kafka_topic, consumer=consumer, deserializer=deserializer, )
    worker.start()