def collect_produce_service_run(url: str,
                                producer: Producer,
                                topic: str,
                                sleep_time: int,
                                pattern: Optional[str] = None,
                                cycles: Optional[int] = None) -> None:
    """Service runner for web monitoring and posting to a Kafka broker.

    Args:
        url: URL of the monitored website
        producer: Kafka producer
        topic: Kafka topic this service will post to
        sleep_time: number of seconds to wait between metric collections
        pattern: optional regexp-like string to search for on the monitored website
        cycles: number of iterations to run the service; runs indefinitely if None

    Returns:
        None; runs until interrupted by the user or until "cycles" iterations complete
    """
    log = logging.getLogger(f'{__file__}:WebMetricProducerService')
    log.info('Starting Website metric collection and publishing service.')
    with producer:
        counter = 0

        def proceed():
            # Keep running forever when no cycle limit is given.
            return counter < cycles if cycles else True

        while True:
            try:
                result = get_metrics(url, pattern)
                producer.send(topic, value=result)
                counter += 1
                if not proceed():
                    break
                time.sleep(sleep_time)
            except KeyboardInterrupt:
                break
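# The sketch below is an illustrative, hypothetical way to launch the service
# runner above. The Producer constructor arguments, URL, topic name, and timing
# values are assumptions for demonstration, not taken from this repository's
# configuration.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    kafka_producer = Producer()  # constructor arguments depend on this repo's Producer class
    collect_produce_service_run(
        url='https://example.com',  # hypothetical monitored site
        producer=kafka_producer,
        topic='web-metrics',        # hypothetical Kafka topic
        sleep_time=30,              # collect metrics every 30 seconds
        pattern=None,               # no regexp check
        cycles=None)                # run until interrupted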
def test_produce(self):
    producer_id = 1
    alpha = 0
    beta = 5
    producer = Producer(producer_id, alpha, beta)

    first_request = producer.produce()
    second_request = producer.produce()

    self.assertEqual(producer_id, first_request.producer_id,
                     'producer id of first request')
    self.assertEqual(producer_id, second_request.producer_id,
                     'producer id of second request')
    self.assertEqual(0, first_request.id, 'first id')
    self.assertEqual(1, second_request.id, 'second id')
    self.assertTrue(first_request.creation_time >= alpha,
                    'lower bound of first request creation time')
    self.assertTrue(first_request.creation_time <= beta,
                    'upper bound of first request creation time')
    self.assertTrue(
        second_request.creation_time - first_request.creation_time >= alpha,
        'lower bound of second request creation time')
    self.assertTrue(
        second_request.creation_time - first_request.creation_time <= beta,
        'upper bound of second request creation time')
def __init__(self, producer_count, alpha, beta, device_count, lambda_param,
             buffer_size):
    self.__producers = [
        Producer(i, alpha, beta) for i in range(producer_count)
    ]
    self.__devices = [Device(lambda_param) for _ in range(device_count)]
    self.__current_device = 0
    self.__buffer = Buffer(buffer_size)
    self.__alpha = alpha
    self.__beta = beta
    self.__lambda = lambda_param
    self.__stat = Statistics(producer_count, device_count)
    self.__creation_log = []
    self.__setting_log = []
    self.__event_log = []
    self.__release_log = []
    self.__deny_log = []
    self.__buffer_log = []
from src.producer import Producer

results = []
producer = Producer('unfiltered-articles-input')


class ResultPipeline(object):
    """A custom pipeline that stores scrape results in 'results'."""

    @staticmethod
    def process_item(item, spider):
        producer.send_message(dict(item))
        results.append(dict(item))
        # Scrapy item pipelines are expected to return the item so that
        # downstream pipelines receive it.
        return item
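# ResultPipeline's process_item(item, spider) signature matches a Scrapy item
# pipeline. Assuming it is one, it would be enabled through the project's
# Scrapy settings module roughly as follows; the module path 'src.pipelines'
# and the priority value 300 are illustrative assumptions only.
ITEM_PIPELINES = {
    'src.pipelines.ResultPipeline': 300,
}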
TFIDF_TOPIC = 'tfidf-input'
UNIQUE_TOPIC = 'unique-articles-input'

analysis = Analysis()
unique_consumer = KafkaConsumer(
    UNIQUE_TOPIC,
    bootstrap_servers=[
        config.CONNECTION['host'] + ':' + config.CONNECTION['port']
    ],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    group_id='articles_consumer',
    value_deserializer=lambda x: loads(x.decode(constants.UTF_ENCODING)))
tfidf_producer = Producer(TFIDF_TOPIC)


def format_message(message):
    message['title'] = ' '.join(message['title'])
    message['text'] = ' '.join(message['text'])
    return message


def publish_message(message):
    message.pop('_id', None)
    tfidf_producer.send_message(message)


def start_consumer():
    print('Started ' + UNIQUE_TOPIC + ' consumer')
from json import loads

import src.config as config
import src.constants as constants
from kafka import KafkaConsumer
from src.producer import Producer
from src.detection import Detection

non_articles_producer = Producer('non-articles-input')
articles_producer = Producer('articles-input')
consumer = KafkaConsumer(
    'unfiltered-articles-input',
    bootstrap_servers=[
        config.CONNECTION['host'] + ':' + config.CONNECTION['port']
    ],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    group_id='unfiltered_articles_consumer',
    value_deserializer=lambda x: loads(x.decode(constants.UTF_ENCODING)))


def main():
    detection = Detection()
    print(detection.is_political('De EASA'))
    print(detection.is_political('De VVD'))
    print('started consumer')
    for message in consumer:
        message = dict(message.value)
        if detection.is_political(message['text']):
def test_init(self):
    producer_id = 2
    producer = Producer(producer_id, 0, 2)
    self.assertEqual(producer.id, producer_id, 'producer id')
    self.assertEqual(producer.get_request_count(), 0, 'count of request')
def test_get_request_count(self):
    producer = Producer(1, 0, 2)
    for _ in range(100):
        producer.produce()
    self.assertEqual(producer.get_request_count(), 100, 'count of request')
from time import sleep

from src.producer import Producer

producer = Producer('plaintext-input')


# TODO: this is just to test the topic; remove this file later.
def main():
    print('started producer')
    producer.send_message('test message')
    for e in range(10):
        producer.send_message(e)
        sleep(5)


if __name__ == "__main__":
    main()
UNIQUE_TOPIC = 'unique-articles-input'
MONGO_MIN_SIMILARITY_SCORE = 10
MIN_SIMILARITY_SCORE = 0.9
SIMILAR_ARTICLES_LIMIT = 5

unfiltered_consumer = KafkaConsumer(
    ARTICLES_TOPIC,
    bootstrap_servers=[
        config.CONNECTION['host'] + ':' + config.CONNECTION['port']
    ],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    group_id='articles_consumer',
    value_deserializer=lambda x: loads(x.decode(constants.UTF_ENCODING)))
unique_producer = Producer(UNIQUE_TOPIC)


def format_message(message):
    message['title'] = ' '.join(message['title'])
    message['text'] = ' '.join(message['text'])
    return message


def publish_message(message):
    message.pop('_id', None)
    unique_producer.send_message(message)


def start_consumer():
    print('Started ' + ARTICLES_TOPIC + ' consumer')