예제 #1
0
def collect_produce_service_run(url: str,
                                producer: Producer,
                                topic: str,
                                sleep_time: int,
                                pattern: Optional[str] = None,
                                cycles: Optional[int] = None) -> None:
    """Collect website metrics periodically and publish them to Kafka.

    Args:
        url: url of the monitored web-site
        producer: Kafka producer; used as a context manager for the whole run
        topic: Kafka topic this service will post to
        sleep_time: number of seconds to wait between metric collections
        pattern: optional regexp-like string passed on to ``get_metrics``
        cycles: number of iterations to run the service. Runs indefinitely
            if None; a value of zero or less performs no iterations.

    Returns:
        None, runs until interrupted by the user or until ``cycles``
        iterations have completed.

    """
    log = logging.getLogger(f'{__file__}:WebMetricProducerService')
    log.info('Starting Website metric collection and publishing service.')
    with producer:
        completed = 0
        # Compare against None explicitly: a plain truthiness test would
        # treat ``cycles=0`` as "no limit" and loop forever.
        while cycles is None or completed < cycles:
            try:
                result = get_metrics(url, pattern)
                producer.send(topic, value=result)
                completed += 1
                if cycles is not None and completed >= cycles:
                    # Skip the trailing sleep after the final iteration.
                    break
                time.sleep(sleep_time)
            except KeyboardInterrupt:
                # Ctrl-C is the expected way to stop an unbounded run.
                break
    def test_produce(self):
        """Check ids and creation-time bounds of two consecutive requests."""
        producer_id = 1
        alpha, beta = 0, 5
        source = Producer(producer_id, alpha, beta)
        first = source.produce()
        second = source.produce()

        # Both requests must be attributed to the producer that made them.
        self.assertEqual(producer_id, first.producer_id,
                         'producer id of first request')
        self.assertEqual(producer_id, second.producer_id,
                         'second producer id of second request')

        # Request ids are handed out sequentially starting from zero.
        self.assertEqual(0, first.id, 'first id')
        self.assertEqual(1, second.id, 'second id')

        # The first creation time lies within [alpha, beta].
        self.assertTrue(first.creation_time >= alpha,
                        'lower bound of first request creation time')
        self.assertTrue(first.creation_time <= beta,
                        'upper bound of first request creation time')

        # The gap between the two requests lies within [alpha, beta] as well.
        gap = second.creation_time - first.creation_time
        self.assertTrue(gap >= alpha,
                        'lower bound of second request creation time')
        self.assertTrue(gap <= beta,
                        'upper bound of second request creation time')
    def __init__(self, producer_count, alpha, beta, device_count, lambda_param,
                 buffer_size):
        """Build the producers, devices, buffer, statistics and empty logs.

        Args:
            producer_count: number of Producer objects to create; each one
                receives its index in ``range(producer_count)`` as first
                argument
            alpha: forwarded to every Producer and stored on the instance
            beta: forwarded to every Producer and stored on the instance
            device_count: number of Device objects to create
            lambda_param: forwarded to every Device and stored on the
                instance
            buffer_size: forwarded to the Buffer constructor
        """
        producers = []
        for producer_id in range(producer_count):
            producers.append(Producer(producer_id, alpha, beta))
        self.__producers = producers

        devices = []
        for _ in range(device_count):
            devices.append(Device(lambda_param))
        self.__devices = devices
        self.__current_device = 0

        self.__buffer = Buffer(buffer_size)
        self.__alpha, self.__beta, self.__lambda = alpha, beta, lambda_param

        self.__stat = Statistics(producer_count, device_count)

        # Every log starts out as its own, independent empty list.
        self.__creation_log, self.__setting_log = [], []
        self.__event_log = []
        self.__release_log, self.__deny_log, self.__buffer_log = [], [], []
예제 #4
0
from src.producer import Producer

# Module-level accumulator; ResultPipeline.process_item appends every item here.
results = []
# Shared producer constructed with the name 'unfiltered-articles-input'
# (presumably the Kafka topic it publishes to — confirm in src.producer).
producer = Producer('unfiltered-articles-input')


class ResultPipeline(object):
    """A custom pipeline that publishes scrape results and stores them in 'results'."""

    @staticmethod
    def process_item(item, spider):
        """Publish one scraped item and record it in the module-level list.

        Args:
            item: the scraped item; converted with ``dict(item)`` before use
            spider: the spider that produced the item (unused here)

        Returns:
            The unchanged ``item``, so that any later pipeline stage still
            receives it.
        """
        producer.send_message(dict(item))
        # Store an independent copy so the sent payload and the recorded
        # result never alias each other.
        results.append(dict(item))
        # Item pipelines are expected to hand the item back; returning None
        # would pass an empty value on to the next pipeline component.
        return item
예제 #5
0
# Topic name handed to the Producer below.
TFIDF_TOPIC = 'tfidf-input'
# Topic name handed to the KafkaConsumer below.
UNIQUE_TOPIC = 'unique-articles-input'
# Shared Analysis instance, created once at import time.
analysis = Analysis()

# Consumer for UNIQUE_TOPIC, created at import time.
unique_consumer = KafkaConsumer(
    UNIQUE_TOPIC,
    # Single broker address assembled as "<host>:<port>" from the config;
    # the concatenation requires both config values to be strings.
    bootstrap_servers=[
        config.CONNECTION['host'] + ':' + config.CONNECTION['port']
    ],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    group_id='articles_consumer',
    # Decode the raw value with the configured encoding, then parse it
    # with loads().
    value_deserializer=lambda x: loads(x.decode(constants.UTF_ENCODING)))

# Producer used by publish_message() to forward messages to TFIDF_TOPIC.
tfidf_producer = Producer(TFIDF_TOPIC)


def format_message(message):
    """Collapse the 'title' and 'text' sequences into space-joined strings.

    The mapping is modified in place and the same object is returned.
    """
    for field in ('title', 'text'):
        message[field] = ' '.join(message[field])
    return message


def publish_message(message):
    """Remove the '_id' entry from the message, if any, and send it via tfidf_producer.

    The message is modified in place.
    """
    # The None default makes the removal a no-op when '_id' is absent.
    message.pop('_id', None)
    tfidf_producer.send_message(message)


def start_consumer():
    """Print a start-up notice naming the consumed topic."""
    print(f'Started {UNIQUE_TOPIC} consumer')
예제 #6
0
from json import loads
import src.config as config
import src.constants as constants
from kafka import KafkaConsumer
from src.producer import Producer
from src.detection import Detection

# Two producers created at import time, one per output name.
non_articles_producer = Producer('non-articles-input')
articles_producer = Producer('articles-input')

# Consumer for 'unfiltered-articles-input', created at import time.
consumer = KafkaConsumer(
    'unfiltered-articles-input',
    # Single broker address assembled as "<host>:<port>" from the config;
    # the concatenation requires both config values to be strings.
    bootstrap_servers=[
        config.CONNECTION['host'] + ':' + config.CONNECTION['port']
    ],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    group_id='unfiltered_articles_consumer',
    # Decode the raw value with the configured encoding, then parse it as
    # JSON.
    value_deserializer=lambda x: loads(x.decode(constants.UTF_ENCODING)))


def main():
    detection = Detection()

    print(detection.is_political('De EASA'))
    print(detection.is_political('De VVD'))
    print('started consumer')

    for message in consumer:
        message = dict(message.value)
        if detection.is_political(message['text']):
    def test_init(self):
        """A freshly built producer reports its id and a request count of zero."""
        expected_id = 2
        fresh = Producer(expected_id, 0, 2)

        self.assertEqual(fresh.id, expected_id, 'producer id')
        self.assertEqual(fresh.get_request_count(), 0, 'count of request')
    def test_get_request_count(self):
        """The request count equals the number of produce() calls made."""
        expected_count = 100
        source = Producer(1, 0, 2)
        for _ in range(expected_count):
            source.produce()

        self.assertEqual(source.get_request_count(), expected_count,
                         'count of request')
예제 #9
0
from time import sleep
from src.producer import Producer

# Module-level producer used by main(); constructed with the name 'plaintext-input'.
producer = Producer('plaintext-input')


# TODO: this file only exists to test the topic; remove it later.
def main():
    """Send a test string, then the integers 0-9 with a 5 second pause after each."""
    print('started producer')
    producer.send_message('test message')

    sequence = 0
    while sequence < 10:
        producer.send_message(sequence)
        sleep(5)
        sequence += 1


# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


예제 #10
0
# Topic name handed to the Producer below.
UNIQUE_TOPIC = 'unique-articles-input'
# NOTE(review): the three thresholds below are not used in the visible part
# of this file; their names suggest similarity-search tuning — confirm at
# the call sites.
MONGO_MIN_SIMILARITY_SCORE = 10
MIN_SIMILARITY_SCORE = 0.9
SIMILAR_ARTICLES_LIMIT = 5

# Consumer for ARTICLES_TOPIC, created at import time. ARTICLES_TOPIC is
# defined outside the visible part of this file.
unfiltered_consumer = KafkaConsumer(
    ARTICLES_TOPIC,
    # Single broker address assembled as "<host>:<port>" from the config;
    # the concatenation requires both config values to be strings.
    bootstrap_servers=[
        config.CONNECTION['host'] + ':' + config.CONNECTION['port']
    ],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    group_id='articles_consumer',
    # Decode the raw value with the configured encoding, then parse it
    # with loads().
    value_deserializer=lambda x: loads(x.decode(constants.UTF_ENCODING)))

# Producer used by publish_message() to forward messages to UNIQUE_TOPIC.
unique_producer = Producer(UNIQUE_TOPIC)


def format_message(message):
    """Join the 'title' and 'text' sequences of a message with single spaces.

    The mapping is updated in place and the same object is returned.
    """
    for key in ('title', 'text'):
        message[key] = ' '.join(message[key])
    return message


def publish_message(message):
    """Remove the '_id' entry from the message, if any, and send it via unique_producer.

    The message is modified in place.
    """
    # The None default makes the removal a no-op when '_id' is absent.
    message.pop('_id', None)
    unique_producer.send_message(message)


def start_consumer():
    print('Started ' + ARTICLES_TOPIC + ' consumer')