Exemple #1
0
            print_log('ERROR: Cannot process user [{}]'.format(user)+\
                '\nDetails: {} [ObjectId: {}]'.format(e, objid))

    if cursor.count() == 0:
        print_log('No documents to process, nothing to do')

    print_log('All done. Bye.')

    if not interactive:
        input()


# Module-level flags. They default to off and are only switched on from the
# command line when this file is executed as a script (see below).
debug = False
testrun = False

if __name__ == '__main__':
    from db import DBConnection

    # A flag is enabled simply by being present anywhere in argv.
    debug = '--debug' in sys.argv
    testrun = '--testrun' in sys.argv

    dbconn = DBConnection()
    conn, result = dbconn.connect()

    # connect() signals success with the literal string "OK"; any other value
    # is printed as the error description.
    if result != "OK":
        print(
            '>> Error while connecting to the database\n>> {}'.format(result))
        # Use sys.exit() rather than the builtin exit(): the latter is injected
        # by the `site` module for interactive use and is missing when Python
        # runs with -S or from a frozen/embedded interpreter.
        sys.exit(1)

    run_fanout(True)
Exemple #2
0
class Consumer:
    """Consume messages from Kafka and write them to a PostgreSQL database."""

    def __init__(self, config_file, kafka_config_file, verbose):
        """Load configuration and create the (not yet connected) DB wrapper.

        Args:
            config_file: Passed to ``helper.config`` twice: once for the
                ``"db"`` section and once for the default section.
            kafka_config_file: Passed to ``helper.config`` to obtain the
                Kafka connection parameters.
            verbose: Forwarded to ``helper.setup_logging`` and to
                ``DBConnection``.
        """
        self.logger = helper.setup_logging(self.__class__.__name__, verbose)

        self.db_connection = DBConnection(verbose)
        # Created lazily by setup_consumer(); stays None until then.
        self.kafka_consumer = None

        self.kafka_params = helper.config(kafka_config_file)
        self.db_params = helper.config(config_file, "db")
        self.params = helper.config(config_file)

    def start_consuming(self):
        """Call ``consume()`` forever, sleeping between rounds. Never returns.

        The pause length in seconds is read from ``params['interval_s']``.
        """
        interval = int(self.params['interval_s'])
        while True:
            self.consume()
            time.sleep(interval)

    def cleanup(self):
        """Close the Kafka consumer, then commit and close the DB connection.

        The database step runs even if closing the Kafka consumer raises, so
        pending database work is not abandoned because of a Kafka-side error.
        The Kafka exception, if any, still propagates to the caller.
        """
        try:
            if self.kafka_consumer:
                self.kafka_consumer.close()
        finally:
            if self.db_connection:
                self.db_connection.commit_and_disconnect()

    def setup_db(self):
        """Connect to the database and create the target table.

        The table name is read from ``db_params['table_name']``.

        Returns:
            False if the connection could not be established, True otherwise.
        """
        conn = self.db_connection.connect(self.db_params)
        if not conn:
            return False

        self.db_connection.create_table(self.db_params['table_name'])
        return True

    def setup_consumer(self):
        """Create the Kafka consumer and store it in ``self.kafka_consumer``.

        Connection settings come from the Kafka config; the client and group
        ids come from the general config. Message values are decoded as
        UTF-8 JSON.

        Returns:
            False if no Kafka broker is available, True otherwise.
        """
        params = self.kafka_params
        try:
            self.kafka_consumer = KafkaConsumer(
                params['topic'],
                auto_offset_reset="earliest",
                value_deserializer=lambda x: json.loads(x.decode('utf-8')),
                bootstrap_servers=params['bootstrap_server'],
                client_id=self.params['kafka_client_id'],
                group_id=self.params['kafka_group_id'],
                security_protocol=params['security_protocol'],
                ssl_cafile=params['ssl_cafile'],
                ssl_certfile=params['ssl_certfile'],
                ssl_keyfile=params['ssl_keyfile'],
            )
        except NoBrokersAvailable as exception:
            self.logger.critical("Couldn't connect to kafka service: %s",
                                 exception)
            # Explicit False (not None) to match the documented contract and
            # the behaviour of setup_db().
            return False

        return True

    def consume(self):
        """Poll Kafka once and write every received message to the database.

        Each message value must provide the keys ``url``, ``response_code``,
        ``response_time``, ``response_result`` and ``timestamp``.

        Offsets are committed once, after the whole polled batch has been
        written, and only if at least one message was received. If a write
        raises, nothing from this batch is committed.
        """
        self.logger.debug("Checking for new messages %s", time.ctime())
        batches = self.kafka_consumer.poll(timeout_ms=1000)

        written = 0
        for msgs in batches.values():
            for msg in msgs:
                self.logger.debug("Received and writing: %s", msg.value)

                entry = msg.value
                self.db_connection.write_entry(entry['url'],
                                               entry['response_code'],
                                               entry['response_time'],
                                               entry['response_result'],
                                               entry['timestamp'])
                written += 1

        if written:
            # Commit offsets so we won't get the same messages again. Per the
            # kafka-python docs, an argument-less commit() uses the consumer's
            # current position, which poll() has already advanced past the
            # whole batch. Committing per message would therefore mark
            # unwritten messages as consumed and cost one blocking round trip
            # each.
            self.kafka_consumer.commit()