print_log('ERROR: Cannot process user [{}]'.format(user)+\ '\nDetails: {} [ObjectId: {}]'.format(e, objid)) if cursor.count() == 0: print_log('No documents to process, nothing to do') print_log('All done. Bye.') if not interactive: input() debug = False testrun = False if __name__ == '__main__': from db import DBConnection debug = '--debug' in sys.argv testrun = '--testrun' in sys.argv dbconn = DBConnection() conn, result = dbconn.connect() if result != "OK": print( '>> Error while connecting to the database\n>> {}'.format(result)) exit(1) run_fanout(True)
class Consumer:
    """Consume messages from Kafka and write them to a PostgreSQL database.

    Typical use, as far as this class shows: construct, call ``setup_db()``
    and ``setup_consumer()`` (both report success as a boolean), then
    ``start_consuming()``; call ``cleanup()`` when finished.
    """

    def __init__(self, config_file, kafka_config_file, verbose):
        """Load configuration and create the logger and database wrapper.

        Args:
            config_file: Passed to ``helper.config`` twice: once with the
                ``"db"`` selector for the database parameters and once
                without it for the general parameters.
            kafka_config_file: Passed to ``helper.config`` for the Kafka
                connection parameters.
            verbose: Forwarded to ``helper.setup_logging`` and
                ``DBConnection``.
        """
        self.logger = helper.setup_logging(self.__class__.__name__, verbose)
        self.db_connection = DBConnection(verbose)
        # Created lazily by setup_consumer(); None means "not connected".
        self.kafka_consumer = None
        self.kafka_params = helper.config(kafka_config_file)
        self.db_params = helper.config(config_file, "db")
        self.params = helper.config(config_file)

    def start_consuming(self) -> None:
        """Poll Kafka for new messages forever, sleeping between polls.

        Blocks the calling thread; the pause between two polls is the
        ``interval_s`` entry of the general parameters, in seconds.
        """
        interval = int(self.params['interval_s'])
        while True:
            self.consume()
            time.sleep(interval)

    def cleanup(self) -> None:
        """Close the Kafka consumer, then commit and close the database.

        The database step runs even when closing the Kafka consumer raises,
        so pending writes are not lost because of a Kafka-side failure. The
        Kafka exception still propagates to the caller afterwards.
        """
        try:
            if self.kafka_consumer:
                self.kafka_consumer.close()
        finally:
            if self.db_connection:
                self.db_connection.commit_and_disconnect()

    def setup_db(self) -> bool:
        """Connect to the database and create the target table.

        Returns:
            False if the connection could not be established, True otherwise.
        """
        conn = self.db_connection.connect(self.db_params)
        if not conn:
            return False
        self.db_connection.create_table(self.db_params['table_name'])
        return True

    def setup_consumer(self) -> bool:
        """Create the Kafka consumer from the loaded configuration.

        Connection settings (topic, bootstrap server, SSL files) come from
        the Kafka parameters; client and group ids come from the general
        parameters.

        Returns:
            False if no Kafka broker is available, True otherwise.
        """
        kafka_params = self.kafka_params
        try:
            self.kafka_consumer = KafkaConsumer(
                kafka_params['topic'],
                auto_offset_reset="earliest",
                # Message payloads are UTF-8 encoded JSON documents.
                value_deserializer=lambda x: json.loads(x.decode('utf-8')),
                bootstrap_servers=kafka_params['bootstrap_server'],
                client_id=self.params['kafka_client_id'],
                group_id=self.params['kafka_group_id'],
                security_protocol=kafka_params['security_protocol'],
                ssl_cafile=kafka_params['ssl_cafile'],
                ssl_certfile=kafka_params['ssl_certfile'],
                ssl_keyfile=kafka_params['ssl_keyfile'],
            )
        except NoBrokersAvailable as exception:
            self.logger.critical("Couldn't connect to kafka service: %s",
                                 exception)
            # Explicit False (not None) to match the documented contract
            # and the behavior of setup_db().
            return False
        return True

    def consume(self) -> None:
        """Write all currently pending Kafka messages to the database.

        Polls once (waiting up to one second), stores every received message
        and then commits the consumer offsets. Each message value must
        provide the keys ``url``, ``response_code``, ``response_time``,
        ``response_result`` and ``timestamp``.
        """
        self.logger.debug("Checking for new messages %s", time.ctime())

        raw_msgs = self.kafka_consumer.poll(timeout_ms=1000)
        # poll() groups messages per partition; the partition key is not
        # needed here, only the messages themselves.
        for msgs in raw_msgs.values():
            for msg in msgs:
                self.logger.debug("Received and writing: %s", msg.value)
                self.db_connection.write_entry(msg.value['url'],
                                               msg.value['response_code'],
                                               msg.value['response_time'],
                                               msg.value['response_result'],
                                               msg.value['timestamp'])
        # Commit offsets so we won't get the same messages again
        self.kafka_consumer.commit()