Example #1
File: logging.py Project: braedon/bottler
def configure_logging(json=False, verbose=False):
    log_handler = logging.StreamHandler()
    log_format = '[%(asctime)s] %(name)s.%(levelname)s %(threadName)s %(module)s.%(funcName)s %(filename)s:%(lineno)s %(message)s'
    formatter = JogFormatter(log_format) if json else logging.Formatter(
        log_format)
    log_handler.setFormatter(formatter)

    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO,
                        handlers=[log_handler])
    logging.captureWarnings(True)
def main():
    signal.signal(signal.SIGTERM, signal_handler)

    parser = argparse.ArgumentParser(description='Export example metrics for Prometheus consumption.')
    parser.add_argument('-p', '--port', type=int, default=9900,
                        help='port to serve the metrics endpoint on. (default: 9900)')
    parser.add_argument('-c', '--config-file', default='example.cfg',
                        help='path to query config file. Can be absolute, or relative to the current working directory. (default: example.cfg)')
    parser.add_argument('--example1-disable', action='store_true',
                        help='disable example 1 monitoring.')
    parser.add_argument('--example2-disable', action='store_true',
                        help='disable example 2 monitoring.')
    parser.add_argument('-j', '--json-logging', action='store_true',
                        help='turn on json logging.')
    parser.add_argument('--log-level', default='INFO', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        help='detail level to log. (default: INFO)')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='turn on verbose (DEBUG) logging. Overrides --log-level.')
    args = parser.parse_args()

    log_handler = logging.StreamHandler()
    log_format = '[%(asctime)s] %(name)s.%(levelname)s %(threadName)s %(message)s'
    formatter = JogFormatter(log_format) if args.json_logging else logging.Formatter(log_format)
    log_handler.setFormatter(formatter)

    log_level = getattr(logging, args.log_level)
    logging.basicConfig(
        handlers=[log_handler],
        level=logging.DEBUG if args.verbose else log_level
    )
    logging.captureWarnings(True)

    port = args.port
    
    scheduler = None

    if not args.example1_disable:
        REGISTRY.register(Example1Collector())

    if not args.example2_disable:
        REGISTRY.register(Example2Collector())

    logging.info('Starting server...')
    start_http_server(port)
    logging.info('Server started on port %s', port)

    try:
        while True:
            time.sleep(5)
    except KeyboardInterrupt:
        pass

    shutdown()
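
Example1Collector and Example2Collector are registered above but not shown; a custom collector only needs a collect() method that yields metric families, so a minimal sketch compatible with REGISTRY.register might look like this (the metric name and value are hypothetical):

from prometheus_client.core import GaugeMetricFamily

class Example1Collector:
    """Hypothetical sketch of a custom collector: yields one gauge per scrape."""

    def collect(self):
        gauge = GaugeMetricFamily(
            'example1_value', 'Hypothetical value exposed by example 1 monitoring.')
        gauge.add_metric([], 42)  # no labels, fixed value for illustration
        yield gauge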
Example #3
def main():
    signal.signal(signal.SIGTERM, signal_handler)

    parser = argparse.ArgumentParser(
        description='Export ES query results to Prometheus.')
    parser.add_argument(
        '-e',
        '--es-cluster',
        default='localhost',
        help=
        'addresses of nodes in an Elasticsearch cluster to run queries on. Nodes should be separated by commas e.g. es1,es2. Ports can be provided if non-standard (9200) e.g. es1:9999 (default: localhost)'
    )
    parser.add_argument(
        '--ca-certs',
        help=
        'path to a CA certificate bundle. Can be absolute, or relative to the current working directory. If not specified, SSL certificate verification is disabled.'
    )
    parser.add_argument(
        '-p',
        '--port',
        type=int,
        default=9206,
        help='port to serve the metrics endpoint on. (default: 9206)')
    parser.add_argument('--basic-user',
                        help='User for authentication. (default: no user)')
    parser.add_argument(
        '--basic-password',
        help='Password for authentication. (default: no password)')
    parser.add_argument(
        '--query-disable',
        action='store_true',
        help=
        'disable query monitoring. Config file does not need to be present if query monitoring is disabled.'
    )
    parser.add_argument(
        '-c',
        '--config-file',
        default='exporter.cfg',
        help=
        'path to query config file. Can be absolute, or relative to the current working directory. (default: exporter.cfg)'
    )
    parser.add_argument(
        '--config-dir',
        default='./config',
        help=
        'path to query config directory. Besides including the single config file specified by "--config-file" at first, all config files in the config directory will be sorted, merged, then included. Can be absolute, or relative to the current working directory. (default: ./config)'
    )
    parser.add_argument('--cluster-health-disable',
                        action='store_true',
                        help='disable cluster health monitoring.')
    parser.add_argument(
        '--cluster-health-timeout',
        type=float,
        default=10.0,
        help=
        'request timeout for cluster health monitoring, in seconds. (default: 10)'
    )
    parser.add_argument(
        '--cluster-health-level',
        default='indices',
        choices=['cluster', 'indices', 'shards'],
        help=
        'level of detail for cluster health monitoring. (default: indices)')
    parser.add_argument('--nodes-stats-disable',
                        action='store_true',
                        help='disable nodes stats monitoring.')
    parser.add_argument(
        '--nodes-stats-timeout',
        type=float,
        default=10.0,
        help=
        'request timeout for nodes stats monitoring, in seconds. (default: 10)'
    )
    parser.add_argument(
        '--nodes-stats-metrics',
        type=nodes_stats_metrics_parser,
        help=
        'limit nodes stats to specific metrics. Metrics should be separated by commas e.g. indices,fs.'
    )
    parser.add_argument('--indices-stats-disable',
                        action='store_true',
                        help='disable indices stats monitoring.')
    parser.add_argument(
        '--indices-stats-timeout',
        type=float,
        default=10.0,
        help=
        'request timeout for indices stats monitoring, in seconds. (default: 10)'
    )
    parser.add_argument(
        '--indices-stats-mode',
        default='cluster',
        choices=['cluster', 'indices'],
        help='detail mode for indices stats monitoring. (default: cluster)')
    parser.add_argument(
        '--indices-stats-metrics',
        type=indices_stats_metrics_parser,
        help=
        'limit indices stats to specific metrics. Metrics should be separated by commas e.g. indices,fs.'
    )
    parser.add_argument(
        '--indices-stats-fields',
        type=indices_stats_fields_parser,
        help=
        'include fielddata info for specific fields. Fields should be separated by commas e.g. indices,fs. Use \'*\' for all.'
    )
    parser.add_argument('-j',
                        '--json-logging',
                        action='store_true',
                        help='turn on json logging.')
    parser.add_argument(
        '--log-level',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='detail level to log. (default: INFO)')
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='turn on verbose (DEBUG) logging. Overrides --log-level.')
    args = parser.parse_args()

    if args.basic_user and args.basic_password is None:
        parser.error('Username provided with no password.')
    elif args.basic_user is None and args.basic_password:
        parser.error('Password provided with no username.')
    elif args.basic_user:
        http_auth = (args.basic_user, args.basic_password)
    else:
        http_auth = None

    log_handler = logging.StreamHandler()
    log_format = '[%(asctime)s] %(name)s.%(levelname)s %(threadName)s %(message)s'
    formatter = JogFormatter(
        log_format) if args.json_logging else logging.Formatter(log_format)
    log_handler.setFormatter(formatter)

    log_level = getattr(logging, args.log_level)
    logging.basicConfig(handlers=[log_handler],
                        level=logging.DEBUG if args.verbose else log_level)
    logging.captureWarnings(True)

    port = args.port
    es_cluster = args.es_cluster.split(',')

    if args.ca_certs:
        es_client = Elasticsearch(es_cluster,
                                  verify_certs=True,
                                  ca_certs=args.ca_certs,
                                  http_auth=http_auth)
    else:
        es_client = Elasticsearch(es_cluster,
                                  verify_certs=False,
                                  http_auth=http_auth)

    scheduler = None

    if not args.query_disable:
        scheduler = sched.scheduler()

        config = configparser.ConfigParser()
        config.read_file(open(args.config_file))

        config_dir_sorted_files = sorted(
            glob.glob(os.path.join(args.config_dir, '*.cfg')))
        config.read(config_dir_sorted_files)

        query_prefix = 'query_'
        queries = {}
        for section in config.sections():
            if section.startswith(query_prefix):
                query_name = section[len(query_prefix):]
                query_interval = config.getfloat(section,
                                                 'QueryIntervalSecs',
                                                 fallback=15)
                query_timeout = config.getfloat(section,
                                                'QueryTimeoutSecs',
                                                fallback=10)
                query_indices = config.get(section,
                                           'QueryIndices',
                                           fallback='_all')
                query = json.loads(config.get(section, 'QueryJson'))

                queries[query_name] = (query_interval, query_timeout,
                                       query_indices, query)

        if queries:
            for name, (interval, timeout, indices, query) in queries.items():
                func = partial(run_query, es_client, name, indices, query,
                               timeout)
                run_scheduler(scheduler, interval, func)
        else:
            logging.warning('No queries found in config file %s',
                            args.config_file)

    if not args.cluster_health_disable:
        REGISTRY.register(
            ClusterHealthCollector(es_client, args.cluster_health_timeout,
                                   args.cluster_health_level))

    if not args.nodes_stats_disable:
        REGISTRY.register(
            NodesStatsCollector(es_client,
                                args.nodes_stats_timeout,
                                metrics=args.nodes_stats_metrics))

    if not args.indices_stats_disable:
        parse_indices = args.indices_stats_mode == 'indices'
        REGISTRY.register(
            IndicesStatsCollector(es_client,
                                  args.indices_stats_timeout,
                                  parse_indices=parse_indices,
                                  metrics=args.indices_stats_metrics,
                                  fields=args.indices_stats_fields))

    logging.info('Starting server...')
    start_http_server(port)
    logging.info('Server started on port %s', port)

    try:
        if scheduler:
            scheduler.run()
        else:
            while True:
                time.sleep(5)
    except KeyboardInterrupt:
        pass

    shutdown()
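
The query sections read above use the configparser keys QueryIntervalSecs, QueryTimeoutSecs, QueryIndices and QueryJson; a hypothetical exporter.cfg section consistent with that parsing might look like:

[query_all_documents]
QueryIntervalSecs = 15
QueryTimeoutSecs = 10
QueryIndices = _all
QueryJson = {"query": {"match_all": {}}}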
def cli(**options):
    """Export Elasticsearch query results to Prometheus."""
    if options['basic_user'] and options['basic_password'] is None:
        raise click.BadOptionUsage('basic_user',
                                   'Username provided with no password.')
    elif options['basic_user'] is None and options['basic_password']:
        raise click.BadOptionUsage('basic_password',
                                   'Password provided with no username.')
    elif options['basic_user']:
        http_auth = (options['basic_user'], options['basic_password'])
    else:
        http_auth = None

    if not options['ca_certs'] and options['client_cert']:
        raise click.BadOptionUsage(
            'client_cert',
            '--client-cert can only be used when --ca-certs is provided.')
    elif not options['ca_certs'] and options['client_key']:
        raise click.BadOptionUsage(
            'client_key',
            '--client-key can only be used when --ca-certs is provided.')
    elif options['client_cert'] and not options['client_key']:
        raise click.BadOptionUsage(
            'client_cert',
            '--client-key must be provided when --client-cert is used.')
    elif not options['client_cert'] and options['client_key']:
        raise click.BadOptionUsage(
            'client_key',
            '--client-cert must be provided when --client-key is used.')

    log_handler = logging.StreamHandler()
    log_format = '[%(asctime)s] %(name)s.%(levelname)s %(threadName)s %(message)s'
    formatter = JogFormatter(
        log_format) if options['json_logging'] else logging.Formatter(
            log_format)
    log_handler.setFormatter(formatter)

    log_level = getattr(logging, options['log_level'])
    logging.basicConfig(
        handlers=[log_handler],
        level=logging.DEBUG if options['verbose'] else log_level)
    logging.captureWarnings(True)

    port = options['port']
    es_cluster = options['es_cluster'].split(',')

    if options['ca_certs']:
        es_client = Elasticsearch(es_cluster,
                                  verify_certs=True,
                                  ca_certs=options['ca_certs'],
                                  client_cert=options['client_cert'],
                                  client_key=options['client_key'],
                                  http_auth=http_auth)
    else:
        es_client = Elasticsearch(es_cluster,
                                  verify_certs=False,
                                  http_auth=http_auth)

    if options['indices_stats_indices'] and options[
            'indices_stats_mode'] != 'indices':
        raise click.BadOptionUsage(
            'indices_stats_indices',
            '--indices-stats-mode must be "indices" for '
            '--indices-stats-indices to be used.')

    scheduler = sched.scheduler()
    indices_for_stats = []
    config = None
    config_file_ext = '*.cfg'
    if not options['query_disable']:
        config = configparser.ConfigParser(converters=CONFIGPARSER_CONVERTERS)
        config.read(options['config_file'])

        config_dir_file_pattern = os.path.join(options['config_dir'],
                                               config_file_ext)
        config_dir_sorted_files = sorted(glob.glob(config_dir_file_pattern))
        config.read(config_dir_sorted_files)

        query_prefix = 'query_'
        queries = {}
        for section in config.sections():
            if section.startswith(query_prefix):
                query_name = section[len(query_prefix):]
                interval = config.getfloat(section,
                                           'QueryIntervalSecs',
                                           fallback=15)
                timeout = config.getfloat(section,
                                          'QueryTimeoutSecs',
                                          fallback=10)
                indices = config.get(section, 'QueryIndices', fallback='_all')
                query = json.loads(config.get(section, 'QueryJson'))
                on_error = config.getenum(section,
                                          'QueryOnError',
                                          fallback='drop')
                on_missing = config.getenum(section,
                                            'QueryOnMissing',
                                            fallback='drop')

                queries[query_name] = (interval, timeout, indices, query,
                                       on_error, on_missing)

        if queries:
            for query_name, (interval, timeout, indices, query, on_error,
                             on_missing) in queries.items():
                schedule_job(scheduler, interval, run_query, es_client,
                             query_name, indices, query, timeout, on_error,
                             on_missing)
        else:
            log.error('No queries found in config file(s)')
            return

        chain_query_prefix = 'chain_query_'
        chain_queries = {}
        for section in config.sections():
            if section.startswith(chain_query_prefix):
                query_name = section[len(chain_query_prefix):]
                interval = config.getfloat(section,
                                           'QueryIntervalSecs',
                                           fallback=15)
                timeout = config.getfloat(section,
                                          'QueryTimeoutSecs',
                                          fallback=10)
                query_def = json.loads(config.get(section, 'QueryJson'))
                on_error = config.getenum(section,
                                          'QueryOnError',
                                          fallback='drop')
                on_missing = config.getenum(section,
                                            'QueryOnMissing',
                                            fallback='drop')

                chain_queries[query_name] = (interval, timeout, query_def,
                                             on_error, on_missing)

        if chain_queries:
            for query_name, (interval, timeout, query_def, on_error,
                             on_missing) in chain_queries.items():
                schedule_job(scheduler, interval, run_chain_query, es_client,
                             query_name, query_def, timeout, on_error,
                             on_missing)

    if not options['cluster_health_disable']:
        REGISTRY.register(
            ClusterHealthCollector(es_client,
                                   options['cluster_health_timeout'],
                                   options['cluster_health_level']))

    if not options['nodes_stats_disable']:
        REGISTRY.register(
            NodesStatsCollector(es_client,
                                options['nodes_stats_timeout'],
                                metrics=options['nodes_stats_metrics']))

    if not options['indices_aliases_disable']:
        REGISTRY.register(
            IndicesAliasesCollector(es_client,
                                    options['indices_aliases_timeout']))

    if not options['indices_mappings_disable']:
        REGISTRY.register(
            IndicesMappingsCollector(es_client,
                                     options['indices_mappings_timeout']))

    if not options['indices_stats_disable']:
        parse_indices = options['indices_stats_mode'] == 'indices'
        REGISTRY.register(
            IndicesStatsCollector(es_client,
                                  options['indices_stats_timeout'],
                                  parse_indices=parse_indices,
                                  indices=options['indices_stats_indices'],
                                  metrics=options['indices_stats_metrics'],
                                  fields=options['indices_stats_fields']))

    if scheduler:
        REGISTRY.register(QueryMetricCollector())

    Thread(target=start_prometheus_server, args=(port, )).start()
    Thread(target=start_scheduler, args=(scheduler, )).start()
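
schedule_job and start_scheduler are not shown in this snippet; a minimal repeating-job helper built on the standard sched module, consistent with how the scheduler is driven above, might look like this sketch (the names and exact re-scheduling behaviour are assumptions):

import sched

def schedule_job(scheduler, interval, func, *args):
    # Run the job and re-enter it so it repeats every `interval` seconds.
    def job():
        func(*args)
        scheduler.enter(interval, 1, job)

    scheduler.enter(interval, 1, job)

def start_scheduler(scheduler):
    # Blocks while there are queued events; repeating jobs keep it running.
    scheduler.run()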
def main():
    signal.signal(signal.SIGTERM, signal_handler)

    parser = argparse.ArgumentParser(
        description='Export Kafka consumer offsets to Prometheus.')
    parser.add_argument(
        '-b',
        '--bootstrap-brokers',
        help='Addresses of brokers in a Kafka cluster to talk to.' +
        ' Brokers should be separated by commas e.g. broker1,broker2.' +
        ' Ports can be provided if non-standard (9092) e.g. broker1:9999.' +
        ' (default: localhost)')
    parser.add_argument(
        '-p',
        '--port',
        type=int,
        default=9208,
        help='Port to serve the metrics endpoint on. (default: 9208)')
    parser.add_argument(
        '-s',
        '--from-start',
        action='store_true',
        help='Start from the beginning of the `__consumer_offsets` topic.')
    parser.add_argument(
        '--topic-interval',
        type=float,
        default=30.0,
        help='How often to refresh topic information, in seconds. (default: 30)'
    )
    parser.add_argument(
        '--high-water-interval',
        type=float,
        default=10.0,
        help=
        'How often to refresh high-water information, in seconds. (default: 10)'
    )
    parser.add_argument(
        '--low-water-interval',
        type=float,
        default=10.0,
        help=
        'How often to refresh low-water information, in seconds. (default: 10)'
    )
    parser.add_argument(
        '--consumer-config',
        action='append',
        default=[],
        help=
        'Provide additional Kafka consumer config as a consumer.properties file. Multiple files will be merged, later files having precedence.'
    )
    parser.add_argument('-j',
                        '--json-logging',
                        action='store_true',
                        help='Turn on json logging.')
    parser.add_argument(
        '--log-level',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Detail level to log. (default: INFO)')
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='Turn on verbose (DEBUG) logging. Overrides --log-level.')
    args = parser.parse_args()

    log_handler = logging.StreamHandler()
    log_format = '[%(asctime)s] %(name)s.%(levelname)s %(threadName)s %(message)s'
    formatter = JogFormatter(log_format) \
        if args.json_logging \
        else logging.Formatter(log_format)
    log_handler.setFormatter(formatter)

    log_level = getattr(logging, args.log_level)
    logging.basicConfig(handlers=[log_handler],
                        level=logging.DEBUG if args.verbose else log_level)
    logging.captureWarnings(True)

    port = args.port

    consumer_config = {
        'bootstrap_servers': 'localhost',
        'auto_offset_reset': 'latest',
        'group_id': None,
        'consumer_timeout_ms': 500
    }

    consumer_config_env = os.environ.get('CONSUMER_CONFIG')
    if consumer_config_env:
        # Only include the env-provided properties file if it is actually set.
        args.consumer_config.append(consumer_config_env)

    for filename in args.consumer_config:
        with open(filename) as f:
            raw_config = javaproperties.load(f)
            for k, v in raw_config.items():
                if v == '':
                    # Treat empty values as if they weren't set
                    continue

                if v.lower() in ['true', 'false']:
                    # Convert boolean values
                    v = True if v.lower() == 'true' else False

                else:
                    # Try and convert numeric values
                    try:
                        v = int(v)
                    except ValueError:
                        try:
                            v = float(v)
                        except ValueError:
                            pass

                consumer_config[k.replace('.', '_')] = v

    if args.bootstrap_brokers:
        consumer_config['bootstrap_servers'] = args.bootstrap_brokers

    consumer_config['bootstrap_servers'] = consumer_config[
        'bootstrap_servers'].split(',')

    if args.from_start:
        consumer_config['auto_offset_reset'] = 'earliest'

    consumer = KafkaConsumer('__consumer_offsets', **consumer_config)
    client = consumer._client

    topic_interval = args.topic_interval
    high_water_interval = args.high_water_interval
    low_water_interval = args.low_water_interval

    logging.info('Starting server...')
    start_http_server(port)
    logging.info('Server started on port %s', port)

    REGISTRY.register(collectors.HighwaterCollector())
    REGISTRY.register(collectors.LowwaterCollector())
    REGISTRY.register(collectors.ConsumerOffsetCollector())
    REGISTRY.register(collectors.ConsumerLagCollector())
    REGISTRY.register(collectors.ConsumerLeadCollector())
    REGISTRY.register(collectors.ConsumerCommitsCollector())
    REGISTRY.register(collectors.ConsumerCommitTimestampCollector())
    REGISTRY.register(collectors.ExporterOffsetCollector())
    REGISTRY.register(collectors.ExporterLagCollector())
    REGISTRY.register(collectors.ExporterLeadCollector())

    scheduled_jobs = setup_fetch_jobs(topic_interval, high_water_interval,
                                      low_water_interval, client)
    scheduler.run_scheduled_jobs(scheduled_jobs)

    try:
        while True:
            for message in consumer:
                offsets = collectors.get_offsets()
                commits = collectors.get_commits()
                commit_timestamps = collectors.get_commit_timestamps()
                exporter_offsets = collectors.get_exporter_offsets()

                # Commits store the offset a consumer should read from next,
                # so we need to add one to the current offset for semantic parity
                exporter_partition = message.partition
                exporter_offset = message.offset + 1
                exporter_offsets = ensure_dict_key(exporter_offsets,
                                                   exporter_partition,
                                                   exporter_offset)
                exporter_offsets[exporter_partition] = exporter_offset
                collectors.set_exporter_offsets(exporter_offsets)

                if message.key:
                    key_dict = parse_key(message.key)
                    # Only key versions 0 and 1 are offset commit messages.
                    # Ignore other versions.
                    if key_dict is not None and key_dict['version'] in (0, 1):

                        if message.value:
                            value_dict = parse_value(message.value)
                            if value_dict is not None:
                                group = key_dict['group']
                                topic = key_dict['topic']
                                partition = key_dict['partition']
                                offset = value_dict['offset']
                                commit_timestamp = value_dict[
                                    'commit_timestamp'] / 1000

                                offsets = ensure_dict_key(offsets, group, {})
                                offsets[group] = ensure_dict_key(
                                    offsets[group], topic, {})
                                offsets[group][topic] = ensure_dict_key(
                                    offsets[group][topic], partition, offset)
                                offsets[group][topic][partition] = offset
                                collectors.set_offsets(offsets)

                                commits = ensure_dict_key(commits, group, {})
                                commits[group] = ensure_dict_key(
                                    commits[group], topic, {})
                                commits[group][topic] = ensure_dict_key(
                                    commits[group][topic], partition, 0)
                                commits[group][topic][partition] += 1
                                collectors.set_commits(commits)

                                commit_timestamps = ensure_dict_key(
                                    commit_timestamps, group, {})
                                commit_timestamps[group] = ensure_dict_key(
                                    commit_timestamps[group], topic, {})
                                commit_timestamps[group][
                                    topic] = ensure_dict_key(
                                        commit_timestamps[group][topic],
                                        partition, 0)
                                commit_timestamps[group][topic][
                                    partition] = commit_timestamp
                                collectors.set_commit_timestamps(
                                    commit_timestamps)

                        else:
                            # The group has been removed, so we should not report metrics
                            group = key_dict['group']
                            topic = key_dict['topic']
                            partition = key_dict['partition']

                            if group in offsets:
                                if topic in offsets[group]:
                                    if partition in offsets[group][topic]:
                                        del offsets[group][topic][partition]

                            if group in commits:
                                if topic in commits[group]:
                                    if partition in commits[group][topic]:
                                        del commits[group][topic][partition]

                            if group in commit_timestamps:
                                if topic in commit_timestamps[group]:
                                    if partition in commit_timestamps[group][
                                            topic]:
                                        del commit_timestamps[group][topic][
                                            partition]

                # Check if we need to run any scheduled jobs
                # after each message.
                scheduled_jobs = scheduler.run_scheduled_jobs(scheduled_jobs)

            # Also check if we need to run any scheduled jobs
            # each time the consumer times out, in case there
            # aren't any messages to consume.
            scheduled_jobs = scheduler.run_scheduled_jobs(scheduled_jobs)

    except KeyboardInterrupt:
        pass

    shutdown()
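
ensure_dict_key is used above to make sure a nested key exists before it is updated; a minimal sketch consistent with that usage:

def ensure_dict_key(mapping, key, default):
    # Return the mapping with `key` present, initialised to `default` if it was missing.
    if key not in mapping:
        mapping[key] = default
    return mapping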
Example #6
def cli(**options):
    """Export Elasticsearch query results to Prometheus."""
    if options['basic_user'] and options['basic_password'] is None:
        raise click.BadOptionUsage('basic_user',
                                   'Username provided with no password.')
    elif options['basic_user'] is None and options['basic_password']:
        raise click.BadOptionUsage('basic_password',
                                   'Password provided with no username.')
    elif options['basic_user']:
        http_auth = (options['basic_user'], options['basic_password'])
    else:
        http_auth = None

    if not options['ca_certs'] and options['client_cert']:
        raise click.BadOptionUsage(
            'client_cert',
            '--client-cert can only be used when --ca-certs is provided.')
    elif not options['ca_certs'] and options['client_key']:
        raise click.BadOptionUsage(
            'client_key',
            '--client-key can only be used when --ca-certs is provided.')
    elif options['client_cert'] and not options['client_key']:
        raise click.BadOptionUsage(
            'client_cert',
            '--client-key must be provided when --client-cert is used.')
    elif not options['client_cert'] and options['client_key']:
        raise click.BadOptionUsage(
            'client_key',
            '--client-cert must be provided when --client-key is used.')

    log_handler = logging.StreamHandler()
    log_format = '[%(asctime)s] %(name)s.%(levelname)s %(threadName)s %(message)s'
    formatter = JogFormatter(
        log_format) if options['json_logging'] else logging.Formatter(
            log_format)
    log_handler.setFormatter(formatter)

    log_level = getattr(logging, options['log_level'])
    logging.basicConfig(
        handlers=[log_handler],
        level=logging.DEBUG if options['verbose'] else log_level)
    logging.captureWarnings(True)

    port = options['port']
    es_cluster = options['es_cluster'].split(',')

    if es_cluster == ['consul']:
        consul_host = options['consul_host']
        consul_port = options['consul_port']

        # check consul connection
        while True:
            try:
                log.info('Connecting to Consul agent at {}:{}...'.format(
                    consul_host, consul_port))
                consul_client.connect(consul_host, consul_port)
                es_cluster = [
                    consul_client.get_service_address(options['es_service'])
                ]
                break
            except Exception:
                log.info('retrying to connect to consul after 5 seconds...')
                time.sleep(5)
                continue
        log.info('Found Elasticsearch registered at {}.'.format(es_cluster))

    if options['ca_certs']:
        es_client = Elasticsearch(es_cluster,
                                  verify_certs=True,
                                  ca_certs=options['ca_certs'],
                                  client_cert=options['client_cert'],
                                  client_key=options['client_key'],
                                  http_auth=http_auth)
    else:
        es_client = Elasticsearch(es_cluster,
                                  verify_certs=False,
                                  http_auth=http_auth)
    # check es health
    while True:
        try:
            log.info('Checking Elasticsearch client health...')
            es_client.cluster.health()
            break
        except Exception:
            log.info(
                'Elasticsearch client is not ready. Retry after 5 seconds...')
            time.sleep(5)
            continue

    log.info('Elasticsearch is ready')

    scheduler = None

    if not options['query_disable']:
        config = configparser.ConfigParser(converters=CONFIGPARSER_CONVERTERS)
        config.read(options['config_file'])

        config_dir_file_pattern = os.path.join(options['config_dir'], '*.cfg')
        config_dir_sorted_files = sorted(glob.glob(config_dir_file_pattern))
        config.read(config_dir_sorted_files)

        query_prefix = 'query_'
        queries = {}
        for section in config.sections():
            if section.startswith(query_prefix):
                query_name = section[len(query_prefix):]
                interval = config.getfloat(section,
                                           'QueryIntervalSecs',
                                           fallback=15)
                timeout = config.getfloat(section,
                                          'QueryTimeoutSecs',
                                          fallback=10)
                indices = config.get(section, 'QueryIndices', fallback='_all')
                query = json.loads(config.get(section, 'QueryJson'))
                on_error = config.getenum(section,
                                          'QueryOnError',
                                          fallback='drop')
                on_missing = config.getenum(section,
                                            'QueryOnMissing',
                                            fallback='drop')

                queries[query_name] = (interval, timeout, indices, query,
                                       on_error, on_missing)

        scheduler = sched.scheduler()

        if queries:
            for query_name, (interval, timeout, indices, query, on_error,
                             on_missing) in queries.items():
                schedule_job(scheduler, interval, run_query, es_client,
                             query_name, indices, query, timeout, on_error,
                             on_missing)
        else:
            log.error('No queries found in config file(s)')
            return

    if not options['cluster_health_disable']:
        REGISTRY.register(
            ClusterHealthCollector(es_client,
                                   options['cluster_health_timeout'],
                                   options['cluster_health_level']))

    if not options['nodes_stats_disable']:
        REGISTRY.register(
            NodesStatsCollector(es_client,
                                options['nodes_stats_timeout'],
                                metrics=options['nodes_stats_metrics']))

    if not options['indices_aliases_disable']:
        REGISTRY.register(
            IndicesAliasesCollector(es_client,
                                    options['indices_aliases_timeout']))

    if not options['indices_mappings_disable']:
        REGISTRY.register(
            IndicesMappingsCollector(es_client,
                                     options['indices_mappings_timeout']))

    if not options['indices_stats_disable']:
        parse_indices = options['indices_stats_mode'] == 'indices'
        REGISTRY.register(
            IndicesStatsCollector(es_client,
                                  options['indices_stats_timeout'],
                                  parse_indices=parse_indices,
                                  metrics=options['indices_stats_metrics'],
                                  fields=options['indices_stats_fields']))

    if scheduler:
        REGISTRY.register(QueryMetricCollector())

    log.info('Starting server...')
    start_http_server(port)
    log.info('Server started on port %(port)s', {'port': port})

    if scheduler:
        scheduler.run()
    else:
        while True:
            time.sleep(5)
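
config.getenum comes from the converters passed to ConfigParser: configparser generates a get<name> method for every entry in the converters dict. A sketch of CONFIGPARSER_CONVERTERS consistent with the fallback='drop' defaults above (the full set of allowed values is an assumption):

def _enum_converter(value):
    # Normalise and validate enum-style options such as QueryOnError/QueryOnMissing.
    value = value.lower()
    if value not in ('drop', 'zero', 'preserve'):  # assumed allowed values
        raise ValueError('unsupported value: {}'.format(value))
    return value

CONFIGPARSER_CONVERTERS = {'enum': _enum_converter}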
Example #7
def main():
    signal.signal(signal.SIGTERM, signal_handler)

    parser = argparse.ArgumentParser(
        description='Export Kafka consumer offsets to Prometheus.')
    parser.add_argument(
        '-b',
        '--bootstrap-brokers',
        default='localhost',
        help='Addresses of brokers in a Kafka cluster to talk to.' +
        ' Brokers should be separated by commas e.g. broker1,broker2.' +
        ' Ports can be provided if non-standard (9092) e.g. broker1:9999.' +
        ' (default: localhost)')
    parser.add_argument(
        '-p',
        '--port',
        type=int,
        default=9208,
        help='Port to serve the metrics endpoint on. (default: 9208)')
    parser.add_argument(
        '-s',
        '--from-start',
        action='store_true',
        help='Start from the beginning of the `__consumer_offsets` topic.')
    parser.add_argument(
        '--topic-interval',
        type=float,
        default=30.0,
        help='How often to refresh topic information, in seconds. (default: 30)'
    )
    parser.add_argument(
        '--high-water-interval',
        type=float,
        default=10.0,
        help=
        'How often to refresh high-water information, in seconds. (default: 10)'
    )
    parser.add_argument(
        '--low-water-interval',
        type=float,
        default=10.0,
        help=
        'How often to refresh low-water information, in seconds. (default: 10)'
    )
    parser.add_argument(
        '--consumer-config',
        action='append',
        default=[],
        help=
        'Provide additional Kafka consumer config as a consumer.properties file. Multiple files will be merged, later files having precedence.'
    )
    parser.add_argument('-j',
                        '--json-logging',
                        action='store_true',
                        help='Turn on json logging.')
    parser.add_argument(
        '--log-level',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='detail level to log. (default: INFO)')
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='turn on verbose (DEBUG) logging. Overrides --log-level.')
    args = parser.parse_args()

    log_handler = logging.StreamHandler()
    log_format = '[%(asctime)s] %(name)s.%(levelname)s %(threadName)s %(message)s'
    formatter = JogFormatter(log_format) \
        if args.json_logging \
        else logging.Formatter(log_format)
    log_handler.setFormatter(formatter)

    log_level = getattr(logging, args.log_level)
    logging.basicConfig(handlers=[log_handler],
                        level=logging.DEBUG if args.verbose else log_level)
    logging.captureWarnings(True)

    port = args.port

    consumer_config = {
        'bootstrap_servers': 'localhost',
        'auto_offset_reset': 'latest',
        'group_id': None,
        'consumer_timeout_ms': 500
    }

    for filename in args.consumer_config:
        with open(filename) as f:
            raw_config = javaproperties.load(f)
            converted_config = {
                k.replace('.', '_'): v
                for k, v in raw_config.items()
            }
            consumer_config.update(converted_config)

    if args.bootstrap_brokers:
        consumer_config['bootstrap_servers'] = args.bootstrap_brokers.split(
            ',')

    if args.from_start:
        consumer_config['auto_offset_reset'] = 'earliest'

    consumer = KafkaConsumer('__consumer_offsets', **consumer_config)
    client = consumer._client

    topic_interval = args.topic_interval
    high_water_interval = args.high_water_interval
    low_water_interval = args.low_water_interval

    logging.info('Starting server...')
    start_http_server(port)
    logging.info('Server started on port %s', port)

    REGISTRY.register(collectors.HighwaterCollector())
    REGISTRY.register(collectors.LowwaterCollector())
    REGISTRY.register(collectors.ConsumerOffsetCollector())
    REGISTRY.register(collectors.ConsumerLagCollector())
    REGISTRY.register(collectors.ConsumerLeadCollector())
    REGISTRY.register(collectors.ConsumerCommitsCollector())
    REGISTRY.register(collectors.ExporterOffsetCollector())
    REGISTRY.register(collectors.ExporterLagCollector())
    REGISTRY.register(collectors.ExporterLeadCollector())

    scheduled_jobs = setup_fetch_jobs(topic_interval, high_water_interval,
                                      low_water_interval, client)

    try:
        while True:
            for message in consumer:
                offsets = collectors.get_offsets()
                commits = collectors.get_commits()
                exporter_offsets = collectors.get_exporter_offsets()

                exporter_partition = message.partition
                exporter_offset = message.offset
                exporter_offsets = ensure_dict_key(exporter_offsets,
                                                   exporter_partition,
                                                   exporter_offset)
                exporter_offsets[exporter_partition] = exporter_offset
                collectors.set_exporter_offsets(exporter_offsets)

                if message.key and message.value:
                    key = parse_key(message.key)
                    if key:
                        value = parse_value(message.value)

                        group = key[1]
                        topic = key[2]
                        partition = key[3]
                        offset = value[1]

                        offsets = ensure_dict_key(offsets, group, {})
                        offsets[group] = ensure_dict_key(
                            offsets[group], topic, {})
                        offsets[group][topic] = ensure_dict_key(
                            offsets[group][topic], partition, offset)
                        offsets[group][topic][partition] = offset
                        collectors.set_offsets(offsets)

                        commits = ensure_dict_key(commits, group, {})
                        commits[group] = ensure_dict_key(
                            commits[group], topic, {})
                        commits[group][topic] = ensure_dict_key(
                            commits[group][topic], partition, 0)
                        commits[group][topic][partition] += 1
                        collectors.set_commits(commits)

                # Check if we need to run any scheduled jobs
                # after each message.
                scheduled_jobs = scheduler.run_scheduled_jobs(scheduled_jobs)

            # Also check if we need to run any scheduled jobs
            # each time the consumer times out, in case there
            # aren't any messages to consume.
            scheduled_jobs = scheduler.run_scheduled_jobs(scheduled_jobs)

    except KeyboardInterrupt:
        pass

    shutdown()
Example #8
def main():
    signal.signal(signal.SIGTERM, signal_handler)

    parser = argparse.ArgumentParser(
        description='Export Kafka consumer offsets to Prometheus.')
    parser.add_argument(
        '-b',
        '--bootstrap-brokers',
        default='localhost',
        help='Addresses of brokers in a Kafka cluster to talk to.' +
        ' Brokers should be separated by commas e.g. broker1,broker2.' +
        ' Ports can be provided if non-standard (9092) e.g. broker1:9999.' +
        ' (default: localhost)')
    parser.add_argument(
        '-p',
        '--port',
        type=int,
        default=9208,
        help='Port to serve the metrics endpoint on. (default: 9208)')
    parser.add_argument(
        '-s',
        '--from-start',
        action='store_true',
        help='Start from the beginning of the `__consumer_offsets` topic.')
    parser.add_argument(
        '--topic-interval',
        type=float,
        default=30.0,
        help='How often to refresh topic information, in seconds. (default: 30)'
    )
    parser.add_argument(
        '--high-water-interval',
        type=float,
        default=10.0,
        help=
        'How often to refresh high-water information, in seconds. (default: 10)'
    )
    parser.add_argument(
        '--consumer-config',
        action='append',
        default=[],
        help=
        'Provide additional Kafka consumer config as a consumer.properties file. Multiple files will be merged, later files having precedence.'
    )
    parser.add_argument('-j',
                        '--json-logging',
                        action='store_true',
                        help='Turn on json logging.')
    parser.add_argument(
        '--log-level',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='detail level to log. (default: INFO)')
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='turn on verbose (DEBUG) logging. Overrides --log-level.')
    args = parser.parse_args()

    log_handler = logging.StreamHandler()
    log_format = '[%(asctime)s] %(name)s.%(levelname)s %(threadName)s %(message)s'
    formatter = JogFormatter(log_format) \
        if args.json_logging \
        else logging.Formatter(log_format)
    log_handler.setFormatter(formatter)

    log_level = getattr(logging, args.log_level)
    logging.basicConfig(handlers=[log_handler],
                        level=logging.DEBUG if args.verbose else log_level)
    logging.captureWarnings(True)

    port = args.port

    consumer_config = {
        'bootstrap_servers': 'localhost',
        'auto_offset_reset': 'latest',
        'group_id': None,
        'consumer_timeout_ms': 500
    }

    for filename in args.consumer_config:
        with open(filename) as f:
            raw_config = javaproperties.load(f)
            converted_config = {
                k.replace('.', '_'): v
                for k, v in raw_config.items()
            }
            consumer_config.update(converted_config)

    if args.bootstrap_brokers:
        consumer_config['bootstrap_servers'] = args.bootstrap_brokers.split(
            ',')

    if args.from_start:
        consumer_config['auto_offset_reset'] = 'earliest'

    consumer = KafkaConsumer('__consumer_offsets', **consumer_config)
    client = consumer._client

    topic_interval = args.topic_interval
    high_water_interval = args.high_water_interval

    logging.info('Starting server...')
    start_http_server(port)
    logging.info('Server started on port %s', port)

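    # The helpers below decode the binary key/value format of __consumer_offsets
    # messages: big-endian shorts/ints/longs and length-prefixed UTF-8 strings.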
    def read_short(bytes):
        num = unpack_from('>h', bytes)[0]
        remaining = bytes[2:]
        return (num, remaining)

    def read_int(bytes):
        num = unpack_from('>i', bytes)[0]
        remaining = bytes[4:]
        return (num, remaining)

    def read_long_long(bytes):
        num = unpack_from('>q', bytes)[0]
        remaining = bytes[8:]
        return (num, remaining)

    def read_string(bytes):
        length, remaining = read_short(bytes)
        string = remaining[:length].decode('utf-8')
        remaining = remaining[length:]
        return (string, remaining)

    def parse_key(bytes):
        (version, remaining_key) = read_short(bytes)
        if version == 1 or version == 0:
            (group, remaining_key) = read_string(remaining_key)
            (topic, remaining_key) = read_string(remaining_key)
            (partition, remaining_key) = read_int(remaining_key)
            return (version, group, topic, partition)

    def parse_value(bytes):
        (version, remaining_key) = read_short(bytes)
        if version == 0:
            (offset, remaining_key) = read_long_long(remaining_key)
            (metadata, remaining_key) = read_string(remaining_key)
            (timestamp, remaining_key) = read_long_long(remaining_key)
            return (version, offset, metadata, timestamp)
        elif version == 1:
            (offset, remaining_key) = read_long_long(remaining_key)
            (metadata, remaining_key) = read_string(remaining_key)
            (commit_timestamp, remaining_key) = read_long_long(remaining_key)
            (expire_timestamp, remaining_key) = read_long_long(remaining_key)
            return (version, offset, metadata, commit_timestamp,
                    expire_timestamp)

    def update_topics(api_version, metadata):
        logging.info('Received topics and partition assignments')

        global topics

        if api_version == 0:
            TOPIC_ERROR = 0
            TOPIC_NAME = 1
            TOPIC_PARTITIONS = 2
            PARTITION_ERROR = 0
            PARTITION_NUMBER = 1
            PARTITION_LEADER = 2
        else:
            TOPIC_ERROR = 0
            TOPIC_NAME = 1
            TOPIC_PARTITIONS = 3
            PARTITION_ERROR = 0
            PARTITION_NUMBER = 1
            PARTITION_LEADER = 2

        new_topics = {}
        for t in metadata.topics:
            error_code = t[TOPIC_ERROR]
            if error_code:
                error = Errors.for_code(error_code)(t)
                logging.warning(
                    'Received error in metadata response at topic level: %s',
                    error)
            else:
                topic = t[TOPIC_NAME]
                partitions = t[TOPIC_PARTITIONS]

                new_partitions = {}
                for p in partitions:
                    error_code = p[PARTITION_ERROR]
                    if error_code:
                        error = Errors.for_code(error_code)(p)
                        logging.warning(
                            'Received error in metadata response at partition level for topic %(topic)s: %(error)s',
                            {
                                'topic': topic,
                                'error': error
                            })
                    else:
                        partition = p[PARTITION_NUMBER]
                        leader = p[PARTITION_LEADER]
                        logging.debug(
                            'Received partition assignment for partition %(partition)s of topic %(topic)s',
                            {
                                'partition': partition,
                                'topic': topic
                            })

                        new_partitions[partition] = leader

                new_topics[topic] = new_partitions

        topics = new_topics

    def update_highwater(offsets):
        logging.info('Received high-water marks')

        for topic, partitions in offsets.topics:
            for partition, error_code, offsets in partitions:
                if error_code:
                    error = Errors.for_code(error_code)(
                        (partition, error_code, offsets))
                    logging.warning(
                        'Received error in offset response for topic %(topic)s: %(error)s',
                        {
                            'topic': topic,
                            'error': error
                        })
                else:
                    logging.debug(
                        'Received high-water marks for partition %(partition)s of topic %(topic)s',
                        {
                            'partition': partition,
                            'topic': topic
                        })

                    update_gauge(
                        metric_name='kafka_topic_highwater',
                        label_dict={
                            'topic': topic,
                            'partition': partition
                        },
                        value=offsets[0],
                        doc='The offset of the head of a partition in a topic.'
                    )

    def fetch_topics(this_time):
        logging.info('Requesting topics and partition assignments')

        next_time = this_time + topic_interval
        try:
            node = client.least_loaded_node()

            logging.debug(
                'Requesting topics and partition assignments from %(node)s',
                {'node': node})

            api_version = 0 if client.config['api_version'] < (0, 10) else 1
            request = MetadataRequest[api_version](None)
            f = client.send(node, request)
            f.add_callback(update_topics, api_version)
        except Exception:
            logging.exception(
                'Error requesting topics and partition assignments')
        finally:
            client.schedule(partial(fetch_topics, next_time), next_time)

    def fetch_highwater(this_time):
        logging.info('Requesting high-water marks')
        next_time = this_time + high_water_interval
        try:
            global topics
            if topics:
                nodes = {}
                for topic, partition_map in topics.items():
                    for partition, leader in partition_map.items():
                        if leader not in nodes:
                            nodes[leader] = {}
                        if topic not in nodes[leader]:
                            nodes[leader][topic] = []
                        nodes[leader][topic].append(partition)

                for node, topic_map in nodes.items():
                    logging.debug('Requesting high-water marks from %(node)s',
                                  {'node': node})

                    request = OffsetRequest[0](
                        -1,
                        [(topic, [(partition, OffsetResetStrategy.LATEST, 1)
                                  for partition in partitions])
                         for topic, partitions in topic_map.items()])
                    f = client.send(node, request)
                    f.add_callback(update_highwater)
        except Exception:
            logging.exception('Error requesting high-water marks')
        finally:
            client.schedule(partial(fetch_highwater, next_time), next_time)

    now_time = time.time()

    fetch_topics(now_time)
    fetch_highwater(now_time)

    try:
        while True:
            for message in consumer:
                update_gauge(
                    metric_name=METRIC_PREFIX + 'exporter_offset',
                    label_dict={'partition': message.partition},
                    value=message.offset,
                    doc=
                    'The current offset of the exporter consumer in a partition of the __consumer_offsets topic.'
                )

                if message.key and message.value:
                    key = parse_key(message.key)
                    if key:
                        value = parse_value(message.value)

                        update_gauge(
                            metric_name=METRIC_PREFIX + 'offset',
                            label_dict={
                                'group': key[1],
                                'topic': key[2],
                                'partition': key[3]
                            },
                            value=value[1],
                            doc=
                            'The current offset of a consumer group in a partition of a topic.'
                        )

                        increment_counter(
                            metric_name=METRIC_PREFIX + 'commits',
                            label_dict={
                                'group': key[1],
                                'topic': key[2],
                                'partition': key[3]
                            },
                            doc=
                            'The number of commit messages read by the exporter consumer from a consumer group for a partition of a topic.'
                        )

    except KeyboardInterrupt:
        pass

    shutdown()
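
Note: parse_key and parse_value used above are project helpers that are not shown in this excerpt. The indexing (key[1] = group, key[2] = topic, key[3] = partition, value[1] = offset) suggests they return tuples that start with a schema version. A rough sketch of an offset-commit key decoder, assuming Kafka's standard big-endian encoding for __consumer_offsets keys (schema versions 0 and 1), might look like the following; it is an assumption, not the project's actual implementation.

import struct


def parse_key(key_bytes):
    # Hypothetical sketch: decode an offset-commit key into
    # (version, group, topic, partition). Keys with schema version > 1 are
    # group-metadata records rather than offset commits, so return None.
    (version,) = struct.unpack_from('>h', key_bytes, 0)
    if version > 1:
        return None

    pos = 2
    (group_len,) = struct.unpack_from('>h', key_bytes, pos)
    pos += 2
    group = key_bytes[pos:pos + group_len].decode('utf-8')
    pos += group_len

    (topic_len,) = struct.unpack_from('>h', key_bytes, pos)
    pos += 2
    topic = key_bytes[pos:pos + topic_len].decode('utf-8')
    pos += topic_len

    (partition,) = struct.unpack_from('>i', key_bytes, pos)
    return (version, group, topic, partition)
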
Example #9
0
def cli(**options):
    """Export MySQL query results to Prometheus."""

    log_handler = logging.StreamHandler()
    log_format = '[%(asctime)s] %(name)s.%(levelname)s %(threadName)s %(message)s'
    formatter = JogFormatter(log_format) \
        if options['json_logging'] \
        else logging.Formatter(log_format)
    log_handler.setFormatter(formatter)

    log_level = getattr(logging, options['log_level'])
    logging.basicConfig(
        handlers=[log_handler],
        level=logging.DEBUG if options['verbose'] else log_level)
    logging.captureWarnings(True)

    port = options['port']
    mysql_host, mysql_port = options['mysql_server']

    username = options['mysql_user']
    password = options['mysql_password']
    timezone = options['mysql_local_timezone']

    config = configparser.ConfigParser(converters=CONFIGPARSER_CONVERTERS)
    config.read_file(options['config_file'])

    config_dir_file_pattern = os.path.join(options['config_dir'], '*.cfg')
    config_dir_sorted_files = sorted(glob.glob(config_dir_file_pattern))
    config.read(config_dir_sorted_files)

    query_prefix = 'query_'
    queries = {}
    for section in config.sections():
        if section.startswith(query_prefix):
            query_name = section[len(query_prefix):]
            interval = config.getfloat(section,
                                       'QueryIntervalSecs',
                                       fallback=15)
            db_name = config.get(section, 'QueryDatabase')
            query = config.get(section, 'QueryStatement')
            value_columns = config.get(section, 'QueryValueColumns').split(',')
            on_error = config.getenum(section, 'QueryOnError', fallback='drop')
            on_missing = config.getenum(section,
                                        'QueryOnMissing',
                                        fallback='drop')

            queries[query_name] = (interval, db_name, query, value_columns,
                                   on_error, on_missing)
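            # For illustration only (hypothetical database and query names), a
            # query section in the config file(s) read above might look like:
            #
            #   [query_pending_orders]
            #   QueryIntervalSecs = 30
            #   QueryDatabase = shop
            #   QueryStatement = SELECT COUNT(*) AS total FROM orders WHERE status = 'pending'
            #   QueryValueColumns = total
            #   QueryOnError = drop
            #   QueryOnMissing = drop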

    scheduler = sched.scheduler()

    mysql_kwargs = dict(
        host=mysql_host,
        port=mysql_port,
        user=username,
        password=password,
        # Use autocommit mode to avoid keeping the same transaction across query
        # runs when the connection is reused. Using the same transaction would
        # prevent changes from being reflected in results, and therefore metrics.
        # Note: Queries could theoretically change data...
        autocommit=True)
    if timezone:
        mysql_kwargs['init_command'] = "SET time_zone = '{}'".format(timezone)

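    # PersistentDB (from DBUtils) keeps a dedicated, reusable connection per
    # thread, created lazily through the pymysql module passed as `creator`.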
    mysql_client = PersistentDB(creator=pymysql, **mysql_kwargs)

    if queries:
        for query_name, (interval, db_name, query, value_columns, on_error,
                         on_missing) in queries.items():
            schedule_job(scheduler, interval, run_query, mysql_client,
                         query_name, db_name, query, value_columns, on_error,
                         on_missing)
    else:
        log.warning('No queries found in config file(s)')

    REGISTRY.register(QueryMetricCollector())

    log.info('Starting server...')
    start_http_server(port)
    log.info('Server started on port %(port)s', {'port': port})

    scheduler.run()
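
Note: schedule_job and run_query above are project helpers that are not shown in this excerpt. As a rough sketch (an assumption about the intended behaviour, not the project's actual code), a recurring job on the sched.scheduler created above could be wired up like this:

def schedule_job(scheduler, interval, func, *args):
    # Hypothetical sketch: run func(*args) immediately, then re-enqueue it
    # every `interval` seconds so scheduler.run() keeps it recurring.
    def job():
        try:
            func(*args)
        finally:
            scheduler.enter(interval, 1, job)

    scheduler.enter(0, 1, job)
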
def main():
    signal.signal(signal.SIGTERM, signal_handler)

    parser = argparse.ArgumentParser(
        description='Export Kafka consumer offsets to Prometheus.')
    parser.add_argument(
        '-b',
        '--bootstrap-brokers',
        help='Addresses of brokers in a Kafka cluster to talk to.' +
        ' Brokers should be separated by commas e.g. broker1,broker2.' +
        ' Ports can be provided if non-standard (9092) e.g. brokers1:9999.' +
        ' (default: localhost)')
    parser.add_argument(
        '-p',
        '--port',
        type=int,
        default=9208,
        help='Port to serve the metrics endpoint on. (default: 9208)')
    parser.add_argument('-c',
                        '--consumers',
                        type=int,
                        default=1,
                        help='Number of Kafka consumers to use (parallelism)')
    parser.add_argument(
        '--use-confluent-kafka',
        action='store_true',
        help='Use confluent_kafka rather than kafka-python for consumption')
    parser.add_argument(
        '-s',
        '--from-start',
        action='store_true',
        help='Start from the beginning of the `__consumer_offsets` topic.')
    parser.add_argument(
        '--topic-interval',
        type=float,
        default=30.0,
        help='How often to refresh topic information, in seconds. (default: 30)'
    )
    parser.add_argument(
        '--high-water-interval',
        type=float,
        default=10.0,
        help=
        'How often to refresh high-water information, in seconds. (default: 10)'
    )
    parser.add_argument(
        '--low-water-interval',
        type=float,
        default=10.0,
        help=
        'How often to refresh low-water information, in seconds. (default: 10)'
    )
    parser.add_argument(
        '--consumer-config',
        action='append',
        default=[],
        help=
        'Provide additional Kafka consumer config as a consumer.properties file. Multiple files will be merged, later files having precedence.'
    )
    parser.add_argument('-j',
                        '--json-logging',
                        action='store_true',
                        help='Turn on json logging.')
    parser.add_argument(
        '--log-level',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='detail level to log. (default: INFO)')
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='turn on verbose (DEBUG) logging. Overrides --log-level.')
    args = parser.parse_args()

    log_handler = logging.StreamHandler()
    log_format = '[%(asctime)s] %(name)s.%(levelname)s %(threadName)s %(message)s'
    formatter = JogFormatter(log_format) \
        if args.json_logging \
        else logging.Formatter(log_format)
    log_handler.setFormatter(formatter)

    log_level = getattr(logging, args.log_level)
    logging.basicConfig(handlers=[log_handler],
                        level=logging.DEBUG if args.verbose else log_level)
    logging.captureWarnings(True)

    port = args.port

    consumer_config = {
        'auto_offset_reset': 'latest',
        'group_id': None,
        'consumer_timeout_ms': 500
    }

    # The same config is used for both kafka-python and confluent_kafka;
    # most important properties share names (with _ instead of . as separator).
    # One difference: with a single consumer, kafka-python requires group_id
    # to be unset, while confluent_kafka always requires a group_id.
    if not args.use_confluent_kafka:
        if args.consumers > 1:
            consumer_config['group_id'] = \
                'prometheus-kafka-consumer-exporter-' + id_generator()
            consumer_config['enable_auto_commit'] = False
    else:
        consumer_config['group_id'] = \
            'prometheus-kafka-consumer-exporter-' + id_generator()

    for filename in args.consumer_config:
        with open(filename) as f:
            raw_config = javaproperties.load(f)
            converted_config = {
                k: int(v) if v.isdigit() else
                True if v == 'True' else False if v == 'False' else v
                for k, v in raw_config.items()
            }
            consumer_config.update(converted_config)
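            # e.g. javaproperties yields string values such as
            # {'session.timeout.ms': '6000', 'check.crcs': 'False'}, which the
            # comprehension above converts to 6000 (int) and False (bool);
            # the property names here are only illustrative.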

    if 'bootstrap_servers' not in consumer_config:
        consumer_config['bootstrap_servers'] = 'localhost'
        logging.info('bootstrap_servers not specified - using localhost')

    if args.bootstrap_brokers:
        consumer_config['bootstrap_servers'] = args.bootstrap_brokers.split(',')

    if args.from_start:
        consumer_config['auto_offset_reset'] = 'earliest'

    # retain only settings relevant for kafka-python
    kafka_python_consumer_config = cleanup_conf(consumer_config)

    consumer = KafkaConsumer(**kafka_python_consumer_config)
    client = consumer._client

    topic_interval = args.topic_interval
    high_water_interval = args.high_water_interval
    low_water_interval = args.low_water_interval

    logging.info('Starting server...')
    start_http_server(port)
    logging.info('Server started on port %s', port)

    REGISTRY.register(collectors.HighwaterCollector())
    REGISTRY.register(collectors.LowwaterCollector())
    REGISTRY.register(collectors.ConsumerOffsetCollector())
    REGISTRY.register(collectors.ConsumerLagCollector())
    REGISTRY.register(collectors.ConsumerLeadCollector())
    REGISTRY.register(collectors.ConsumerCommitsCollector())
    REGISTRY.register(collectors.ExporterOffsetCollector())
    REGISTRY.register(collectors.ExporterLagCollector())
    REGISTRY.register(collectors.ExporterLeadCollector())

    scheduled_jobs = setup_fetch_jobs(topic_interval, high_water_interval,
                                      low_water_interval, client)

    mpc = MultiProcessConsumer(args.use_confluent_kafka, args.consumers, 5,
                               args.json_logging, args.log_level, args.verbose,
                               **consumer_config)
    try:
        while True:
            for item in mpc:

                offsets = collectors.get_offsets()
                commits = collectors.get_commits()
                exporter_offsets = collectors.get_exporter_offsets()

                exporter_offsets = merge_exporter_offsets(
                    exporter_offsets, item[0])
                offsets = merge_offsets(offsets, item[1])
                commits = merge_offsets(commits, item[2])

                collectors.set_exporter_offsets(exporter_offsets)
                collectors.set_offsets(offsets)
                collectors.set_commits(commits)

                # Check if we need to run any scheduled jobs
                # after each message.
                scheduled_jobs = scheduler.run_scheduled_jobs(scheduled_jobs)

            # Also check if we need to run any scheduled jobs
            # each time the consumer times out, in case there
            # aren't any messages to consume.
            scheduled_jobs = scheduler.run_scheduled_jobs(scheduled_jobs)

    except KeyboardInterrupt:
        pass

    mpc.stop()
    shutdown()
def _mp_consume(message_queue, report_inverval, json_logging, log_level,
                verbose, events, **consumer_options):
    conf = cleanup_conf(consumer_options)

    log_handler = logging.StreamHandler()
    log_format = '[%(asctime)s] %(name)s.%(levelname)s %(threadName)s %(message)s'
    formatter = JogFormatter(log_format) \
        if json_logging \
        else logging.Formatter(log_format)
    log_handler.setFormatter(formatter)

    log_level = getattr(logging, log_level)
    logging.basicConfig(handlers=[log_handler],
                        level=logging.DEBUG if verbose else log_level)
    logging.captureWarnings(True)
    this.logger = logging.getLogger(__name__)

    offsets = {}
    commits = {}
    exporter_offsets = {}
    while not events.exit.is_set():
        # Wait till the controller indicates us to start consumption
        events.start.wait()

        this.logger.info('Initialising Consumer')
        consumer = Consumer(conf, logger=this.logger)
        consumer.subscribe(['__consumer_offsets'], on_assign=on_assignment)

        start_time = time.time()
        current_report_interval = randint(1, report_inverval + 1)

        i = 0
        while True:
            # If we are asked to quit, do so - but do not check too frequently
            if time.time() - start_time > current_report_interval and (
                    events.exit.is_set() or events.stop.is_set()):
                consumer.close()
                break

            message = consumer.poll(timeout=1.0)
            if message is None:
                continue
            if message.error():
                if message.error().code() == KafkaError._PARTITION_EOF:
                    this.logger.debug('Reached end of [%d] at offset %d',
                                      message.partition(), message.offset())
                    continue
                else:
                    this.logger.error('poll() failed: %r', message.error())

            exporter_partition = message.partition()
            exporter_offset = message.offset()
            exporter_offsets = ensure_dict_key(exporter_offsets,
                                               exporter_partition,
                                               exporter_offset)
            exporter_offsets[exporter_partition] = exporter_offset

            if message.key() and message.value():
                key = parse_key(message.key())
                if key:
                    i += 1
                    value = parse_value(message.value())

                    group = key[1]
                    topic = key[2]
                    partition = key[3]
                    offset = value[1]

                    offsets = ensure_dict_key(offsets, group, {})
                    offsets[group] = ensure_dict_key(offsets[group], topic, {})
                    offsets[group][topic] = ensure_dict_key(
                        offsets[group][topic], partition, offset)
                    offsets[group][topic][partition] = offset

                    commits = ensure_dict_key(commits, group, {})
                    commits[group] = ensure_dict_key(commits[group], topic, {})
                    commits[group][topic] = ensure_dict_key(
                        commits[group][topic], partition, 0)
                    commits[group][topic][partition] += 1

                    try:
                        if time.time() - start_time > current_report_interval:
                            this.logger.debug(
                                'Successfully processed %d messages/sec since the last report',
                                i / current_report_interval)
                            current_report_interval = randint(
                                1, report_inverval + 1)
                            start_time = time.time()
                            message_queue.put(
                                (exporter_offsets, offsets, commits),
                                timeout=report_inverval * 2)
                            clear_commits(commits)
                            i = 0

                    except queue.Full:
                        this.logger.error('Queue is full, backing off')
                        current_report_interval *= 2