class MetricsService(Service):
    """
    A service to provide a registry where metrics instruments can be registered
    and retrieved from. It continuously reports metrics to the specified
    InfluxDB instance.
    """

    logger = get_extended_debug_logger('trinity.components.builtin.metrics.MetricsService')

    def __init__(self,
                 influx_server: str,
                 influx_user: str,
                 influx_password: str,
                 influx_database: str,
                 host: str,
                 reporting_frequency: int = 10):

        self._influx_server = influx_server
        self._reporting_frequency = reporting_frequency
        self._registry = HostMetricsRegistry(host)
        self._reporter = InfluxReporter(
            registry=self._registry,
            protocol='https',
            port=443,
            database=influx_database,
            username=influx_user,
            password=influx_password,
            server=influx_server,
        )

    @property
    def registry(self) -> HostMetricsRegistry:
        """
        Return the :class:`trinity.components.builtin.metrics.registry.HostMetricsRegistry`
        at which metrics instruments can be registered and retrieved.
        """
        return self._registry

    async def run(self) -> None:
        self.logger.info("Reporting metrics to %s", self._influx_server)
        self.manager.run_daemon_task(self._continuously_report)
        await self.manager.wait_finished()

    async def _continuously_report(self) -> None:
        async for _ in trio_utils.every(self._reporting_frequency):
            self._reporter.report_now()
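
# A rough usage sketch of the service above. It assumes the Service base
# class is driven by the async-service library's background_trio_service()
# helper and that HostMetricsRegistry exposes the pyformance counter API;
# the InfluxDB endpoint and credentials are hypothetical placeholders.
import trio
from async_service import background_trio_service  # assumed runner

async def demo() -> None:
    service = MetricsService(
        influx_server='metrics.example.com',  # placeholder endpoint
        influx_user='trinity',
        influx_password='secret',
        influx_database='trinity',
        host='node-1',
    )
    async with background_trio_service(service):
        # Instruments registered on the service's registry are picked up
        # by every subsequent report cycle.
        counter = service.registry.counter('trinity.demo/ticks.counter')
        counter.inc()
        await trio.sleep(30)

trio.run(demo)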
def main(args=None):
    if args is None:
        args = sys.argv[1:]
    opts = parse_args(args)
    verbose_level = opts.verbose

    # Determine if any args which support delimited lists should be modified
    if opts.peers:
        opts.peers = _split_comma_append_args(opts.peers)

    if opts.seeds:
        opts.seeds = _split_comma_append_args(opts.seeds)

    init_console_logging(verbose_level=verbose_level)

    if opts.network_auth:
        opts.network_auth = {"network": opts.network_auth}

    try:
        path_config = load_path_config(config_dir=opts.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    try:
        opts_config = create_validator_config(opts)
        validator_config = load_validator_config(opts_config, path_config.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    # Process initial initialization errors, delaying the sys.exit(1) until
    # all errors have been reported to the user (via LOGGER.error()). This
    # is intended to provide enough information to the user so they can
    # correct multiple errors before restarting the validator.
    init_errors = False
    try:
        identity_signer = load_identity_signer(
            key_dir=path_config.key_dir,
            key_name='validator')
    except LocalConfigurationError as e:
        log_configuration(log_dir=path_config.log_dir, name="validator")
        LOGGER.error(str(e))
        init_errors = True

    log_config = get_log_config()
    if not init_errors:
        if log_config is not None:
            log_configuration(log_config=log_config)
            if log_config.get('root') is not None:
                init_console_logging(verbose_level=verbose_level)
        else:
            log_configuration(log_dir=path_config.log_dir, name="validator")

    try:
        version = pkg_resources.get_distribution(DISTRIBUTION_NAME).version
    except pkg_resources.DistributionNotFound:
        version = 'UNKNOWN'
    LOGGER.info('%s (Hyperledger Sawtooth) version %s', DISTRIBUTION_NAME, version)

    if LOGGER.isEnabledFor(logging.INFO):
        LOGGER.info('; '.join([
            'config [path]: {}'.format(line)
            for line in path_config.to_toml_string()
        ]))

    if not check_directory(path=path_config.data_dir, human_readable_name='Data'):
        init_errors = True
    if not check_directory(path=path_config.log_dir, human_readable_name='Log'):
        init_errors = True

    endpoint = validator_config.endpoint
    if endpoint is None:
        # Need to use join here to get the string "0.0.0.0". Otherwise,
        # bandit thinks we are binding to all interfaces and returns a
        # Medium security risk.
        interfaces = ["*", ".".join(["0", "0", "0", "0"])]
        interfaces += netifaces.interfaces()
        endpoint = validator_config.bind_network
        for interface in interfaces:
            if interface in validator_config.bind_network:
                LOGGER.error("Endpoint must be set when using %s", interface)
                init_errors = True
                break

    if init_errors:
        LOGGER.error("Initialization errors occurred (see previous log "
                     "ERROR messages), shutting down.")
        sys.exit(1)

    bind_network = validator_config.bind_network
    bind_component = validator_config.bind_component

    if "tcp://" not in bind_network:
        bind_network = "tcp://" + bind_network

    if "tcp://" not in bind_component:
        bind_component = "tcp://" + bind_component

    if validator_config.network_public_key is None or \
            validator_config.network_private_key is None:
        LOGGER.warning("Network key pair is not configured; network "
                       "communications between validators will not be "
                       "authenticated or encrypted.")

    wrapped_registry = None
    metrics_reporter = None
    if validator_config.opentsdb_url:
        LOGGER.info("Adding metrics reporter: url=%s, db=%s",
                    validator_config.opentsdb_url,
                    validator_config.opentsdb_db)

        url = urlparse(validator_config.opentsdb_url)
        proto, db_server, db_port = url.scheme, url.hostname, url.port

        registry = MetricsRegistry()
        wrapped_registry = MetricsRegistryWrapper(registry)

        metrics_reporter = InfluxReporter(
            registry=registry,
            reporting_interval=10,
            database=validator_config.opentsdb_db,
            prefix="sawtooth_validator",
            port=db_port,
            protocol=proto,
            server=db_server,
            username=validator_config.opentsdb_username,
            password=validator_config.opentsdb_password)
        metrics_reporter.start()

    # Verify state integrity before startup
    verify_state(bind_network, bind_component,
                 validator_config.scheduler, path_config.data_dir)

    LOGGER.info('Starting validator with %s scheduler', validator_config.scheduler)

    validator = Validator(bind_network,
                          bind_component,
                          endpoint,
                          validator_config.peering,
                          validator_config.seeds,
                          validator_config.peers,
                          path_config.data_dir,
                          path_config.config_dir,
                          identity_signer,
                          validator_config.scheduler,
                          validator_config.permissions,
                          validator_config.minimum_peer_connectivity,
                          validator_config.maximum_peer_connectivity,
                          validator_config.network_public_key,
                          validator_config.network_private_key,
                          roles=validator_config.roles,
                          metrics_registry=wrapped_registry)

    # pylint: disable=broad-except
    try:
        validator.start()
    except KeyboardInterrupt:
        LOGGER.info("Initiating graceful shutdown (press Ctrl+C again to force)")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)
    except GenesisError as genesis_err:
        LOGGER.error(str(genesis_err))
        sys.exit(1)
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        if metrics_reporter:
            metrics_reporter.stop()
        validator.stop()
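
# For reference, a minimal sketch of how urlparse() decomposes an
# --opentsdb-url value into the (scheme, hostname, port) triple handed to
# InfluxReporter above; the address is a hypothetical placeholder.
from urllib.parse import urlparse

url = urlparse("https://metrics.example.com:8086")
assert (url.scheme, url.hostname, url.port) == ("https", "metrics.example.com", 8086)
# Caveat: url.port is None when the URL omits an explicit port, so such a
# URL would pass port=None through to the reporter.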
def main(args=None):
    if args is None:
        args = sys.argv[1:]
    opts = parse_args(args)
    verbose_level = opts.verbose

    # Determine if any args which support delimited lists should be modified
    if opts.peers:
        opts.peers = _split_comma_append_args(opts.peers)

    if opts.seeds:
        opts.seeds = _split_comma_append_args(opts.seeds)

    init_console_logging(verbose_level=verbose_level)

    if opts.network_auth:
        opts.network_auth = {"network": opts.network_auth}

    try:
        path_config = load_path_config(config_dir=opts.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    try:
        opts_config = create_validator_config(opts)
        validator_config = load_validator_config(opts_config, path_config.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    # Process initial initialization errors, delaying the sys.exit(1) until
    # all errors have been reported to the user (via LOGGER.error()). This
    # is intended to provide enough information to the user so they can
    # correct multiple errors before restarting the validator.
    init_errors = False
    try:
        identity_signer = load_identity_signer(
            key_dir=path_config.key_dir,
            key_name='validator')
    except LocalConfigurationError as e:
        log_configuration(log_dir=path_config.log_dir, name="validator")
        LOGGER.error(str(e))
        init_errors = True

    log_config = get_log_config()
    if not init_errors:
        if log_config is not None:
            log_configuration(log_config=log_config)
            if log_config.get('root') is not None:
                init_console_logging(verbose_level=verbose_level)
        else:
            log_configuration(log_dir=path_config.log_dir, name="validator")

    try:
        version = pkg_resources.get_distribution(DISTRIBUTION_NAME).version
    except pkg_resources.DistributionNotFound:
        version = 'UNKNOWN'
    LOGGER.info(
        '%s (Hyperledger Sawtooth) version %s', DISTRIBUTION_NAME, version)

    if LOGGER.isEnabledFor(logging.INFO):
        LOGGER.info(
            '; '.join([
                'config [path]: {}'.format(line)
                for line in path_config.to_toml_string()
            ])
        )

    if not check_directory(path=path_config.data_dir, human_readable_name='Data'):
        init_errors = True
    if not check_directory(path=path_config.log_dir, human_readable_name='Log'):
        init_errors = True

    endpoint = validator_config.endpoint
    if endpoint is None:
        # Need to use join here to get the string "0.0.0.0". Otherwise,
        # bandit thinks we are binding to all interfaces and returns a
        # Medium security risk.
        interfaces = ["*", ".".join(["0", "0", "0", "0"])]
        interfaces += netifaces.interfaces()
        endpoint = validator_config.bind_network
        for interface in interfaces:
            if interface in validator_config.bind_network:
                LOGGER.error("Endpoint must be set when using %s", interface)
                init_errors = True
                break

    if init_errors:
        LOGGER.error("Initialization errors occurred (see previous log "
                     "ERROR messages), shutting down.")
        sys.exit(1)

    bind_network = validator_config.bind_network
    bind_component = validator_config.bind_component

    if "tcp://" not in bind_network:
        bind_network = "tcp://" + bind_network

    if "tcp://" not in bind_component:
        bind_component = "tcp://" + bind_component

    if validator_config.network_public_key is None or \
            validator_config.network_private_key is None:
        LOGGER.warning("Network key pair is not configured; network "
                       "communications between validators will not be "
                       "authenticated or encrypted.")

    wrapped_registry = None
    metrics_reporter = None
    if validator_config.opentsdb_url:
        LOGGER.info("Adding metrics reporter: url=%s, db=%s",
                    validator_config.opentsdb_url,
                    validator_config.opentsdb_db)

        url = urlparse(validator_config.opentsdb_url)
        proto, db_server, db_port = url.scheme, url.hostname, url.port

        registry = MetricsRegistry()
        wrapped_registry = MetricsRegistryWrapper(registry)

        metrics_reporter = InfluxReporter(
            registry=registry,
            reporting_interval=10,
            database=validator_config.opentsdb_db,
            prefix="sawtooth_validator",
            port=db_port,
            protocol=proto,
            server=db_server,
            username=validator_config.opentsdb_username,
            password=validator_config.opentsdb_password)
        metrics_reporter.start()

    LOGGER.info(
        'Starting validator with %s scheduler', validator_config.scheduler)

    validator = Validator(
        bind_network,
        bind_component,
        endpoint,
        validator_config.peering,
        validator_config.seeds,
        validator_config.peers,
        path_config.data_dir,
        path_config.config_dir,
        identity_signer,
        validator_config.scheduler,
        validator_config.permissions,
        validator_config.minimum_peer_connectivity,
        validator_config.maximum_peer_connectivity,
        validator_config.network_public_key,
        validator_config.network_private_key,
        roles=validator_config.roles,
        metrics_registry=wrapped_registry)

    # pylint: disable=broad-except
    try:
        validator.start()
    except KeyboardInterrupt:
        LOGGER.info("Initiating graceful shutdown (press Ctrl+C again to force)")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)
    except GenesisError as genesis_err:
        LOGGER.error(str(genesis_err))
        sys.exit(1)
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        if metrics_reporter:
            metrics_reporter.stop()
        validator.stop()
def main():
    loop = ZMQEventLoop()
    asyncio.set_event_loop(loop)

    connection = None
    try:
        opts = parse_args(sys.argv[1:])
        opts_config = RestApiConfig(
            bind=opts.bind,
            connect=opts.connect,
            timeout=opts.timeout,
            opentsdb_url=opts.opentsdb_url,
            opentsdb_db=opts.opentsdb_db)
        rest_api_config = load_rest_api_config(opts_config)
        url = None
        if "tcp://" not in rest_api_config.connect:
            url = "tcp://" + rest_api_config.connect
        else:
            url = rest_api_config.connect

        connection = Connection(url)

        log_config = get_log_config(filename="rest_api_log_config.toml")

        # If no TOML config is found, try loading a YAML one
        if log_config is None:
            log_config = get_log_config(filename="rest_api_log_config.yaml")

        if log_config is not None:
            log_configuration(log_config=log_config)
        else:
            log_dir = get_log_dir()
            log_configuration(log_dir=log_dir, name="rest_api")
        init_console_logging(verbose_level=opts.verbose)

        try:
            host, port = rest_api_config.bind[0].split(":")
            port = int(port)
        except ValueError:
            print("Unable to parse binding {}: Must be in the format"
                  " host:port".format(rest_api_config.bind[0]))
            sys.exit(1)

        wrapped_registry = None
        if rest_api_config.opentsdb_url:
            LOGGER.info("Adding metrics reporter: url=%s, db=%s",
                        rest_api_config.opentsdb_url,
                        rest_api_config.opentsdb_db)

            url = urlparse(rest_api_config.opentsdb_url)
            proto, db_server, db_port = url.scheme, url.hostname, url.port

            registry = MetricsRegistry()
            wrapped_registry = MetricsRegistryWrapper(registry)

            reporter = InfluxReporter(
                registry=registry,
                reporting_interval=10,
                database=rest_api_config.opentsdb_db,
                prefix="sawtooth_rest_api",
                port=db_port,
                protocol=proto,
                server=db_server,
                username=rest_api_config.opentsdb_username,
                password=rest_api_config.opentsdb_password)
            reporter.start()

        start_rest_api(
            host,
            port,
            connection,
            int(rest_api_config.timeout),
            wrapped_registry)
    # pylint: disable=broad-except
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        if connection is not None:
            connection.close()
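
# A quick illustration of the host:port parsing above; the bind values are
# hypothetical. Both failure modes funnel into the same ValueError handler.
bind = "127.0.0.1:8008"
host, port = bind.split(":")  # -> ("127.0.0.1", "8008")
port = int(port)              # -> 8008
# "8008"     -> ValueError: unpacking fails (no colon present)
# "host:abc" -> ValueError: int("abc") is not a valid integer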
def main(args):
    try:
        path_config = load_path_config(config_dir=args['config_dir'])
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    try:
        opts_config = ValidatorConfig(
            bind_component=args['bind_component'],
            bind_network=args['bind_network'],
            bind_consensus=args['bind_consensus'],
            endpoint=args['endpoint'],
            maximum_peer_connectivity=args['maximum_peer_connectivity'],
            minimum_peer_connectivity=args['minimum_peer_connectivity'],
            roles=args['roles'],
            opentsdb_db=args['opentsdb_db'],
            opentsdb_url=args['opentsdb_url'],
            peering=args['peering'],
            peers=args['peers'],
            scheduler=args['scheduler'],
            seeds=args['seeds'],
            state_pruning_block_depth=args['state_pruning_block_depth'],
            fork_cache_keep_time=args['fork_cache_keep_time'],
        )

        validator_config = load_validator_config(opts_config, path_config.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    try:
        log_configuration(log_dir=path_config.log_dir, name="validator")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    # Process initial initialization errors, delaying the sys.exit(1) until
    # all errors have been reported to the user (via LOGGER.error()). This
    # is intended to provide enough information to the user so they can
    # correct multiple errors before restarting the validator.
    init_errors = False
    try:
        identity_signer = load_identity_signer(
            key_dir=path_config.key_dir,
            key_name='validator')
    except LocalConfigurationError as e:
        LOGGER.error(str(e))
        init_errors = True

    log_config = get_log_config()
    if not init_errors:
        if log_config is not None:
            log_configuration(log_config=log_config)
            if log_config.get('root') is not None:
                init_console_logging(verbose_level=args['verbose'])
        else:
            log_configuration(log_dir=path_config.log_dir, name="validator")

    try:
        version = pkg_resources.get_distribution(DISTRIBUTION_NAME).version
    except pkg_resources.DistributionNotFound:
        version = 'UNKNOWN'
    LOGGER.info('%s (Hyperledger Sawtooth) version %s', DISTRIBUTION_NAME, version)

    if LOGGER.isEnabledFor(logging.INFO):
        LOGGER.info('; '.join([
            'config [path]: {}'.format(line)
            for line in path_config.to_toml_string()
        ]))

    if not check_directory(path=path_config.data_dir, human_readable_name='Data'):
        init_errors = True
    if not check_directory(path=path_config.log_dir, human_readable_name='Log'):
        init_errors = True

    endpoint = validator_config.endpoint
    if endpoint is None:
        # Need to use join here to get the string "0.0.0.0". Otherwise,
        # bandit thinks we are binding to all interfaces and returns a
        # Medium security risk.
        interfaces = ["*", ".".join(["0", "0", "0", "0"])]
        interfaces += netifaces.interfaces()
        endpoint = validator_config.bind_network
        parsed_endpoint = urlparse(validator_config.bind_network)
        for interface in interfaces:
            if interface == parsed_endpoint.hostname:
                LOGGER.error("Endpoint must be set when using %s", interface)
                init_errors = True

    if init_errors:
        LOGGER.error("Initialization errors occurred (see previous log "
                     "ERROR messages), shutting down.")
        sys.exit(1)

    bind_network = validator_config.bind_network
    bind_component = validator_config.bind_component
    bind_consensus = validator_config.bind_consensus

    if "tcp://" not in bind_network:
        bind_network = "tcp://" + bind_network

    if "tcp://" not in bind_component:
        bind_component = "tcp://" + bind_component

    if bind_consensus and "tcp://" not in bind_consensus:
        bind_consensus = "tcp://" + bind_consensus

    if validator_config.network_public_key is None or \
            validator_config.network_private_key is None:
        LOGGER.warning("Network key pair is not configured; network "
                       "communications between validators will not be "
                       "authenticated or encrypted.")

    metrics_reporter = None
    if validator_config.opentsdb_url:
        LOGGER.info("Adding metrics reporter: url=%s, db=%s",
                    validator_config.opentsdb_url,
                    validator_config.opentsdb_db)

        url = urlparse(validator_config.opentsdb_url)
        proto, db_server, db_port = url.scheme, url.hostname, url.port

        registry = MetricsRegistry()
        metrics.init_metrics(registry=registry)

        metrics_reporter = InfluxReporter(
            registry=registry,
            reporting_interval=10,
            database=validator_config.opentsdb_db,
            prefix="sawtooth_validator",
            port=db_port,
            protocol=proto,
            server=db_server,
            username=validator_config.opentsdb_username,
            password=validator_config.opentsdb_password)
        metrics_reporter.start()
    else:
        metrics.init_metrics()

    # Verify state integrity before startup
    global_state_db, blockstore = state_verifier.get_databases(
        bind_network, path_config.data_dir)

    state_verifier.verify_state(
        global_state_db,
        blockstore,
        bind_component,
        validator_config.scheduler)

    # Explicitly drop this, so there are not two db instances
    global_state_db.drop()
    global_state_db = None

    LOGGER.info('Starting validator with %s scheduler', validator_config.scheduler)

    component_workers = validator_config.component_thread_pool_workers
    network_workers = validator_config.network_thread_pool_workers
    sig_workers = validator_config.signature_thread_pool_workers
    validator = Validator(
        bind_network,
        bind_component,
        bind_consensus,
        endpoint,
        validator_config.peering,
        validator_config.seeds,
        validator_config.peers,
        path_config.data_dir,
        path_config.config_dir,
        identity_signer,
        path_config.key_dir,
        validator_config.scheduler,
        validator_config.permissions,
        validator_config.minimum_peer_connectivity,
        validator_config.maximum_peer_connectivity,
        validator_config.state_pruning_block_depth,
        validator_config.fork_cache_keep_time,
        validator_config.network_public_key,
        validator_config.network_private_key,
        roles=validator_config.roles,
        component_thread_pool_workers=component_workers,
        network_thread_pool_workers=network_workers,
        signature_thread_pool_workers=sig_workers)

    # pylint: disable=broad-except
    try:
        validator.start()
    except KeyboardInterrupt:
        LOGGER.info("Initiating graceful shutdown (press Ctrl+C again to force)")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)
    except GenesisError as genesis_err:
        LOGGER.error(str(genesis_err))
        sys.exit(1)
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        if metrics_reporter:
            metrics_reporter.stop()
        validator.stop()
def main(args):
    try:
        path_config = load_path_config(config_dir=args['config_dir'])
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    try:
        opts_config = ValidatorConfig(
            bind_component=args['bind_component'],
            bind_network=args['bind_network'],
            bind_consensus=args['bind_consensus'],
            endpoint=args['endpoint'],
            maximum_peer_connectivity=args['maximum_peer_connectivity'],
            minimum_peer_connectivity=args['minimum_peer_connectivity'],
            roles=args['roles'],
            opentsdb_db=args['opentsdb_db'],
            opentsdb_url=args['opentsdb_url'],
            peering=args['peering'],
            peers=args['peers'],
            scheduler=args['scheduler'],
            seeds=args['seeds'],
            state_pruning_block_depth=args['state_pruning_block_depth'],
            fork_cache_keep_time=args['fork_cache_keep_time'],
        )

        validator_config = load_validator_config(opts_config, path_config.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    try:
        log_configuration(log_dir=path_config.log_dir, name="validator")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    # Process initial initialization errors, delaying the sys.exit(1) until
    # all errors have been reported to the user (via LOGGER.error()). This
    # is intended to provide enough information to the user so they can
    # correct multiple errors before restarting the validator.
    init_errors = False
    try:
        identity_signer = load_identity_signer(
            key_dir=path_config.key_dir,
            key_name='validator')
    except LocalConfigurationError as e:
        LOGGER.error(str(e))
        init_errors = True

    log_config = get_log_config()
    if not init_errors:
        if log_config is not None:
            log_configuration(log_config=log_config)
            if log_config.get('root') is not None:
                init_console_logging(verbose_level=args['verbose'])
        else:
            log_configuration(log_dir=path_config.log_dir, name="validator")

    try:
        version = pkg_resources.get_distribution(DISTRIBUTION_NAME).version
    except pkg_resources.DistributionNotFound:
        version = 'UNKNOWN'
    LOGGER.info(
        '%s (Hyperledger Sawtooth) version %s', DISTRIBUTION_NAME, version)

    if LOGGER.isEnabledFor(logging.INFO):
        LOGGER.info(
            '; '.join([
                'config [path]: {}'.format(line)
                for line in path_config.to_toml_string()
            ])
        )

    if not check_directory(path=path_config.data_dir, human_readable_name='Data'):
        init_errors = True
    if not check_directory(path=path_config.log_dir, human_readable_name='Log'):
        init_errors = True

    endpoint = validator_config.endpoint
    if endpoint is None:
        # Need to use join here to get the string "0.0.0.0". Otherwise,
        # bandit thinks we are binding to all interfaces and returns a
        # Medium security risk.
        interfaces = ["*", ".".join(["0", "0", "0", "0"])]
        interfaces += netifaces.interfaces()
        endpoint = validator_config.bind_network
        parsed_endpoint = urlparse(validator_config.bind_network)
        for interface in interfaces:
            if interface == parsed_endpoint.hostname:
                LOGGER.error("Endpoint must be set when using %s", interface)
                init_errors = True

    if init_errors:
        LOGGER.error("Initialization errors occurred (see previous log "
                     "ERROR messages), shutting down.")
        sys.exit(1)

    bind_network = validator_config.bind_network
    bind_component = validator_config.bind_component
    bind_consensus = validator_config.bind_consensus

    if "tcp://" not in bind_network:
        bind_network = "tcp://" + bind_network

    if "tcp://" not in bind_component:
        bind_component = "tcp://" + bind_component

    if bind_consensus and "tcp://" not in bind_consensus:
        bind_consensus = "tcp://" + bind_consensus

    if validator_config.network_public_key is None or \
            validator_config.network_private_key is None:
        LOGGER.warning("Network key pair is not configured; network "
                       "communications between validators will not be "
                       "authenticated or encrypted.")

    metrics_reporter = None
    if validator_config.opentsdb_url:
        LOGGER.info("Adding metrics reporter: url=%s, db=%s",
                    validator_config.opentsdb_url,
                    validator_config.opentsdb_db)

        url = urlparse(validator_config.opentsdb_url)
        proto, db_server, db_port = url.scheme, url.hostname, url.port

        registry = MetricsRegistry()
        metrics.init_metrics(registry=registry)

        metrics_reporter = InfluxReporter(
            registry=registry,
            reporting_interval=10,
            database=validator_config.opentsdb_db,
            prefix="sawtooth_validator",
            port=db_port,
            protocol=proto,
            server=db_server,
            username=validator_config.opentsdb_username,
            password=validator_config.opentsdb_password)
        metrics_reporter.start()
    else:
        metrics.init_metrics()

    # Verify state integrity before startup
    global_state_db, blockstore = state_verifier.get_databases(
        bind_network, path_config.data_dir)

    state_verifier.verify_state(
        global_state_db,
        blockstore,
        bind_component,
        validator_config.scheduler)

    # Explicitly drop this, so there are not two db instances
    global_state_db.drop()
    global_state_db = None

    LOGGER.info(
        'Starting validator with %s scheduler', validator_config.scheduler)

    component_workers = validator_config.component_thread_pool_workers
    network_workers = validator_config.network_thread_pool_workers
    sig_workers = validator_config.signature_thread_pool_workers
    validator = Validator(
        bind_network,
        bind_component,
        bind_consensus,
        endpoint,
        validator_config.peering,
        validator_config.seeds,
        validator_config.peers,
        path_config.data_dir,
        path_config.config_dir,
        identity_signer,
        validator_config.scheduler,
        validator_config.permissions,
        validator_config.minimum_peer_connectivity,
        validator_config.maximum_peer_connectivity,
        validator_config.state_pruning_block_depth,
        validator_config.fork_cache_keep_time,
        validator_config.network_public_key,
        validator_config.network_private_key,
        roles=validator_config.roles,
        component_thread_pool_workers=component_workers,
        network_thread_pool_workers=network_workers,
        signature_thread_pool_workers=sig_workers)

    # pylint: disable=broad-except
    try:
        validator.start()
    except KeyboardInterrupt:
        LOGGER.info("Initiating graceful shutdown (press Ctrl+C again to force)")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)
    except GenesisError as genesis_err:
        LOGGER.error(str(genesis_err))
        sys.exit(1)
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        if metrics_reporter:
            metrics_reporter.stop()
        validator.stop()
class BaseMetricsService(Service, MetricsServiceAPI):
    """
    A service to provide a registry where metrics instruments can be registered
    and retrieved from. It continuously reports metrics to the specified
    InfluxDB instance.
    """

    MIN_SECONDS_BETWEEN_ERROR_LOGS = 60

    logger = get_logger('trinity.components.builtin.metrics.MetricsService')

    def __init__(self,
                 influx_server: str,
                 influx_user: str,
                 influx_password: str,
                 influx_database: str,
                 host: str,
                 port: int,
                 protocol: str,
                 reporting_frequency: int):

        self._unreported_error: Optional[Exception] = None
        self._last_time_reported: float = 0.0
        self._influx_server = influx_server
        self._reporting_frequency = reporting_frequency
        self._registry = HostMetricsRegistry(host)
        self._reporter = InfluxReporter(
            registry=self._registry,
            database=influx_database,
            username=influx_user,
            password=influx_password,
            protocol=protocol,
            port=port,
            server=influx_server,
        )

    @property
    def registry(self) -> HostMetricsRegistry:
        """
        Return the :class:`trinity.components.builtin.metrics.registry.HostMetricsRegistry`
        at which metrics instruments can be registered and retrieved.
        """
        return self._registry

    async def run(self) -> None:
        self.logger.info("Reporting metrics to %s", self._influx_server)
        self.manager.run_daemon_task(self.continuously_report)
        await self.manager.wait_finished()

    def report_now(self) -> None:
        try:
            self._reporter.report_now()
        except (HTTPException, ConnectionError) as exc:
            # This method is usually called every few seconds. If there's an
            # issue with the connection, we do not want to flood the log, so
            # warnings are tamed down:
            #   1. The first instance of an exception is logged immediately
            #   2. Follow-up exceptions are logged only after a minimum time
            #      has elapsed
            # This means we might also overwrite exceptions for different errors.
            if self._is_justified_to_log_error():
                self._log_and_clear(exc)
            else:
                self._unreported_error = exc
        else:
            # If errors disappear, make sure we eventually report the last instance
            if self._unreported_error is not None and self._is_justified_to_log_error():
                self._log_and_clear(self._unreported_error)

    def _log_and_clear(self, error: Exception) -> None:
        self.logger.warning("Unable to report metrics: %s", error)
        self._unreported_error = None
        self._last_time_reported = time.monotonic()

    def _is_justified_to_log_error(self) -> bool:
        return (
            self._last_time_reported == 0.0
            or time.monotonic() - self._last_time_reported > self.MIN_SECONDS_BETWEEN_ERROR_LOGS
        )

    @abstractmethod
    async def continuously_report(self) -> None:
        ...
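
# A minimal sketch of a concrete subclass, assuming the same trio-based
# interval helper (trinity._utils.trio_utils.every) that MetricsService
# uses above; the class name is hypothetical.
from trinity._utils import trio_utils  # assumed helper module

class TrioMetricsService(BaseMetricsService):

    async def continuously_report(self) -> None:
        # report_now() already rate-limits its own error logging, so a
        # flaky InfluxDB endpoint will not flood the log from this loop.
        async for _ in trio_utils.every(self._reporting_frequency):
            self.report_now()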
class Metrics(Service):
    logger = logging.getLogger('alexandria.metrics.Metrics')

    def __init__(self,
                 host: str,
                 client: ClientAPI,
                 kademlia: KademliaAPI,
                 influx_server: str,
                 influx_user: str,
                 influx_password: str,
                 influx_database: str,
                 influx_port: int = 443,
                 influx_protocol: str = 'https',
                 reporting_frequency: int = 10,
                 process_collection_frequency: int = 3):

        self._influx_server = influx_server
        self._reporting_frequency = reporting_frequency
        self._process_collection_frequency = process_collection_frequency
        self._registry = HostMetricsRegistry(host)
        self._reporter = InfluxReporter(
            registry=self._registry,
            protocol=influx_protocol,
            port=influx_port,
            database=influx_database,
            username=influx_user,
            password=influx_password,
            server=influx_server,
        )
        self.client = client
        self.kademlia = kademlia

    @classmethod
    def from_cli_args(cls,
                      args: Namespace,
                      client: ClientAPI,
                      kademlia: KademliaAPI,
                      ) -> 'Metrics':
        return cls(
            host=args.metrics_host,
            client=client,
            kademlia=kademlia,
            influx_server=args.metrics_influx_server,
            influx_user=args.metrics_influx_user,
            influx_password=args.metrics_influx_password,
            influx_database=args.metrics_influx_database,
            influx_port=args.metrics_influx_port,
            influx_protocol=args.metrics_influx_protocol,
            reporting_frequency=args.metrics_reporting_frequency,
        )

    async def run(self) -> None:
        self.manager.run_daemon_task(
            self._continuously_report,
            self._reporting_frequency,
        )
        self.manager.run_daemon_task(
            self._collect_system_metrics,
            self._process_collection_frequency,
        )
        self.manager.run_daemon_task(
            self._report_routing_table_stats,
            10,
        )
        self.manager.run_daemon_task(
            self._report_content_manager_stats,
            10,
        )
        self.logger.info('Metrics started')

        for payload_type in PAYLOAD_TYPES:
            self.manager.run_daemon_task(self._report_inbound_message_stats, payload_type)

        self.manager.run_daemon_task(self._report_event, self.client.events.session_created, 'events/session-created')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.session_idle, 'events/session-idle')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.handshake_complete, 'events/handshake-complete')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.handshake_timeout, 'events/handshake-timeout')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.datagram_received, 'datagram/inbound')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.datagram_sent, 'datagram/outbound')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.sent_ping, 'messages/outbound/Ping')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.sent_pong, 'messages/outbound/Pong')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.sent_find_nodes, 'messages/outbound/FindNodes')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.sent_found_nodes, 'messages/outbound/FoundNodes')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.sent_advertise, 'messages/outbound/Advertise')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.sent_ack, 'messages/outbound/Ack')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.sent_locate, 'messages/outbound/Locate')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.sent_locations, 'messages/outbound/Locations')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.sent_retrieve, 'messages/outbound/Retrieve')  # noqa: E501
        self.manager.run_daemon_task(self._report_event, self.client.events.sent_chunk, 'messages/outbound/Chunk')  # noqa: E501

        await self.manager.wait_finished()

    async def _continuously_report(self, frequency: int) -> None:
        async for _ in every(frequency):
            self._reporter.report_now()

    async def _report_event(self, event: EventAPI[Any], suffix: str) -> None:
        counter = self._registry.counter(f'alexandria.{suffix}.counter')
        meter = self._registry.meter(f'alexandria.{suffix}.meter')
        async with event.subscribe() as subscription:
            async for _ in subscription:
                counter.inc()
                meter.mark()

    async def _report_routing_table_stats(self, frequency: int) -> None:
        size_gauge = self._registry.gauge('alexandria.dht/routing-table/total-nodes.gauge')
        async for _ in every(frequency):
            stats = self.kademlia.routing_table.get_stats()
            size_gauge.set_value(stats.total_nodes)

    async def _report_inbound_message_stats(self, payload_type: Type[ssz.Serializable]) -> None:
        name = payload_type.__name__
        counter = self._registry.counter(f'alexandria.messages/inbound/{name}.counter')
        meter = self._registry.meter(f'alexandria.messages/inbound/{name}.meter')
        async with self.client.message_dispatcher.subscribe(payload_type) as subscription:
            async for payload in subscription:
                counter.inc()
                meter.mark()

    async def _report_content_manager_stats(self, frequency: int) -> None:
        gauge = self._registry.gauge
        durable_db_item_count_gauge = gauge('alexandria.content/durable-db/item-count.gauge')
        ephemeral_db_item_count_gauge = gauge('alexandria.content/ephemeral-db/item-count.gauge')
        ephemeral_db_capacity_gauge = gauge('alexandria.content/ephemeral-db/capacity.gauge')
        ephemeral_db_size_gauge = gauge('alexandria.content/ephemeral-db/size.gauge')
        ephemeral_index_capacity_gauge = gauge('alexandria.content/ephemeral-index/capacity.gauge')
        ephemeral_index_size_gauge = gauge('alexandria.content/ephemeral-index/size.gauge')
        cache_db_item_count_gauge = gauge('alexandria.content/cache-db/item-count.gauge')
        cache_db_capacity_gauge = gauge('alexandria.content/cache-db/capacity.gauge')
        cache_db_size_gauge = gauge('alexandria.content/cache-db/size.gauge')
        cache_index_capacity_gauge = gauge('alexandria.content/cache-index/capacity.gauge')
        cache_index_size_gauge = gauge('alexandria.content/cache-index/size.gauge')

        async for _ in every(frequency):
            stats = self.kademlia.content_manager.get_stats()
            durable_db_item_count_gauge.set_value(stats.durable_item_count)
            ephemeral_db_item_count_gauge.set_value(stats.ephemeral_db_count)
            ephemeral_db_capacity_gauge.set_value(stats.ephemeral_db_capacity)
            ephemeral_db_size_gauge.set_value(
                stats.ephemeral_db_total_capacity - stats.ephemeral_db_capacity
            )
            ephemeral_index_capacity_gauge.set_value(stats.ephemeral_index_capacity)
            ephemeral_index_size_gauge.set_value(
                stats.ephemeral_index_total_capacity - stats.ephemeral_index_capacity
            )
            cache_db_item_count_gauge.set_value(stats.cache_db_count)
            cache_db_capacity_gauge.set_value(stats.cache_db_capacity)
            cache_db_size_gauge.set_value(stats.cache_db_total_capacity - stats.cache_db_capacity)
            cache_index_capacity_gauge.set_value(stats.cache_index_capacity)
            cache_index_size_gauge.set_value(
                stats.cache_index_total_capacity - stats.cache_index_capacity
            )

    async def _collect_system_metrics(self, frequency: int) -> None:
        cpu_sysload_gauge = self._registry.gauge('alexandria.system/cpu/sysload.gauge')
        cpu_syswait_gauge = self._registry.gauge('alexandria.system/cpu/syswait.gauge')
        memory_used_gauge = self._registry.gauge('alexandria.system/memory/used.gauge')
        memory_free_gauge = self._registry.gauge('alexandria.system/memory/free.gauge')
        disk_readdata_meter = self._registry.meter('alexandria.system/disk/readdata.meter')
        disk_writedata_meter = self._registry.meter('alexandria.system/disk/writedata.meter')
        network_in_packets_meter = self._registry.meter('alexandria.network/in/packets/total.meter')
        network_out_packets_meter = self._registry.meter('alexandria.network/out/packets/total.meter')  # noqa: E501

        previous = read_system_stats()
        async for _ in every(frequency, initial_delay=frequency):
            current = read_system_stats()

            global_time = current.cpu_stats.global_time - previous.cpu_stats.global_time
            cpu_sysload_gauge.set_value(global_time / frequency)
            global_wait = current.cpu_stats.global_wait_io - previous.cpu_stats.global_wait_io
            cpu_syswait_gauge.set_value(global_wait / frequency)

            memory_used_gauge.set_value(current.memory_stats.used)
            memory_free_gauge.set_value(current.memory_stats.free)

            read_bytes = current.disk_stats.read_bytes - previous.disk_stats.read_bytes
            disk_readdata_meter.mark(read_bytes)
            write_bytes = current.disk_stats.write_bytes - previous.disk_stats.write_bytes
            disk_writedata_meter.mark(write_bytes)

            in_packets = current.network_stats.in_packets - previous.network_stats.in_packets
            network_in_packets_meter.mark(in_packets)
            out_packets = current.network_stats.out_packets - previous.network_stats.out_packets
            network_out_packets_meter.mark(out_packets)

            previous = current
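
# The every() helper used throughout the class above is an async iterator
# that ticks on a fixed interval. A minimal trio-based approximation (my
# sketch, not necessarily the actual Alexandria implementation):
from typing import AsyncIterator

import trio

async def every(interval: float, initial_delay: float = 0) -> AsyncIterator[float]:
    # Yields the scheduled tick time and compensates for drift, so slow
    # loop bodies do not push subsequent ticks later and later.
    await trio.sleep(initial_delay)
    scheduled = trio.current_time()
    while True:
        yield scheduled
        scheduled += interval
        delay = scheduled - trio.current_time()
        if delay > 0:
            await trio.sleep(delay)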