예제 #1
0
    def __init__(self,
                 host: str,
                 client: ClientAPI,
                 kademlia: KademliaAPI,
                 influx_server: str,
                 influx_user: str,
                 influx_password: str,
                 influx_database: str,
                 influx_port: int = 443,
                 influx_protocol: str = 'https',
                 reporting_frequency: int = 10,
                 process_collection_frequency: int = 3):
        """
        Create the host metrics registry and the InfluxDB reporter bound to it.

        :param host: passed to :class:`HostMetricsRegistry` when building the registry.
        :param client: stored as ``self.client``.
        :param kademlia: stored as ``self.kademlia``.
        :param influx_server: InfluxDB server handed to the reporter; also kept on the
            instance as ``_influx_server``.
        :param influx_user: username handed to the reporter.
        :param influx_password: password handed to the reporter.
        :param influx_database: database name handed to the reporter.
        :param influx_port: port handed to the reporter (defaults to 443).
        :param influx_protocol: protocol handed to the reporter (defaults to ``'https'``).
        :param reporting_frequency: stored as ``_reporting_frequency``
            (unit not visible here -- presumably seconds, confirm against the consumer).
        :param process_collection_frequency: stored as ``_process_collection_frequency``
            (unit not visible here -- presumably seconds, confirm against the consumer).
        """
        # Collaborators supplied by the caller.
        self.client = client
        self.kademlia = kademlia

        # Plain configuration values kept for later use.
        self._influx_server = influx_server
        self._reporting_frequency = reporting_frequency
        self._process_collection_frequency = process_collection_frequency

        # The registry has to exist before the reporter, which is bound to it.
        host_registry = HostMetricsRegistry(host)
        self._registry = host_registry
        self._reporter = InfluxReporter(
            registry=host_registry,
            server=influx_server,
            port=influx_port,
            protocol=influx_protocol,
            database=influx_database,
            username=influx_user,
            password=influx_password,
        )
예제 #2
0
 def __init__(self, influx_server: str, influx_user: str,
              influx_password: str, influx_database: str, host: str,
              port: int, protocol: str, reporting_frequency: int):
     """
     Create the host metrics registry and the InfluxDB reporter bound to it.

     :param influx_server: InfluxDB server handed to the reporter; also kept as
         ``_influx_server``.
     :param influx_user: username handed to the reporter.
     :param influx_password: password handed to the reporter.
     :param influx_database: database name handed to the reporter.
     :param host: passed to :class:`HostMetricsRegistry` when building the registry.
     :param port: port handed to the reporter.
     :param protocol: protocol handed to the reporter.
     :param reporting_frequency: stored as ``_reporting_frequency``
         (unit not visible here -- presumably seconds, confirm against the consumer).
     """
     # Plain configuration values kept for later use.
     self._influx_server = influx_server
     self._reporting_frequency = reporting_frequency

     # Bookkeeping starts out empty: no error recorded, report timestamp at 0.0.
     self._last_time_reported: float = 0.0
     self._unreported_error: Exception = None

     # The registry has to exist before the reporter, which is bound to it.
     host_registry = HostMetricsRegistry(host)
     self._registry = host_registry
     self._reporter = InfluxReporter(
         registry=host_registry,
         server=influx_server,
         port=port,
         protocol=protocol,
         database=influx_database,
         username=influx_user,
         password=influx_password,
     )
예제 #3
0
    def __init__(self,
                 influx_server: str,
                 influx_user: str,
                 influx_password: str,
                 influx_database: str,
                 host: str,
                 reporting_frequency: int = 10,
                 influx_port: int = 443,
                 influx_protocol: str = 'https'):
        """
        Create the host metrics registry and the InfluxDB reporter bound to it.

        :param influx_server: InfluxDB server handed to the reporter; also kept as
            ``_influx_server``.
        :param influx_user: username handed to the reporter.
        :param influx_password: password handed to the reporter.
        :param influx_database: database name handed to the reporter.
        :param host: passed to :class:`HostMetricsRegistry` when building the registry.
        :param reporting_frequency: stored as ``_reporting_frequency``
            (unit not visible here -- presumably seconds, confirm against the consumer).
        :param influx_port: port handed to the reporter. Defaults to 443, the value that
            used to be hard-coded, so existing callers are unaffected.
        :param influx_protocol: protocol handed to the reporter. Defaults to ``'https'``,
            the value that used to be hard-coded, so existing callers are unaffected.
        """
        self._influx_server = influx_server
        self._reporting_frequency = reporting_frequency
        self._registry = HostMetricsRegistry(host)
        self._reporter = InfluxReporter(registry=self._registry,
                                        protocol=influx_protocol,
                                        port=influx_port,
                                        database=influx_database,
                                        username=influx_user,
                                        password=influx_password,
                                        server=influx_server)
예제 #4
0
class MetricsService(Service):
    """
    A service to provide a registry where metrics instruments can be registered and retrieved from.
    It continuously reports metrics to the specified InfluxDB instance.
    """
    def __init__(self,
                 influx_server: str,
                 influx_user: str,
                 influx_password: str,
                 influx_database: str,
                 host: str,
                 reporting_frequency: int = 10,
                 influx_port: int = 443,
                 influx_protocol: str = 'https'):
        """
        Create the host metrics registry and the InfluxDB reporter bound to it.

        :param influx_server: InfluxDB server handed to the reporter; also logged by
            :meth:`run`.
        :param influx_user: username handed to the reporter.
        :param influx_password: password handed to the reporter.
        :param influx_database: database name handed to the reporter.
        :param host: passed to :class:`HostMetricsRegistry` when building the registry.
        :param reporting_frequency: interval passed to ``trio_utils.every`` between two
            reports (presumably seconds -- confirm against ``trio_utils.every``).
        :param influx_port: port handed to the reporter. Defaults to 443, the value that
            used to be hard-coded, so existing callers are unaffected.
        :param influx_protocol: protocol handed to the reporter. Defaults to ``'https'``,
            the value that used to be hard-coded, so existing callers are unaffected.
        """
        self._influx_server = influx_server
        self._reporting_frequency = reporting_frequency
        self._registry = HostMetricsRegistry(host)
        self._reporter = InfluxReporter(registry=self._registry,
                                        protocol=influx_protocol,
                                        port=influx_port,
                                        database=influx_database,
                                        username=influx_user,
                                        password=influx_password,
                                        server=influx_server)

    logger = get_extended_debug_logger(
        'trinity.components.builtin.metrics.MetricsService')

    @property
    def registry(self) -> HostMetricsRegistry:
        """
        Return the :class:`trinity.components.builtin.metrics.registry.HostMetricsRegistry` at which
        metrics instruments can be registered and retrieved.
        """
        return self._registry

    async def run(self) -> None:
        """
        Log the reporting target, start the reporting loop as a daemon task and wait
        until the service manager has finished.
        """
        self.logger.info("Reporting metrics to %s", self._influx_server)
        self.manager.run_daemon_task(self._continuously_report)
        await self.manager.wait_finished()

    async def _continuously_report(self) -> None:
        """
        Push the registry contents through the reporter once per ``reporting_frequency`` tick.
        """
        async for _ in trio_utils.every(self._reporting_frequency):
            # NOTE(review): report_now() is called synchronously inside the async loop;
            # if it performs blocking network I/O it will stall other tasks -- confirm.
            self._reporter.report_now()
예제 #5
0
def main(args=None):
    """Run the validator from the command line.

    Parses the options, loads the path and validator configuration, sets up
    logging, optionally starts an InfluxDB metrics reporter, verifies state
    and finally builds and starts a ``Validator``.  The process exits with
    status 1 on configuration, initialization or startup errors.

    Args:
        args: Command-line arguments without the program name.  When ``None``
            they are taken from ``sys.argv[1:]``.
    """
    if args is None:
        args = sys.argv[1:]
    opts = parse_args(args)
    verbose_level = opts.verbose

    # Determine if any args which support delimited lists should be
    # modified
    if opts.peers:
        opts.peers = _split_comma_append_args(opts.peers)

    if opts.seeds:
        opts.seeds = _split_comma_append_args(opts.seeds)

    init_console_logging(verbose_level=verbose_level)

    if opts.network_auth:
        opts.network_auth = {"network": opts.network_auth}

    try:
        path_config = load_path_config(config_dir=opts.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    try:
        opts_config = create_validator_config(opts)
        validator_config = \
            load_validator_config(opts_config, path_config.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    # Process initial initialization errors, delaying the sys.exit(1) until
    # all errors have been reported to the user (via LOGGER.error()).  This
    # is intended to provide enough information to the user so they can correct
    # multiple errors before restarting the validator.
    init_errors = False
    try:
        identity_signer = load_identity_signer(key_dir=path_config.key_dir,
                                               key_name='validator')
    except LocalConfigurationError as e:
        log_configuration(log_dir=path_config.log_dir, name="validator")
        LOGGER.error(str(e))
        init_errors = True

    log_config = get_log_config()
    if not init_errors:
        if log_config is not None:
            log_configuration(log_config=log_config)
            if log_config.get('root') is not None:
                init_console_logging(verbose_level=verbose_level)
        else:
            log_configuration(log_dir=path_config.log_dir, name="validator")

    try:
        version = pkg_resources.get_distribution(DISTRIBUTION_NAME).version
    except pkg_resources.DistributionNotFound:
        version = 'UNKNOWN'
    LOGGER.info('%s (Hyperledger Sawtooth) version %s', DISTRIBUTION_NAME,
                version)

    if LOGGER.isEnabledFor(logging.INFO):
        LOGGER.info('; '.join([
            'config [path]: {}'.format(line)
            for line in path_config.to_toml_string()
        ]))

    if not check_directory(path=path_config.data_dir,
                           human_readable_name='Data'):
        init_errors = True
    if not check_directory(path=path_config.log_dir,
                           human_readable_name='Log'):
        init_errors = True

    endpoint = validator_config.endpoint
    if endpoint is None:
        # Need to use join here to get the string "0.0.0.0". Otherwise,
        # bandit thinks we are binding to all interfaces and returns a
        # Medium security risk.
        interfaces = ["*", ".".join(["0", "0", "0", "0"])]
        interfaces += netifaces.interfaces()
        endpoint = validator_config.bind_network

        # Compare against the host part of the bind address only.  A plain
        # substring test would wrongly reject e.g. "tcp://localhost:8800"
        # because it contains the interface name "lo".  The scheme is added
        # first so that urlparse can find the host in scheme-less addresses.
        bind_url = validator_config.bind_network
        if "tcp://" not in bind_url:
            bind_url = "tcp://" + bind_url
        try:
            bind_host = urlparse(bind_url).hostname
        except ValueError:
            # Unparseable address: no host to compare, skip this check.
            bind_host = None
        for interface in interfaces:
            # urlparse lower-cases the hostname, so lower-case the interface
            # name as well before comparing.
            if bind_host is not None and interface.lower() == bind_host:
                LOGGER.error("Endpoint must be set when using %s", interface)
                init_errors = True
                break

    if init_errors:
        LOGGER.error("Initialization errors occurred (see previous log "
                     "ERROR messages), shutting down.")
        sys.exit(1)
    bind_network = validator_config.bind_network
    bind_component = validator_config.bind_component

    if "tcp://" not in bind_network:
        bind_network = "tcp://" + bind_network

    if "tcp://" not in bind_component:
        bind_component = "tcp://" + bind_component

    if validator_config.network_public_key is None or \
            validator_config.network_private_key is None:
        LOGGER.warning("Network key pair is not configured, Network "
                       "communications between validators will not be "
                       "authenticated or encrypted.")

    # Metrics are optional: without an opentsdb_url the validator receives
    # metrics_registry=None and no reporter is started.
    wrapped_registry = None
    metrics_reporter = None
    if validator_config.opentsdb_url:
        LOGGER.info("Adding metrics reporter: url=%s, db=%s",
                    validator_config.opentsdb_url,
                    validator_config.opentsdb_db)

        url = urlparse(validator_config.opentsdb_url)
        proto, db_server, db_port, = url.scheme, url.hostname, url.port

        registry = MetricsRegistry()
        wrapped_registry = MetricsRegistryWrapper(registry)

        metrics_reporter = InfluxReporter(
            registry=registry,
            reporting_interval=10,
            database=validator_config.opentsdb_db,
            prefix="sawtooth_validator",
            port=db_port,
            protocol=proto,
            server=db_server,
            username=validator_config.opentsdb_username,
            password=validator_config.opentsdb_password)
        metrics_reporter.start()

    # Verify state integrity before startup
    verify_state(bind_network, bind_component, validator_config.scheduler,
                 path_config.data_dir)

    LOGGER.info('Starting validator with %s scheduler',
                validator_config.scheduler)

    validator = Validator(bind_network,
                          bind_component,
                          endpoint,
                          validator_config.peering,
                          validator_config.seeds,
                          validator_config.peers,
                          path_config.data_dir,
                          path_config.config_dir,
                          identity_signer,
                          validator_config.scheduler,
                          validator_config.permissions,
                          validator_config.minimum_peer_connectivity,
                          validator_config.maximum_peer_connectivity,
                          validator_config.network_public_key,
                          validator_config.network_private_key,
                          roles=validator_config.roles,
                          metrics_registry=wrapped_registry)

    # pylint: disable=broad-except
    try:
        validator.start()
    except KeyboardInterrupt:
        LOGGER.info("Initiating graceful "
                    "shutdown (press Ctrl+C again to force)")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)
    except GenesisError as genesis_err:
        LOGGER.error(str(genesis_err))
        sys.exit(1)
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        # Runs on every exit path above, including the sys.exit(1) calls.
        if metrics_reporter:
            metrics_reporter.stop()
        validator.stop()
예제 #6
0
파일: cli.py 프로젝트: jjason/sawtooth-core
def main(args=None):
    """Run the validator from the command line.

    Parses the options, loads the path and validator configuration, sets up
    logging, optionally starts an InfluxDB metrics reporter and finally
    builds and starts a ``Validator``.  The process exits with status 1 on
    configuration, initialization or startup errors.

    Args:
        args: Command-line arguments without the program name.  When ``None``
            they are taken from ``sys.argv[1:]``.
    """
    if args is None:
        args = sys.argv[1:]
    opts = parse_args(args)
    verbose_level = opts.verbose

    # Determine if any args which support delimited lists should be
    # modified
    if opts.peers:
        opts.peers = _split_comma_append_args(opts.peers)

    if opts.seeds:
        opts.seeds = _split_comma_append_args(opts.seeds)

    init_console_logging(verbose_level=verbose_level)

    if opts.network_auth:
        opts.network_auth = {"network": opts.network_auth}

    try:
        path_config = load_path_config(config_dir=opts.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    try:
        opts_config = create_validator_config(opts)
        validator_config = \
            load_validator_config(opts_config, path_config.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    # Process initial initialization errors, delaying the sys.exit(1) until
    # all errors have been reported to the user (via LOGGER.error()).  This
    # is intended to provide enough information to the user so they can correct
    # multiple errors before restarting the validator.
    init_errors = False
    try:
        identity_signer = load_identity_signer(
            key_dir=path_config.key_dir,
            key_name='validator')
    except LocalConfigurationError as e:
        log_configuration(log_dir=path_config.log_dir,
                          name="validator")
        LOGGER.error(str(e))
        init_errors = True

    log_config = get_log_config()
    if not init_errors:
        if log_config is not None:
            log_configuration(log_config=log_config)
            if log_config.get('root') is not None:
                init_console_logging(verbose_level=verbose_level)
        else:
            log_configuration(log_dir=path_config.log_dir,
                              name="validator")

    try:
        version = pkg_resources.get_distribution(DISTRIBUTION_NAME).version
    except pkg_resources.DistributionNotFound:
        version = 'UNKNOWN'
    LOGGER.info(
        '%s (Hyperledger Sawtooth) version %s', DISTRIBUTION_NAME, version)

    if LOGGER.isEnabledFor(logging.INFO):
        LOGGER.info(
            '; '.join([
                'config [path]: {}'.format(line)
                for line in path_config.to_toml_string()
            ])
        )

    if not check_directory(path=path_config.data_dir,
                           human_readable_name='Data'):
        init_errors = True
    if not check_directory(path=path_config.log_dir,
                           human_readable_name='Log'):
        init_errors = True

    endpoint = validator_config.endpoint
    if endpoint is None:
        # Need to use join here to get the string "0.0.0.0". Otherwise,
        # bandit thinks we are binding to all interfaces and returns a
        # Medium security risk.
        interfaces = ["*", ".".join(["0", "0", "0", "0"])]
        interfaces += netifaces.interfaces()
        endpoint = validator_config.bind_network

        # Compare against the host part of the bind address only.  A plain
        # substring test would wrongly reject e.g. "tcp://localhost:8800"
        # because it contains the interface name "lo".  The scheme is added
        # first so that urlparse can find the host in scheme-less addresses.
        bind_url = validator_config.bind_network
        if "tcp://" not in bind_url:
            bind_url = "tcp://" + bind_url
        try:
            bind_host = urlparse(bind_url).hostname
        except ValueError:
            # Unparseable address: no host to compare, skip this check.
            bind_host = None
        for interface in interfaces:
            # urlparse lower-cases the hostname, so lower-case the interface
            # name as well before comparing.
            if bind_host is not None and interface.lower() == bind_host:
                LOGGER.error("Endpoint must be set when using %s", interface)
                init_errors = True
                break

    if init_errors:
        LOGGER.error("Initialization errors occurred (see previous log "
                     "ERROR messages), shutting down.")
        sys.exit(1)
    bind_network = validator_config.bind_network
    bind_component = validator_config.bind_component

    if "tcp://" not in bind_network:
        bind_network = "tcp://" + bind_network

    if "tcp://" not in bind_component:
        bind_component = "tcp://" + bind_component

    if validator_config.network_public_key is None or \
            validator_config.network_private_key is None:
        LOGGER.warning("Network key pair is not configured, Network "
                       "communications between validators will not be "
                       "authenticated or encrypted.")

    # Metrics are optional: without an opentsdb_url the validator receives
    # metrics_registry=None and no reporter is started.
    wrapped_registry = None
    metrics_reporter = None
    if validator_config.opentsdb_url:
        LOGGER.info("Adding metrics reporter: url=%s, db=%s",
                    validator_config.opentsdb_url,
                    validator_config.opentsdb_db)

        url = urlparse(validator_config.opentsdb_url)
        proto, db_server, db_port, = url.scheme, url.hostname, url.port

        registry = MetricsRegistry()
        wrapped_registry = MetricsRegistryWrapper(registry)

        metrics_reporter = InfluxReporter(
            registry=registry,
            reporting_interval=10,
            database=validator_config.opentsdb_db,
            prefix="sawtooth_validator",
            port=db_port,
            protocol=proto,
            server=db_server,
            username=validator_config.opentsdb_username,
            password=validator_config.opentsdb_password)
        metrics_reporter.start()

    LOGGER.info(
        'Starting validator with %s scheduler',
        validator_config.scheduler)

    validator = Validator(
        bind_network,
        bind_component,
        endpoint,
        validator_config.peering,
        validator_config.seeds,
        validator_config.peers,
        path_config.data_dir,
        path_config.config_dir,
        identity_signer,
        validator_config.scheduler,
        validator_config.permissions,
        validator_config.minimum_peer_connectivity,
        validator_config.maximum_peer_connectivity,
        validator_config.network_public_key,
        validator_config.network_private_key,
        roles=validator_config.roles,
        metrics_registry=wrapped_registry)

    # pylint: disable=broad-except
    try:
        validator.start()
    except KeyboardInterrupt:
        LOGGER.info("Initiating graceful "
                    "shutdown (press Ctrl+C again to force)")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)
    except GenesisError as genesis_err:
        LOGGER.error(str(genesis_err))
        sys.exit(1)
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        # Runs on every exit path above, including the sys.exit(1) calls.
        if metrics_reporter:
            metrics_reporter.stop()
        validator.stop()
예제 #7
0
def main():
    """Run the REST API process.

    Installs a ``ZMQEventLoop`` as the asyncio event loop, loads the REST API
    configuration from the command line, opens the validator connection,
    configures logging, optionally starts an InfluxDB metrics reporter and
    then hands over to ``start_rest_api``.  Any unexpected exception is
    logged and the process exits with status 1.  The metrics reporter (if
    one was started) and the connection are always released on the way out.
    """
    loop = ZMQEventLoop()
    asyncio.set_event_loop(loop)

    connection = None
    reporter = None
    try:
        opts = parse_args(sys.argv[1:])
        opts_config = RestApiConfig(bind=opts.bind,
                                    connect=opts.connect,
                                    timeout=opts.timeout,
                                    opentsdb_url=opts.opentsdb_url,
                                    opentsdb_db=opts.opentsdb_db)
        rest_api_config = load_rest_api_config(opts_config)

        # The connect address may be given with or without the scheme.
        url = rest_api_config.connect
        if "tcp://" not in url:
            url = "tcp://" + url

        connection = Connection(url)

        log_config = get_log_config(filename="rest_api_log_config.toml")

        # If no toml, try loading yaml
        if log_config is None:
            log_config = get_log_config(filename="rest_api_log_config.yaml")

        if log_config is not None:
            log_configuration(log_config=log_config)
        else:
            log_dir = get_log_dir()
            log_configuration(log_dir=log_dir, name="rest_api")
        init_console_logging(verbose_level=opts.verbose)

        try:
            host, port = rest_api_config.bind[0].split(":")
            port = int(port)
        except ValueError:
            # Raised both for a wrong number of ":"-separated parts and for
            # a non-numeric port.
            print("Unable to parse binding {}: Must be in the format"
                  " host:port".format(rest_api_config.bind[0]))
            sys.exit(1)

        wrapped_registry = None
        if rest_api_config.opentsdb_url:
            LOGGER.info("Adding metrics reporter: url=%s, db=%s",
                        rest_api_config.opentsdb_url,
                        rest_api_config.opentsdb_db)

            metrics_url = urlparse(rest_api_config.opentsdb_url)
            proto, db_server, db_port, = \
                metrics_url.scheme, metrics_url.hostname, metrics_url.port

            registry = MetricsRegistry()
            wrapped_registry = MetricsRegistryWrapper(registry)

            reporter = InfluxReporter(
                registry=registry,
                reporting_interval=10,
                database=rest_api_config.opentsdb_db,
                prefix="sawtooth_rest_api",
                port=db_port,
                protocol=proto,
                server=db_server,
                username=rest_api_config.opentsdb_username,
                password=rest_api_config.opentsdb_password)
            reporter.start()

        start_rest_api(host, port, connection, int(rest_api_config.timeout),
                       wrapped_registry)
        # pylint: disable=broad-except
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        # Stop the reporter that was started above; the nested finally makes
        # sure the connection is still closed even if stopping it fails.
        try:
            if reporter is not None:
                reporter.stop()
        finally:
            if connection is not None:
                connection.close()
예제 #8
0
def main(args):
    """Run the validator with already-parsed options.

    Loads the path and validator configuration, sets up logging, initializes
    metrics (with an InfluxDB reporter when ``opentsdb_url`` is configured),
    verifies state and finally builds and starts a ``Validator``.  The
    process exits with status 1 on configuration, initialization or startup
    errors.

    Args:
        args: Mapping of option names to values; it is indexed with string
            keys such as ``'config_dir'``, ``'bind_network'`` and
            ``'verbose'``.
    """
    try:
        path_config = load_path_config(config_dir=args['config_dir'])
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    try:
        opts_config = ValidatorConfig(
            bind_component=args['bind_component'],
            bind_network=args['bind_network'],
            bind_consensus=args['bind_consensus'],
            endpoint=args['endpoint'],
            maximum_peer_connectivity=args['maximum_peer_connectivity'],
            minimum_peer_connectivity=args['minimum_peer_connectivity'],
            roles=args['roles'],
            opentsdb_db=args['opentsdb_db'],
            opentsdb_url=args['opentsdb_url'],
            peering=args['peering'],
            peers=args['peers'],
            scheduler=args['scheduler'],
            seeds=args['seeds'],
            state_pruning_block_depth=args['state_pruning_block_depth'],
            fork_cache_keep_time=args['fork_cache_keep_time'],
        )

        validator_config = \
            load_validator_config(opts_config, path_config.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    try:
        log_configuration(log_dir=path_config.log_dir, name="validator")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    # Process initial initialization errors, delaying the sys.exit(1) until
    # all errors have been reported to the user (via LOGGER.error()).  This
    # is intended to provide enough information to the user so they can correct
    # multiple errors before restarting the validator.
    init_errors = False
    try:
        identity_signer = load_identity_signer(key_dir=path_config.key_dir,
                                               key_name='validator')
    except LocalConfigurationError as e:
        LOGGER.error(str(e))
        init_errors = True

    log_config = get_log_config()
    if not init_errors:
        if log_config is not None:
            log_configuration(log_config=log_config)
            if log_config.get('root') is not None:
                init_console_logging(verbose_level=args['verbose'])
        else:
            log_configuration(log_dir=path_config.log_dir, name="validator")

    try:
        version = pkg_resources.get_distribution(DISTRIBUTION_NAME).version
    except pkg_resources.DistributionNotFound:
        version = 'UNKNOWN'
    LOGGER.info('%s (Hyperledger Sawtooth) version %s', DISTRIBUTION_NAME,
                version)

    if LOGGER.isEnabledFor(logging.INFO):
        LOGGER.info('; '.join([
            'config [path]: {}'.format(line)
            for line in path_config.to_toml_string()
        ]))

    if not check_directory(path=path_config.data_dir,
                           human_readable_name='Data'):
        init_errors = True
    if not check_directory(path=path_config.log_dir,
                           human_readable_name='Log'):
        init_errors = True

    endpoint = validator_config.endpoint
    if endpoint is None:
        # Need to use join here to get the string "0.0.0.0". Otherwise,
        # bandit thinks we are binding to all interfaces and returns a
        # Medium security risk.
        interfaces = ["*", ".".join(["0", "0", "0", "0"])]
        interfaces += netifaces.interfaces()
        endpoint = validator_config.bind_network

        # The bind address may come without a scheme (it is only added
        # further down), in which case urlparse finds no hostname and the
        # check below would be skipped silently.  Add the scheme first.
        bind_url = validator_config.bind_network
        if "tcp://" not in bind_url:
            bind_url = "tcp://" + bind_url
        try:
            bind_host = urlparse(bind_url).hostname
        except ValueError:
            # Unparseable address: no host to compare, skip this check.
            bind_host = None
        for interface in interfaces:
            # urlparse lower-cases the hostname, so lower-case the interface
            # name as well before comparing.
            if bind_host is not None and interface.lower() == bind_host:
                LOGGER.error("Endpoint must be set when using %s", interface)
                init_errors = True
                break

    if init_errors:
        LOGGER.error("Initialization errors occurred (see previous log "
                     "ERROR messages), shutting down.")
        sys.exit(1)
    bind_network = validator_config.bind_network
    bind_component = validator_config.bind_component
    bind_consensus = validator_config.bind_consensus

    if "tcp://" not in bind_network:
        bind_network = "tcp://" + bind_network

    if "tcp://" not in bind_component:
        bind_component = "tcp://" + bind_component

    if bind_consensus and "tcp://" not in bind_consensus:
        bind_consensus = "tcp://" + bind_consensus

    if validator_config.network_public_key is None or \
            validator_config.network_private_key is None:
        LOGGER.warning("Network key pair is not configured, Network "
                       "communications between validators will not be "
                       "authenticated or encrypted.")

    # metrics.init_metrics() is called on both branches; a registry and a
    # reporter are only created when an opentsdb_url is configured.
    metrics_reporter = None
    if validator_config.opentsdb_url:
        LOGGER.info("Adding metrics reporter: url=%s, db=%s",
                    validator_config.opentsdb_url,
                    validator_config.opentsdb_db)

        url = urlparse(validator_config.opentsdb_url)
        proto, db_server, db_port, = url.scheme, url.hostname, url.port

        registry = MetricsRegistry()
        metrics.init_metrics(registry=registry)

        metrics_reporter = InfluxReporter(
            registry=registry,
            reporting_interval=10,
            database=validator_config.opentsdb_db,
            prefix="sawtooth_validator",
            port=db_port,
            protocol=proto,
            server=db_server,
            username=validator_config.opentsdb_username,
            password=validator_config.opentsdb_password)
        metrics_reporter.start()
    else:
        metrics.init_metrics()

    # Verify state integrity before startup
    global_state_db, blockstore = state_verifier.get_databases(
        bind_network, path_config.data_dir)

    state_verifier.verify_state(global_state_db, blockstore, bind_component,
                                validator_config.scheduler)

    # Explicitly drop this, so there are not two db instances
    global_state_db.drop()
    global_state_db = None

    LOGGER.info('Starting validator with %s scheduler',
                validator_config.scheduler)

    component_workers = validator_config.component_thread_pool_workers
    network_workers = validator_config.network_thread_pool_workers
    sig_workers = validator_config.signature_thread_pool_workers
    validator = Validator(bind_network,
                          bind_component,
                          bind_consensus,
                          endpoint,
                          validator_config.peering,
                          validator_config.seeds,
                          validator_config.peers,
                          path_config.data_dir,
                          path_config.config_dir,
                          identity_signer,
                          path_config.key_dir,
                          validator_config.scheduler,
                          validator_config.permissions,
                          validator_config.minimum_peer_connectivity,
                          validator_config.maximum_peer_connectivity,
                          validator_config.state_pruning_block_depth,
                          validator_config.fork_cache_keep_time,
                          validator_config.network_public_key,
                          validator_config.network_private_key,
                          roles=validator_config.roles,
                          component_thread_pool_workers=component_workers,
                          network_thread_pool_workers=network_workers,
                          signature_thread_pool_workers=sig_workers)

    # pylint: disable=broad-except
    try:
        validator.start()
    except KeyboardInterrupt:
        LOGGER.info("Initiating graceful "
                    "shutdown (press Ctrl+C again to force)")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)
    except GenesisError as genesis_err:
        LOGGER.error(str(genesis_err))
        sys.exit(1)
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        # Runs on every exit path above, including the sys.exit(1) calls.
        if metrics_reporter:
            metrics_reporter.stop()
        validator.stop()
예제 #9
0
def main(args):
    """Configure and run the validator process.

    The steps run in a fixed order: load the path configuration, build the
    validator configuration from the command-line values, set up logging,
    load the identity signer, check the data and log directories, resolve
    the endpoint, optionally start a metrics reporter, verify state, and
    finally construct and start the ``Validator``.

    Args:
        args: Parsed command-line options, indexed by option name
            (for example ``args['config_dir']`` or ``args['verbose']``).

    Raises:
        SystemExit: With status 1 when configuration loading fails, when
            any initialization error was recorded, or when
            ``validator.start()`` raises anything other than
            ``KeyboardInterrupt``.
    """
    # Path configuration comes first: every later step reads directories
    # (config, log, data, key) from it.
    try:
        path_config = load_path_config(config_dir=args['config_dir'])
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    # Command-line values are wrapped in a ValidatorConfig and handed to
    # load_validator_config together with the config directory.
    # presumably this merges them with file-based settings -- TODO confirm
    try:
        opts_config = ValidatorConfig(
            bind_component=args['bind_component'],
            bind_network=args['bind_network'],
            bind_consensus=args['bind_consensus'],
            endpoint=args['endpoint'],
            maximum_peer_connectivity=args['maximum_peer_connectivity'],
            minimum_peer_connectivity=args['minimum_peer_connectivity'],
            roles=args['roles'],
            opentsdb_db=args['opentsdb_db'],
            opentsdb_url=args['opentsdb_url'],
            peering=args['peering'],
            peers=args['peers'],
            scheduler=args['scheduler'],
            seeds=args['seeds'],
            state_pruning_block_depth=args['state_pruning_block_depth'],
            fork_cache_keep_time=args['fork_cache_keep_time'],
        )

        validator_config = \
            load_validator_config(opts_config, path_config.config_dir)
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    # Default file logging into the configured log directory.
    try:
        log_configuration(log_dir=path_config.log_dir,
                          name="validator")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)

    # Process initial initialization errors, delaying the sys.exit(1) until
    # all errors have been reported to the user (via LOGGER.error()).  This
    # is intended to provide enough information to the user so they can correct
    # multiple errors before restarting the validator.
    init_errors = False
    try:
        identity_signer = load_identity_signer(
            key_dir=path_config.key_dir,
            key_name='validator')
    except LocalConfigurationError as e:
        # identity_signer stays unbound on this path; that is safe only
        # because init_errors forces sys.exit(1) before it is used below.
        LOGGER.error(str(e))
        init_errors = True

    log_config = get_log_config()
    if not init_errors:
        if log_config is not None:
            log_configuration(log_config=log_config)
            # Console logging is only initialised when the log config has
            # a 'root' entry.
            if log_config.get('root') is not None:
                init_console_logging(verbose_level=args['verbose'])
        else:
            # NOTE(review): log_configuration was already called above with
            # these same arguments -- confirm the repeat is intentional.
            log_configuration(log_dir=path_config.log_dir,
                              name="validator")

    try:
        version = pkg_resources.get_distribution(DISTRIBUTION_NAME).version
    except pkg_resources.DistributionNotFound:
        version = 'UNKNOWN'
    LOGGER.info(
        '%s (Hyperledger Sawtooth) version %s', DISTRIBUTION_NAME, version)

    # Skip building the joined config string unless INFO is actually emitted.
    # to_toml_string() is iterated item by item, so it is expected to yield
    # individual lines rather than one string -- TODO confirm
    if LOGGER.isEnabledFor(logging.INFO):
        LOGGER.info(
            '; '.join([
                'config [path]: {}'.format(line)
                for line in path_config.to_toml_string()
            ])
        )

    # Both checks always run so that every directory problem is recorded
    # before the deferred exit below.
    if not check_directory(path=path_config.data_dir,
                           human_readable_name='Data'):
        init_errors = True
    if not check_directory(path=path_config.log_dir,
                           human_readable_name='Log'):
        init_errors = True

    # With no explicit endpoint the network bind address is used instead,
    # unless its host is "*", "0.0.0.0" or a name returned by
    # netifaces.interfaces(); those are reported as initialization errors.
    endpoint = validator_config.endpoint
    if endpoint is None:
        # Need to use join here to get the string "0.0.0.0". Otherwise,
        # bandit thinks we are binding to all interfaces and returns a
        # Medium security risk.
        interfaces = ["*", ".".join(["0", "0", "0", "0"])]
        interfaces += netifaces.interfaces()
        endpoint = validator_config.bind_network
        parsed_endpoint = urlparse(validator_config.bind_network)
        for interface in interfaces:
            if interface == parsed_endpoint.hostname:
                LOGGER.error("Endpoint must be set when using %s", interface)
                init_errors = True

    # Deferred exit: all errors collected above have been logged by now.
    if init_errors:
        LOGGER.error("Initialization errors occurred (see previous log "
                     "ERROR messages), shutting down.")
        sys.exit(1)
    bind_network = validator_config.bind_network
    bind_component = validator_config.bind_component
    bind_consensus = validator_config.bind_consensus

    # Prefix "tcp://" onto any bind address that does not already contain
    # it. bind_consensus is left untouched when it is falsy.
    if "tcp://" not in bind_network:
        bind_network = "tcp://" + bind_network

    if "tcp://" not in bind_component:
        bind_component = "tcp://" + bind_component

    if bind_consensus and "tcp://" not in bind_consensus:
        bind_consensus = "tcp://" + bind_consensus

    if validator_config.network_public_key is None or \
            validator_config.network_private_key is None:
        LOGGER.warning("Network key pair is not configured, Network "
                       "communications between validators will not be "
                       "authenticated or encrypted.")

    # A reporter is created and started only when opentsdb_url is set;
    # otherwise metrics.init_metrics() is called without a registry.
    # NOTE(review): the reporter is started before state verification, but
    # only the try/finally around validator.start() stops it -- an exception
    # raised in between would leave it running. Confirm whether that matters.
    metrics_reporter = None
    if validator_config.opentsdb_url:
        LOGGER.info("Adding metrics reporter: url=%s, db=%s",
                    validator_config.opentsdb_url,
                    validator_config.opentsdb_db)

        url = urlparse(validator_config.opentsdb_url)
        proto, db_server, db_port, = url.scheme, url.hostname, url.port

        registry = MetricsRegistry()
        metrics.init_metrics(registry=registry)

        metrics_reporter = InfluxReporter(
            registry=registry,
            reporting_interval=10,
            database=validator_config.opentsdb_db,
            prefix="sawtooth_validator",
            port=db_port,
            protocol=proto,
            server=db_server,
            username=validator_config.opentsdb_username,
            password=validator_config.opentsdb_password)
        metrics_reporter.start()
    else:
        metrics.init_metrics()

    # Verify state integrity before startup
    global_state_db, blockstore = state_verifier.get_databases(
        bind_network,
        path_config.data_dir)

    state_verifier.verify_state(
        global_state_db,
        blockstore,
        bind_component,
        validator_config.scheduler)

    # Explicitly drop this, so there are not two db instances
    global_state_db.drop()
    global_state_db = None

    LOGGER.info(
        'Starting validator with %s scheduler',
        validator_config.scheduler)

    component_workers = validator_config.component_thread_pool_workers
    network_workers = validator_config.network_thread_pool_workers
    sig_workers = validator_config.signature_thread_pool_workers
    validator = Validator(
        bind_network,
        bind_component,
        bind_consensus,
        endpoint,
        validator_config.peering,
        validator_config.seeds,
        validator_config.peers,
        path_config.data_dir,
        path_config.config_dir,
        identity_signer,
        validator_config.scheduler,
        validator_config.permissions,
        validator_config.minimum_peer_connectivity,
        validator_config.maximum_peer_connectivity,
        validator_config.state_pruning_block_depth,
        validator_config.fork_cache_keep_time,
        validator_config.network_public_key,
        validator_config.network_private_key,
        roles=validator_config.roles,
        component_thread_pool_workers=component_workers,
        network_thread_pool_workers=network_workers,
        signature_thread_pool_workers=sig_workers)

    # KeyboardInterrupt is logged and falls through to the cleanup without
    # calling sys.exit; every other exception is logged and exits with
    # status 1. The finally clause runs in all of these cases.
    # pylint: disable=broad-except
    try:
        validator.start()
    except KeyboardInterrupt:
        LOGGER.info("Initiating graceful "
                    "shutdown (press Ctrl+C again to force)")
    except LocalConfigurationError as local_config_err:
        LOGGER.error(str(local_config_err))
        sys.exit(1)
    except GenesisError as genesis_err:
        LOGGER.error(str(genesis_err))
        sys.exit(1)
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        if metrics_reporter:
            metrics_reporter.stop()
        validator.stop()
예제 #10
0
def main():
    """Configure and run the REST API process.

    Installs a ``ZMQEventLoop`` as the asyncio event loop, builds the REST
    API configuration from ``sys.argv``, opens the validator connection,
    sets up logging, optionally starts an ``InfluxReporter`` and then calls
    ``start_rest_api``.

    The metrics reporter (when one was started) is stopped and the
    connection (when one was opened) is closed on every exit path.

    Raises:
        SystemExit: With status 1 when the bind address cannot be parsed as
            ``host:port`` or when any other exception is raised during
            startup or while serving.
    """
    loop = ZMQEventLoop()
    asyncio.set_event_loop(loop)

    connection = None
    # Tracked outside the try block so the finally clause can stop it.
    reporter = None
    try:
        opts = parse_args(sys.argv[1:])
        opts_config = RestApiConfig(
            bind=opts.bind,
            connect=opts.connect,
            timeout=opts.timeout,
            opentsdb_url=opts.opentsdb_url,
            opentsdb_db=opts.opentsdb_db)
        rest_api_config = load_rest_api_config(opts_config)

        # Prefix "tcp://" when the connect address does not contain it.
        connect_url = rest_api_config.connect
        if "tcp://" not in connect_url:
            connect_url = "tcp://" + connect_url

        connection = Connection(connect_url)

        log_config = get_log_config(filename="rest_api_log_config.toml")

        # If no toml, try loading yaml
        if log_config is None:
            log_config = get_log_config(filename="rest_api_log_config.yaml")

        if log_config is not None:
            log_configuration(log_config=log_config)
        else:
            log_dir = get_log_dir()
            log_configuration(log_dir=log_dir, name="rest_api")
        init_console_logging(verbose_level=opts.verbose)

        # Only the first bind entry is used. A ValueError here means either
        # the split did not produce exactly two parts or the port is not an
        # integer.
        try:
            host, port = rest_api_config.bind[0].split(":")
            port = int(port)
        except ValueError:
            print("Unable to parse binding {}: Must be in the format"
                  " host:port".format(rest_api_config.bind[0]))
            sys.exit(1)

        wrapped_registry = None
        if rest_api_config.opentsdb_url:
            LOGGER.info("Adding metrics reporter: url=%s, db=%s",
                        rest_api_config.opentsdb_url,
                        rest_api_config.opentsdb_db)

            metrics_url = urlparse(rest_api_config.opentsdb_url)
            proto = metrics_url.scheme
            db_server = metrics_url.hostname
            db_port = metrics_url.port

            registry = MetricsRegistry()
            wrapped_registry = MetricsRegistryWrapper(registry)

            reporter = InfluxReporter(
                registry=registry,
                reporting_interval=10,
                database=rest_api_config.opentsdb_db,
                prefix="sawtooth_rest_api",
                port=db_port,
                protocol=proto,
                server=db_server,
                username=rest_api_config.opentsdb_username,
                password=rest_api_config.opentsdb_password)
            reporter.start()

        start_rest_api(
            host,
            port,
            connection,
            int(rest_api_config.timeout),
            wrapped_registry)
        # pylint: disable=broad-except
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        # Stop the reporter that was started above. The nested finally
        # guarantees the connection is still closed if stopping it raises.
        try:
            if reporter is not None:
                reporter.stop()
        finally:
            if connection is not None:
                connection.close()
예제 #11
0
class BaseMetricsService(Service, MetricsServiceAPI):
    """
    Service that owns a metrics registry and reports its contents to an InfluxDB instance.

    Instruments are registered on and retrieved from :attr:`registry`. Subclasses supply the
    reporting loop by implementing :meth:`continuously_report`, which :meth:`run` starts as a
    daemon task.
    """

    logger = get_logger('trinity.components.builtin.metrics.MetricsService')

    # Lower bound on the time between two "unable to report" warnings.
    MIN_SECONDS_BETWEEN_ERROR_LOGS = 60

    def __init__(self, influx_server: str, influx_user: str,
                 influx_password: str, influx_database: str, host: str,
                 port: int, protocol: str, reporting_frequency: int):
        registry = HostMetricsRegistry(host)
        self._registry = registry
        self._reporter = InfluxReporter(
            registry=registry,
            server=influx_server,
            port=port,
            protocol=protocol,
            database=influx_database,
            username=influx_user,
            password=influx_password,
        )

        self._influx_server = influx_server
        self._reporting_frequency = reporting_frequency

        # Throttling state for report_now(): the most recent error that has not been logged
        # yet, and the time.monotonic() value of the last logged warning. 0.0 means that no
        # warning has been logged so far.
        self._unreported_error: Exception = None
        self._last_time_reported: float = 0.0

    @property
    def registry(self) -> HostMetricsRegistry:
        """
        The :class:`trinity.components.builtin.metrics.registry.HostMetricsRegistry` on which
        metrics instruments are registered and from which they are retrieved.
        """
        return self._registry

    async def run(self) -> None:
        """
        Log the reporting target, start :meth:`continuously_report` as a daemon task and wait
        until the manager reports that the service has finished.
        """
        self.logger.info("Reporting metrics to %s", self._influx_server)
        self.manager.run_daemon_task(self.continuously_report)
        await self.manager.wait_finished()

    def report_now(self) -> None:
        """
        Push the current metrics through the reporter, throttling warnings about failures.

        The first failure is logged immediately. Later failures are logged only once
        ``MIN_SECONDS_BETWEEN_ERROR_LOGS`` has passed since the last warning; until then just the
        most recent one is remembered, so an earlier, different error may be overwritten. After
        a successful report a remembered error is still logged as soon as the throttle allows.
        """
        try:
            self._reporter.report_now()
        except (HTTPException, ConnectionError) as failure:
            # Expected to be called every few seconds, so a broken connection must not flood
            # the log: either warn now or keep only the latest failure for later.
            if not self._is_justified_to_log_error():
                self._unreported_error = failure
                return
            self._log_and_clear(failure)
            return

        # The report succeeded; make sure the last withheld failure is eventually logged.
        if self._unreported_error is not None and self._is_justified_to_log_error():
            self._log_and_clear(self._unreported_error)

    def _log_and_clear(self, error: Exception) -> None:
        """Warn about ``error``, forget any withheld error and restart the throttle timer."""
        self.logger.warning("Unable to report metrics: %s", error)
        self._unreported_error = None
        self._last_time_reported = time.monotonic()

    def _is_justified_to_log_error(self) -> bool:
        """Return True if nothing was logged yet or the minimum gap since the last log passed."""
        if self._last_time_reported == 0.0:
            return True
        seconds_since_last_log = time.monotonic() - self._last_time_reported
        return seconds_since_last_log > self.MIN_SECONDS_BETWEEN_ERROR_LOGS

    @abstractmethod
    async def continuously_report(self) -> None:
        """Reporting loop started by :meth:`run`; implemented by subclasses."""
        ...
예제 #12
0
class Metrics(Service):
    logger = logging.getLogger('alexandria.metrics.Metrics')

    def __init__(self,
                 host: str,
                 client: ClientAPI,
                 kademlia: KademliaAPI,
                 influx_server: str,
                 influx_user: str,
                 influx_password: str,
                 influx_database: str,
                 influx_port: int = 443,
                 influx_protocol: str = 'https',
                 reporting_frequency: int = 10,
                 process_collection_frequency: int = 3):
        """
        Store the collaborators and build the registry/reporter pair.

        ``host`` is handed to ``HostMetricsRegistry``; the ``influx_*`` values
        are handed to ``InfluxReporter``. The two frequencies are stored and
        later passed by ``run`` to the reporting and system-metrics tasks.
        """
        self.client = client
        self.kademlia = kademlia

        self._influx_server = influx_server
        self._process_collection_frequency = process_collection_frequency
        self._reporting_frequency = reporting_frequency

        # The reporter is bound to the registry that all instruments use.
        registry = HostMetricsRegistry(host)
        self._registry = registry
        self._reporter = InfluxReporter(
            registry=registry,
            server=influx_server,
            port=influx_port,
            protocol=influx_protocol,
            database=influx_database,
            username=influx_user,
            password=influx_password,
        )

    @classmethod
    def from_cli_args(cls,
                      args: Namespace,
                      client: ClientAPI,
                      kademlia: KademliaAPI,
                      ) -> 'Metrics':
        """
        Alternate constructor that reads the ``metrics_*`` options from ``args``.

        ``process_collection_frequency`` is not read from ``args``, so the
        constructor default applies.
        """
        init_kwargs = dict(
            host=args.metrics_host,
            client=client,
            kademlia=kademlia,
            influx_server=args.metrics_influx_server,
            influx_user=args.metrics_influx_user,
            influx_password=args.metrics_influx_password,
            influx_database=args.metrics_influx_database,
            influx_port=args.metrics_influx_port,
            influx_protocol=args.metrics_influx_protocol,
            reporting_frequency=args.metrics_reporting_frequency,
        )
        return cls(**init_kwargs)

    async def run(self) -> None:
        """
        Start every metrics daemon task, then wait for the service to finish.

        Tasks are started in this order: the four periodic collectors, one
        inbound-message task per entry in ``PAYLOAD_TYPES``, and one event
        task per client event listed below.
        """
        spawn = self.manager.run_daemon_task

        # (task, argument passed to the task as its ``frequency``)
        periodic_tasks = (
            (self._continuously_report, self._reporting_frequency),
            (self._collect_system_metrics, self._process_collection_frequency),
            (self._report_routing_table_stats, 10),
            (self._report_content_manager_stats, 10),
        )
        for task, frequency in periodic_tasks:
            spawn(task, frequency)

        self.logger.info('Metrics started')

        for payload_type in PAYLOAD_TYPES:
            spawn(self._report_inbound_message_stats, payload_type)

        # (attribute on ``self.client.events``, metric name suffix)
        event_metrics = (
            ('session_created', 'events/session-created'),
            ('session_idle', 'events/session-idle'),
            ('handshake_complete', 'events/handshake-complete'),
            ('handshake_timeout', 'events/handshake-timeout'),
            ('datagram_received', 'datagram/inbound'),
            ('datagram_sent', 'datagram/outbound'),
            ('sent_ping', 'messages/outbound/Ping'),
            ('sent_pong', 'messages/outbound/Pong'),
            ('sent_find_nodes', 'messages/outbound/FindNodes'),
            ('sent_found_nodes', 'messages/outbound/FoundNodes'),
            ('sent_advertise', 'messages/outbound/Advertise'),
            ('sent_ack', 'messages/outbound/Ack'),
            ('sent_locate', 'messages/outbound/Locate'),
            ('sent_locations', 'messages/outbound/Locations'),
            ('sent_retrieve', 'messages/outbound/Retrieve'),
            ('sent_chunk', 'messages/outbound/Chunk'),
        )
        for event_name, suffix in event_metrics:
            # Looked up per iteration so each event is resolved right before
            # its task is started.
            spawn(self._report_event, getattr(self.client.events, event_name), suffix)

        await self.manager.wait_finished()

    async def _continuously_report(self, frequency: int) -> None:
        """Call the reporter's ``report_now`` once for each item yielded by ``every(frequency)``."""
        reporter = self._reporter
        async for _tick in every(frequency):
            reporter.report_now()

    async def _report_event(self, event: EventAPI[Any], suffix: str) -> None:
        """
        Count occurrences of ``event`` under ``alexandria.<suffix>``.

        A counter and a meter are obtained from the registry; both are
        updated once for every item received from the event subscription.
        """
        registry = self._registry
        occurrence_counter = registry.counter(f'alexandria.{suffix}.counter')
        occurrence_meter = registry.meter(f'alexandria.{suffix}.meter')

        async with event.subscribe() as stream:
            async for _occurrence in stream:
                occurrence_counter.inc()
                occurrence_meter.mark()

    async def _report_routing_table_stats(self, frequency: int) -> None:
        """Publish the routing table's ``total_nodes`` stat to a gauge on every tick."""
        total_nodes_gauge = self._registry.gauge('alexandria.dht/routing-table/total-nodes.gauge')
        async for _tick in every(frequency):
            table_stats = self.kademlia.routing_table.get_stats()
            total_nodes_gauge.set_value(table_stats.total_nodes)

    async def _report_inbound_message_stats(self, payload_type: Type[ssz.Serializable]) -> None:
        """
        Count inbound messages of ``payload_type``.

        The metric names embed ``payload_type.__name__``. The counter and the
        meter are updated once per item received from the dispatcher
        subscription; the item itself is not inspected.
        """
        type_name = payload_type.__name__
        registry = self._registry
        inbound_counter = registry.counter(f'alexandria.messages/inbound/{type_name}.counter')
        inbound_meter = registry.meter(f'alexandria.messages/inbound/{type_name}.meter')

        async with self.client.message_dispatcher.subscribe(payload_type) as stream:
            async for _message in stream:
                inbound_counter.inc()
                inbound_meter.mark()

    async def _report_content_manager_stats(self, frequency: int) -> None:
        """
        Publish content manager statistics to gauges on every tick.

        Each ``size`` gauge is set to the matching ``*_total_capacity`` stat
        minus the matching ``*_capacity`` stat.
        """
        make_gauge = self._registry.gauge

        durable_items = make_gauge('alexandria.content/durable-db/item-count.gauge')

        ephemeral_db_items = make_gauge('alexandria.content/ephemeral-db/item-count.gauge')
        ephemeral_db_capacity = make_gauge('alexandria.content/ephemeral-db/capacity.gauge')
        ephemeral_db_size = make_gauge('alexandria.content/ephemeral-db/size.gauge')

        ephemeral_index_capacity = make_gauge('alexandria.content/ephemeral-index/capacity.gauge')
        ephemeral_index_size = make_gauge('alexandria.content/ephemeral-index/size.gauge')

        cache_db_items = make_gauge('alexandria.content/cache-db/item-count.gauge')
        cache_db_capacity = make_gauge('alexandria.content/cache-db/capacity.gauge')
        cache_db_size = make_gauge('alexandria.content/cache-db/size.gauge')

        cache_index_capacity = make_gauge('alexandria.content/cache-index/capacity.gauge')
        cache_index_size = make_gauge('alexandria.content/cache-index/size.gauge')

        async for _tick in every(frequency):
            stats = self.kademlia.content_manager.get_stats()

            # Durable storage
            durable_items.set_value(stats.durable_item_count)

            # Ephemeral database
            ephemeral_db_items.set_value(stats.ephemeral_db_count)
            ephemeral_db_capacity.set_value(stats.ephemeral_db_capacity)
            ephemeral_db_used = stats.ephemeral_db_total_capacity - stats.ephemeral_db_capacity
            ephemeral_db_size.set_value(ephemeral_db_used)

            # Ephemeral index
            ephemeral_index_capacity.set_value(stats.ephemeral_index_capacity)
            ephemeral_index_used = (
                stats.ephemeral_index_total_capacity - stats.ephemeral_index_capacity
            )
            ephemeral_index_size.set_value(ephemeral_index_used)

            # Cache database
            cache_db_items.set_value(stats.cache_db_count)
            cache_db_capacity.set_value(stats.cache_db_capacity)
            cache_db_used = stats.cache_db_total_capacity - stats.cache_db_capacity
            cache_db_size.set_value(cache_db_used)

            # Cache index
            cache_index_capacity.set_value(stats.cache_index_capacity)
            cache_index_used = stats.cache_index_total_capacity - stats.cache_index_capacity
            cache_index_size.set_value(cache_index_used)

    async def _collect_system_metrics(self, frequency: int) -> None:
        """
        Publish system statistics, comparing each sample with the one before it.

        A baseline is read with ``read_system_stats()`` before the loop. On
        every tick a new sample is read: the CPU gauges get the change since
        the previous sample divided by ``frequency``, the memory gauges get
        the current values, and the disk and network meters are marked with
        the change since the previous sample.
        """
        registry = self._registry

        sysload_gauge = registry.gauge('alexandria.system/cpu/sysload.gauge')
        syswait_gauge = registry.gauge('alexandria.system/cpu/syswait.gauge')

        mem_used_gauge = registry.gauge('alexandria.system/memory/used.gauge')
        mem_free_gauge = registry.gauge('alexandria.system/memory/free.gauge')

        disk_read_meter = registry.meter('alexandria.system/disk/readdata.meter')
        disk_write_meter = registry.meter('alexandria.system/disk/writedata.meter')

        packets_in_meter = registry.meter('alexandria.network/in/packets/total.meter')
        packets_out_meter = registry.meter('alexandria.network/out/packets/total.meter')

        last_sample = read_system_stats()
        async for _tick in every(frequency, initial_delay=frequency):
            sample = read_system_stats()

            # CPU: change since the previous sample, divided by frequency.
            cpu_time_delta = sample.cpu_stats.global_time - last_sample.cpu_stats.global_time
            sysload_gauge.set_value(cpu_time_delta / frequency)
            cpu_wait_delta = (
                sample.cpu_stats.global_wait_io - last_sample.cpu_stats.global_wait_io
            )
            syswait_gauge.set_value(cpu_wait_delta / frequency)

            # Memory: current values, no comparison with the previous sample.
            mem_used_gauge.set_value(sample.memory_stats.used)
            mem_free_gauge.set_value(sample.memory_stats.free)

            # Disk: bytes read/written since the previous sample.
            bytes_read = sample.disk_stats.read_bytes - last_sample.disk_stats.read_bytes
            disk_read_meter.mark(bytes_read)

            bytes_written = sample.disk_stats.write_bytes - last_sample.disk_stats.write_bytes
            disk_write_meter.mark(bytes_written)

            # Network: packets in/out since the previous sample.
            packets_in = sample.network_stats.in_packets - last_sample.network_stats.in_packets
            packets_in_meter.mark(packets_in)
            packets_out = sample.network_stats.out_packets - last_sample.network_stats.out_packets
            packets_out_meter.mark(packets_out)

            last_sample = sample