Example #1
 def test_get_oasis_prometheus_error_if_metric_not_found(self, _):
     try:
         get_oasis_prometheus(ENDPOINT, PARAMS_WITH_MISSING, LOGGER)
         self.fail('Expected MetricNotFoundException')
     except MetricNotFoundException:
         pass
Example #2
 def test_get_oasis_prometheus_result_if_endpoint_has_all_results(self, _):
     ret = get_oasis_prometheus(ENDPOINT, PARAMS, LOGGER)
     self.assertEqual(PROCESSED_RESULT, ret)
Example #3
 def test_get_oasis_prometheus_error_if_no_params_where_given(self, _):
     try:
         get_oasis_prometheus(ENDPOINT, {}, LOGGER)
         self.fail('Expected NoParametersGivenException')
     except NoParametersGivenException:
         pass
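The three tests above cover the error paths of get_oasis_prometheus. Judging from the later examples in this listing, the function takes an endpoint URL, a list of metric names, and a logger, and returns a dict keyed by metric name. A minimal usage sketch under that assumption (the endpoint URL and logger below are illustrative placeholders, not the ENDPOINT/LOGGER constants used by the tests, and the exceptions are assumed to be importable as in the test module):

import logging

logger = logging.getLogger('example')
endpoint = 'http://localhost:9100/metrics'  # assumed Node Exporter endpoint
metrics = ['process_cpu_seconds_total']

try:
    # Appears to return a dict keyed by metric name (see Examples #4 and #5)
    data = get_oasis_prometheus(endpoint, metrics, logger)
    print(data['process_cpu_seconds_total'])
except NoParametersGivenException:
    print('No metrics were requested')
except MetricNotFoundException:
    print('A requested metric is missing from the endpoint')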
Example #4
def node_from_node_config(node_config: NodeConfig):
    # Test the connection to the node's API server
    log_and_print('Trying to retrieve data from the API of {}'.format(
        node_config.node_name))

    # Try to ping the API to see if configuration is correct
    try:
        pong_response = oasis_api_data_wrapper.ping_api(
            node_config.node_api_url)
        if pong_response != "pong":
            log_and_print('WARNING: API of node {} is not reachable.'.format(
                node_config.node_name))
        else:
            log_and_print('Success. API is configured correctly')
    except Exception as e:
        logger_general.error(e)
        raise InitialisationException(
            'Failed to retrieve data from the API of {}'.format(
                node_config.node_name))

    # Check that the node name matches the one configured in the API server
    log_and_print('Trying to retrieve node name {} from API'.format(
        node_config.node_name))

    # Check if the node name exists by pinging the node.
    # If it doesn't, the node is misconfigured.
    try:
        pong_response = oasis_api_data_wrapper.ping_node(
            node_config.node_api_url, node_config.node_name)
        if pong_response != "pong":
            log_and_print(
                'WARNING: Node {} is not configured properly, PANIC node '
                'name should match that set in the API Server.'.format(
                    node_config.node_name))
        else:
            log_and_print('Success. Node name is configured correctly')
    except Exception as e:
        logger_general.error(e)
        raise InitialisationException(
            'Failed to retrieve data from the API of {}'.format(
                node_config.node_name))

    # Get node type
    node_type = NodeType.VALIDATOR_FULL_NODE \
        if node_config.node_is_validator \
        else NodeType.NON_VALIDATOR_FULL_NODE

    # If the node is a validator, check that the node public key exists by
    # retrieving the node's details and staking address through the API.
    if node_config.node_is_validator:
        log_and_print('Trying to retrieve the public key of node {} from '
                      'the API'.format(node_config.node_name))
        try:
            node_details = oasis_api_data_wrapper.get_node(
                node_config.node_api_url, node_config.node_name,
                node_config.node_public_key)

            entity_public_key = node_details['entity_id']

            staking_address = oasis_api_data_wrapper.get_staking_address(
                node_config.node_api_url, entity_public_key)

        except Exception as e:
            logger_general.error(e)
            raise InitialisationException(
                'Failed validating node public key {}'.format(
                    node_config.node_public_key))
    else:
        entity_public_key = EMPTY_URL
        staking_address = EMPTY_URL

    # Prometheus configuration should be checked on startup if peer monitoring
    # is enabled.
    try:
        peers_response = oasis_api_data_wrapper.get_prometheus_gauge(
            node_config.node_api_url, node_config.node_name, \
            "tendermint_p2p_peers")
        if isinstance(peers_response, int):
            log_and_print(
                'WARNING: Node {} does not have Prometheus enabled. Please '
                'enable Prometheus to monitor data such as the number of '
                'peers.'.format(node_config.node_name))
        else:
            log_and_print('Success. Prometheus is configured correctly')
    except Exception as e:
        logger_general.error(e)
        raise InitialisationException(
            'Failed to retrieve Prometheus Data from API of {}'.format(
                node_config.node_name))

    # Node Exporter should be an optional tool for System Monitoring
    if node_config.node_exporter_url != "":
        try:
            metric_to_test = ['process_cpu_seconds_total']

            prometheus_data = get_oasis_prometheus( \
                node_config.node_exporter_url, metric_to_test, logger_general)

            process_cpu_seconds_total = ( \
                prometheus_data['process_cpu_seconds_total'])

            node_exporter_url = node_config.node_exporter_url
            log_and_print('Success. Node Exporter is configured correctly')
        except Exception as e:
            log_and_print(e)
            logger_general.error(e)
            raise InitialisationException(
                'Failed to retrieve Node Exporter Data from URL of {}'.format(
                    node_config.node_name))
    else:
        node_exporter_url = EMPTY_URL

    # Convert the node public key into a Consensus Public Key and a
    # Tendermint Address (validators only)
    log_and_print('Trying to convert the node {} public key into a Consensus '
                  'Public Key and a Tendermint Address'.format(
                      node_config.node_name))

    # Retrieve the Consensus Public Key and the Tendermint Address
    if node_config.node_is_validator:
        try:
            consensus_public_key = oasis_api_data_wrapper. \
                get_registry_node(node_config.node_api_url, \
                                  node_config.node_name, \
                                  node_config.node_public_key)

            tendermint_address_key = oasis_api_data_wrapper. \
                get_tendermint_address(node_config.node_api_url, \
                                       str(consensus_public_key['consensus'][
                                               'id']))

            log_and_print('Successfully converted node public key into ' \
                          'Consensus Public Key and Tendermint Address')
        except Exception as e:
            logger_general.error(e)
            raise InitialisationException(
                'Failed to convert a node public key for the node {}'.format(
                    node_config.node_name))
    else:
        consensus_public_key = EMPTY_URL
        tendermint_address_key = EMPTY_URL

    chain_id = node_config.chain_name
    # Initialise node and load any state
    node = Node(node_config.node_name,
                node_config.node_api_url,
                node_exporter_url,
                node_type,
                node_config.node_public_key,
                chain_id,
                REDIS,
                node_config.is_archive_node,
                consensus_public_key,
                tendermint_address_key,
                staking_address,
                entity_public_key,
                internal_conf=InternalConf)
    node.load_state(logger_general)

    # Return node
    return node
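For context, a hedged sketch of how node_from_node_config might be driven. The positional NodeConfig arguments mirror the constructor call at the end of this listing; the concrete values (and the meaning of the final True flag) are illustrative assumptions, and the import paths are not shown here:

# Hypothetical configuration; argument order follows the NodeConfig(...) call
# in the last example of this listing.
config = NodeConfig(
    'oasis-node-1',                     # node_name (must match API server)
    'example-chain-id',                 # chain_name
    'http://172.16.0.2:8686',           # node_api_url
    'J4i/ADAze7jYjcmPZvTFHD/tMa3wt9AMeaQALPXZebs=',  # node_public_key
    True,                               # node_is_validator
    'http://172.16.0.2:9100/metrics',   # node_exporter_url
    True,                               # monitor_node
    True,                               # node_is_archive_node
    True)                               # final flag as in the last example

try:
    node = node_from_node_config(config)
except InitialisationException as e:
    print('Node initialisation failed: {}'.format(e))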
Example #5
    def monitor(self) -> None:
        metrics_to_monitor = [
            'process_cpu_seconds_total', 'go_memstats_alloc_bytes',
            'go_memstats_alloc_bytes_total', 'process_virtual_memory_bytes',
            'process_max_fds', 'process_open_fds', 'node_cpu_seconds_total',
            'node_filesystem_avail_bytes', 'node_filesystem_size_bytes',
            'node_memory_MemTotal_bytes', 'node_memory_MemAvailable_bytes'
        ]

        prometheus_data = get_oasis_prometheus(self.prometheus_endpoint,
                                               metrics_to_monitor, self.logger)

        try:
            process_cpu_seconds_total = ( \
                prometheus_data['process_cpu_seconds_total'])

            self._logger.debug('%s process_cpu_seconds_total: %s', self.system,
                               process_cpu_seconds_total)

            self.system.set_process_cpu_seconds_total(
                process_cpu_seconds_total, self.channels, self.logger)
        except:
            pass

        try:
            process_memory_usage = \
                (prometheus_data['go_memstats_alloc_bytes'] /
                 prometheus_data['go_memstats_alloc_bytes_total']) * 100

            process_memory_usage = float("{:.2f}".format(process_memory_usage))

            self._logger.debug('%s process_memory_usage: %s%%', self.system,
                               process_memory_usage)

            self.system.set_process_memory_usage(process_memory_usage, \
                                                 self.channels, self.logger)
        except:
            pass

        try:
            virtual_memory_usage = \
                prometheus_data['process_virtual_memory_bytes']

            self._logger.debug('%s virtual_memory_usage: %s', self.system,
                               virtual_memory_usage)

            self.system.set_virtual_memory_usage(virtual_memory_usage, \
                                                 self.channels, self.logger)
        except:
            pass

        try:
            open_file_descriptors = (prometheus_data['process_open_fds'] /
                                     prometheus_data['process_max_fds']) * 100

            open_file_descriptors = float("{:.2f}".format( \
                open_file_descriptors))

            self._logger.debug('%s open_file_descriptors: %s%%', self.system,
                               open_file_descriptors)

            self.system.set_open_file_descriptors(open_file_descriptors, \
                                                  self.channels, self.logger)
        except:
            pass

        try:
            node_cpu_seconds_idle = 0
            node_cpu_seconds_total = 0
            for j in prometheus_data['node_cpu_seconds_total']:
                if json.loads(j)['mode'] == 'idle':
                    node_cpu_seconds_idle += \
                        prometheus_data['node_cpu_seconds_total'][j]
                node_cpu_seconds_total += \
                    prometheus_data['node_cpu_seconds_total'][j]

            system_cpu_usage = (100 - ((node_cpu_seconds_idle \
                                        / node_cpu_seconds_total) * 100))

            system_cpu_usage = float("{:.2f}".format(system_cpu_usage))

            self._logger.debug('%s system_cpu_usage: %s%%', self.system,
                               system_cpu_usage)

            self.system.set_system_cpu_usage(system_cpu_usage, \
                                             self.channels, self.logger)
        except:
            pass

        try:
            system_ram_usage = \
                ((prometheus_data['node_memory_MemTotal_bytes'] -
                  prometheus_data['node_memory_MemAvailable_bytes']) /
                 prometheus_data['node_memory_MemTotal_bytes']) * 100

            system_ram_usage = float("{:.2f}".format(system_ram_usage))

            self._logger.debug('%s system_ram_usage: %s%%', self.system,
                               system_ram_usage)

            self.system.set_system_ram_usage(system_ram_usage, \
                                             self.channels, self.logger)
        except:
            pass

        node_filesystem_avail_bytes = 0
        node_filesystem_size_bytes = 0
        try:
            for j in prometheus_data['node_filesystem_avail_bytes']:
                node_filesystem_avail_bytes += \
                    prometheus_data['node_filesystem_avail_bytes'][j]

            for j in prometheus_data['node_filesystem_size_bytes']:
                node_filesystem_size_bytes += \
                    prometheus_data['node_filesystem_size_bytes'][j]

            system_storage_usage = 100 - (
                (node_filesystem_avail_bytes / node_filesystem_size_bytes)
                * 100)

            system_storage_usage = float("{:.2f}".format(system_storage_usage))

            self._logger.debug('%s system_storage_usage: %s%%', self.system,
                               system_storage_usage)

            self.system.set_system_storage_usage(system_storage_usage, \
                                                 self.channels, self.logger)

            # Output status
            self._logger.info('%s status: %s', self._monitor_name, \
                self.status())
        except:
            pass
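The system CPU step above assumes that node_cpu_seconds_total comes back as a mapping whose keys are JSON-encoded label sets (hence json.loads(j)['mode']) and whose values are the counter samples. A standalone restatement of that calculation under the same assumption, for clarity:

import json
from typing import Dict

def system_cpu_usage_percent(node_cpu_seconds_total: Dict[str, float]) -> float:
    # Sum idle time (mode == 'idle') and total time across all label sets,
    # then report the non-idle share as a percentage, rounded to two decimals.
    idle = sum(value for labels, value in node_cpu_seconds_total.items()
               if json.loads(labels)['mode'] == 'idle')
    total = sum(node_cpu_seconds_total.values())
    return round(100 - (idle / total) * 100, 2)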
Example #6
def get_node(nodes_so_far: List[NodeConfig],
             oasis_api_data_wrapper: OasisApiWrapper) -> Optional[NodeConfig]:
    # Get node's name
    node_names_so_far = [n.node_name for n in nodes_so_far]
    while True:
        node_name = input('Unique node name that is identical to the node name '
                          'specified in the API server configuration:\n')
        if node_name in node_names_so_far:
            print('Node name must be unique.')
        elif len(node_name) == 0:
            print('Node name cannot be empty.')
        else:
            break

    # Get the current chain ID
    while True:
        chain_name = input('Node\'s chain ID; this can be found at '
                           'https://oasis.smartstake.io/:\n')
        if len(chain_name) == 0:
            print('Node\'s Chain ID cannot be empty.')
        else:
            break

    # Get node's API Url
    while True:
        api_url = input('Node\'s API url (typically http://API_IP:8686):\n')
        print('Trying to connect to endpoint {}/api/ping'.format(api_url))
        try:
            oasis_api_data_wrapper.ping_api(api_url)
            print('Success.')
            break
        except Exception:
            if not yn_prompt('Failed to connect to endpoint. Do '
                             'you want to try again? (Y/n)\n'):
                if not yn_prompt(
                        'Do you still want to add the node? (Y/n)\n'):
                    return None
                else:
                    break

    # Ask if node is a validator
    node_is_validator = yn_prompt('Is this node a validator? (Y/n)\n')


    # Get Node's Node Exporter Url
    while True:
        node_exporter_url = input('Node Exporter url (typically '
                        'http://NODE_EXPORTER_URL:9100/metrics), this will be '
                        'used to monitor the system statistics. If you do not '
                        'wish to monitor system statistics leave it blank:\n')
        if node_exporter_url:
            print('Trying to access Node Exporter at {}'.format(
                node_exporter_url))
            try:
                metric_to_test = ['process_cpu_seconds_total']

                prometheus_data = get_oasis_prometheus( \
                    node_exporter_url, metric_to_test, DUMMY_LOGGER)

                process_cpu_seconds_total = ( \
                        prometheus_data['process_cpu_seconds_total'])

                oasis_api_data_wrapper.ping_api(api_url)
                print('Successfully returned CPU seconds total', \
                    process_cpu_seconds_total)
                break
            except Exception:
                if not yn_prompt('Failed to connect to endpoint. Do '
                                'you want to try again? (Y/n)\n'):
                    if not yn_prompt(
                            'Do you still want to add the node? (Y/n)\n'):
                        return None
                    else:
                        break
        else:
            print('Skipping system monitoring setup; the system where the node '
                  'is installed will not be monitored.')
            break

    # Ask if the node is an archive node.
    # Note: if the node is a validator, it must also be an archive node, but
    # the question is asked anyway in case this changes in future updates.
    node_is_archive_node = yn_prompt('Is this node an archive node? (Y/n)\n')

    monitor_node = yn_prompt('Would you like to monitor this node? (Y/n) \n')

    # Get validator's node public key
    if node_is_validator:
        while True:
            node_public_key = input('Node\'s public identifier, found inside '
                            'the file entity.json within the key-value pair '
                            '"nodes":"NODE_PUBLIC_KEY" on the machine running '
                            'the node (typically in the format '
                            'J4i/ADAze7jYjcmPZvTFHD/tMa3wt9AMeaQALPXZebs=): ')

            if not node_public_key.strip():
                if not yn_prompt('You cannot leave the node_public_key '
                                 'field empty for a validator. Do you want to '
                                 'try again? (Y/n)\n'):
                    return None
            else:
                break
    else:
        node_public_key = ''

    # Return node
    return NodeConfig(node_name, chain_name, api_url, node_public_key, \
                      node_is_validator, node_exporter_url, monitor_node, \
                      node_is_archive_node, True)
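A possible wizard loop around get_node, sketched under the assumption that OasisApiWrapper can be constructed with a logger (its real constructor is not shown in this listing):

# Hypothetical driver loop; returning None from get_node means the user gave
# up on the current node, so it is simply not added.
nodes: List[NodeConfig] = []
wrapper = OasisApiWrapper(DUMMY_LOGGER)  # assumed constructor signature

while True:
    node = get_node(nodes, wrapper)
    if node is not None:
        nodes.append(node)
    if not yn_prompt('Do you want to set up another node? (Y/n)\n'):
        break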