Beispiel #1
0
def get_metrics_for_node(skale, node, is_test_mode):
    """Collect ping metrics for a node and fold in its health checks.

    In test mode only ``GOOD_IP`` is pinged and no further checks run.
    Otherwise the node's own ``'ip'`` is pinged, and the results of the
    container healthcheck and the sChain check are OR-ed into the
    ``'is_offline'`` entry of the ping metrics.

    Args:
        skale: Passed through unchanged to ``check_schains_for_node``.
        node: Mapping read for ``'ip'`` (outside test mode) and ``'id'``.
        is_test_mode: Truthy to ping ``GOOD_IP`` and skip the extra checks.

    Returns:
        The mapping produced by ``get_ping_node_results``, with
        ``'is_offline'`` updated as described above when not in test mode.
    """
    if is_test_mode:
        metrics = get_ping_node_results(GOOD_IP)
    else:
        node_host = node['ip']
        metrics = get_ping_node_results(node_host)
        containers_status = get_containers_healthcheck(node_host)
        schains_status = check_schains_for_node(skale, node['id'], node_host)
        # Any single failing check is enough to flag the node as offline.
        offline_so_far = metrics['is_offline'] | containers_status
        metrics['is_offline'] = offline_so_far | schains_status

    logger.info(f'Received metrics from node ID = {node["id"]}: {metrics}')
    return metrics
Beispiel #2
0
def check_schain(schain, node_ip):
    """Probe one sChain's HTTP RPC endpoint by reading its block number.

    Args:
        schain: Mapping read for ``'name'`` and ``'http_rpc_port'``.
        node_ip: Passed to ``get_schain_endpoint`` together with the port.

    Returns:
        0 when the block number was fetched and logged, 1 when any
        exception was raised while doing so.
    """
    schain_name = schain['name']
    rpc_port = schain['http_rpc_port']
    schain_endpoint = get_schain_endpoint(node_ip, rpc_port)
    logger.info(f'\nChecking {schain_name}: {schain_endpoint}')

    try:
        provider = HTTPProvider(schain_endpoint)
        block_number = Web3(provider).eth.blockNumber
        logger.info(f"Current block number for {schain_name} = {block_number}")
    except Exception as err:
        # Any failure while querying the endpoint counts as a failed check.
        logger.error(f'Error occurred while getting block number : {err}')
        return 1
    return 0
Beispiel #3
0
def get_ping_node_results(host) -> dict:
    """Ping a host and report whether it is offline and its latency.

    The ping is sent with ``count = 3`` and the ping option ``-w1``.
    The raw output is parsed once and the resulting stats are reused.

    Args:
        host: Destination host handed to ``pingparsing.PingTransmitter``.

    Returns:
        A dict with two keys:

        * ``'is_offline'``: ``True`` when no average round-trip time was
          parsed or at least one packet was lost, otherwise ``False``.
        * ``'latency'``: ``-1`` when offline, otherwise the parsed
          ``rtt_avg`` multiplied by 1000 and truncated to ``int``.
          NOTE(review): presumably microseconds if ``rtt_avg`` is in
          milliseconds; confirm against the pingparsing docs.
    """
    ping_parser = pingparsing.PingParsing()
    transmitter = pingparsing.PingTransmitter()
    transmitter.destination_host = host
    transmitter.ping_option = '-w1'
    transmitter.count = 3

    # Parse the ping output a single time instead of re-parsing the same
    # result for every field that is read from it.
    stats = ping_parser.parse(transmitter.ping()).as_dict()
    rtt_avg = stats['rtt_avg']

    if rtt_avg is None or stats['packet_loss_count'] > 0:
        logger.info('No connection to host!')
        return {'is_offline': True, 'latency': -1}

    return {'is_offline': False, 'latency': int(rtt_avg * 1000)}
Beispiel #4
0
def get_containers_healthcheck(host):
    """Return 0 if OK or 1 if failed.

    Requests the containers healthcheck URL for ``host`` (15 second
    timeout) and inspects the JSON payload.

    The check fails (returns 1) when:

    * the request raises any exception,
    * the HTTP status is not OK,
    * the body cannot be decoded as JSON,
    * the payload carries a non-``None`` ``'error'`` field,
    * the payload has no ``'data'`` field,
    * any container in ``'data'`` is not running or is paused.

    Args:
        host: Passed to ``get_containers_healthcheck_url`` to build the URL.

    Returns:
        0 when every listed container is running and not paused, else 1.
    """
    url = get_containers_healthcheck_url(host)
    logger.info(f'Checking: {url}')
    try:
        response = requests.get(url, timeout=15)
    except requests.exceptions.ConnectionError as err:
        logger.info(f'Could not connect to {url}')
        logger.error(err)
        return 1
    except Exception as err:
        logger.info(f'Could not get data from {url}')
        logger.error(err)
        return 1

    if response.status_code != requests.codes.ok:
        logger.info(f'Request failed, status code: {response.status_code}')
        return 1

    # A 200 response with a non-JSON body must count as a failed check
    # rather than raise out of a function that reports status via 0/1.
    try:
        res = response.json()
    except ValueError as err:
        logger.info(f'Could not parse response from {url}')
        logger.error(err)
        return 1

    if res.get('error') is not None:
        logger.info(res['error'])
        return 1
    data = res.get('data')
    if data is None:
        logger.info(f'No data found checking {url}')
        return 1

    for container in data:
        if not container['state']['Running'] or container['state']['Paused']:
            logger.info(f'{container["name"]} is not running or paused')
            return 1
    return 0