Esempio n. 1
0
    async def compute_util_and_waste(self):
        """
        Update the per-container utilization and waste metrics.

        Fetches the container utilization info, derives a (cpu, memory)
        utilization pair per container via ``get_util``, drops the dAdvisor
        container, scales the values and derives the waste. Each of the four
        per-container metrics is then incremented by ``value * FACTOR``.

        Returns early (after logging the error) when the info cannot be
        processed, and silently when no container remains after filtering.
        """
        info = await get_container_utilization()
        try:
            containers = [docker_id[len('/docker/'):] for docker_id in info.keys()]
            util_list = [self.get_util(value) for value in info.values()]
            self.filter_dadvisor(containers, util_list)
        except Exception as e:
            log.error(e)
            return

        if not util_list:
            return

        # zip() produces immutable tuples. The return value of scale_list is
        # not used below, so it presumably adjusts its argument in place
        # (TODO confirm); that only works on a mutable list.
        cpu_column, mem_column = zip(*util_list)
        cpu_util_list = list(cpu_column)
        mem_util_list = list(mem_column)

        self.scale_list(cpu_util_list)
        self.scale_list(mem_util_list)

        cpu_waste_list = self.get_waste(cpu_util_list)
        mem_waste_list = self.get_waste(mem_util_list)

        log.info(cpu_util_list)

        for i, container in enumerate(containers):
            self.cpu_util_container_sum.labels(src=container, src_host=IP) \
                .inc(cpu_util_list[i] * FACTOR)
            self.mem_util_container_sum.labels(src=container, src_host=IP) \
                .inc(mem_util_list[i] * FACTOR)
            self.cpu_waste_container_sum.labels(src=container, src_host=IP) \
                .inc(cpu_waste_list[i] * FACTOR)
            self.mem_waste_container_sum.labels(src=container, src_host=IP) \
                .inc(mem_waste_list[i] * FACTOR)
Esempio n. 2
0
 def filter_dadvisor(self, containers, values):
     """
     Remove the dAdvisor container from both lists, in place.

     ``containers`` and ``values`` are parallel lists: the entry whose
     container id equals ``self.container_collector.dadvisor_id`` is deleted
     from each of them. When that id is not in ``containers`` an error is
     logged and both lists stay untouched.
     """
     try:
         position = containers.index(self.container_collector.dadvisor_id)
     except ValueError:
         log.error(f'dadvisor_id unkown: {self.container_collector.dadvisor_id}')
     else:
         del containers[position]
         del values[position]
Esempio n. 3
0
async def _send_get_json(url):
    """
    Perform an HTTP GET on ``url`` and return the decoded JSON body.

    Any exception (while connecting, requesting or decoding) is logged
    together with the url, and ``None`` is returned instead.
    """
    try:
        async with aiohttp.ClientSession() as session, session.get(url) as response:
            payload = await response.json()
        return payload
    except Exception as err:
        log.error(err)
        log.error(f'Cannot reach {url}')
    return None
Esempio n. 4
0
 def get_util(self, value):
     """
     Compute the CPU and memory utilization of a single container.

     :param value: utilization info of one container; it must provide
         ``value['minute_usage']['cpu']['mean']`` and
         ``value['minute_usage']['memory']['mean']``.
     :return: tuple ``(cpu, memory_percentage)``. ``cpu`` is the mean cpu
         usage divided by ``cores * 1000.0`` and ``memory_percentage`` is the
         mean memory usage divided by the node's memory. The node values
         default to 1 core and 8 GiB when the node stats do not contain
         them. ``(0.0, 0.0)`` is returned when the computation fails.
     """
     try:
         node_stats = self.node_collector.my_node_stats
         cores = node_stats.get('num_cores', 1)
         memory = node_stats.get('memory', 8 * 2 ** 30)
         minute_usage = value['minute_usage']
         cpu = minute_usage['cpu']['mean'] / (cores * 1000.0)
         memory_percentage = minute_usage['memory']['mean'] / memory
         return cpu, memory_percentage
     except Exception as e:
         log.error(e)
         # Keep the (cpu, memory) shape of the success path: the results of
         # all containers are split into two columns with zip(*...), which
         # fails on a bare scalar.
         return 0.0, 0.0
Esempio n. 5
0
 def get_network(value):
     """
     Sum the transmitted bytes over all network interfaces of a container.

     Only the first element of ``value`` is inspected; its
     ``['network']['interfaces']`` entries must each provide ``'tx_bytes'``.
     If that structure cannot be read, the error is logged and 0 is
     returned. An empty or non-indexable ``value`` is not handled here and
     raises.
     """
     first_sample = value[0]
     total = 0
     try:
         total = sum(iface['tx_bytes']
                     for iface in first_sample['network']['interfaces'])
     except Exception as err:
         log.error(err)
     return total
Esempio n. 6
0
async def _send_post(url, data):
    """
    POST ``data``, serialized with ``JSONCustomEncoder``, to ``url``.

    :return: ``True`` when the request was sent without an exception;
        ``False`` after logging the error and the url otherwise.
    """
    try:
        async with aiohttp.ClientSession() as session:
            # NOTE(review): the payload is already a JSON string when it is
            # handed to the ``json=`` argument, which presumably serializes
            # once more -- confirm the receiver expects that.
            body = json.dumps(data, cls=JSONCustomEncoder)
            await session.post(url, json=body)
    except Exception as err:
        log.error(err)
        log.error(f'Cannot reach {url}')
        return False
    return True
Esempio n. 7
0
 async def compute_network_usage(self):
     """
     Add the bytes each container sent since the previous call to the
     per-container network metric.

     The current reading per container comes from ``get_network``; the
     dAdvisor container is filtered out. The previous reading is kept in
     ``self.prev_network_container`` (0 for a container seen for the first
     time) and the metric is incremented by the difference. Any exception
     while processing is logged and ends the current update.
     """
     data = await get_container_stats()
     try:
         containers = [docker_id[len('/docker/'):] for docker_id in data.keys()]
         network_values = [self.get_network(value) for value in data.values()]
         self.filter_dadvisor(containers, network_values)
         for container, current in zip(containers, network_values):
             prev = self.prev_network_container.get(container, 0)
             log.info(f'Container {container}: {prev}')
             self.prev_network_container[container] = current

             delta = current - prev
             if delta < 0:
                 # The reading went backwards, i.e. the underlying byte
                 # counter started over. Count what was sent since that
                 # restart instead of a negative amount, which is
                 # meaningless for a running sum (and rejected by
                 # prometheus counters -- assuming this metric is one,
                 # TODO confirm).
                 delta = current
             self.network_container_sum.labels(src=container, src_host=IP) \
                 .inc(delta)
     except Exception as e:
         log.error(e)
Esempio n. 8
0
    async def run(self):
        """
        Periodically update the network and utilization/waste metrics until
        ``self.running`` becomes false.

        Every round sleeps for SLEEP_TIME minus the duration of the previous
        round's computations, so that the computations start roughly once
        per SLEEP_TIME. An exception raised in a round is logged and the
        loop continues with the next round.
        """
        elapsed = 0
        while self.running:
            try:
                # A round that took longer than SLEEP_TIME must not result
                # in a negative delay.
                delay = max(SLEEP_TIME - elapsed, 0)
                await asyncio.sleep(delay)
                # Reset, so that a failure below does not make the next
                # round reuse the duration of an older round.
                elapsed = 0
                now = datetime.utcnow()
                log.info(f'Sleeping {delay} sec')
                # Execute once per SLEEP_TIME
                await self.compute_network_usage()
                await self.compute_util_and_waste()
                now2 = datetime.utcnow()
                # total_seconds() keeps the fractional part and the days,
                # which the .seconds attribute drops.
                elapsed = (now2 - now).total_seconds()

            except Exception as e:
                log.error(e)
        log.info('StatsCollector stopped')
Esempio n. 9
0
    def run(self):
        """
        Sample the network traffic with tcpdump until ``self.running``
        becomes false.

        One iteration of the while loop performs the following actions:
        1. Run the tcpdump command that captures TRAFFIC_SAMPLE requests.
            This is collected in X seconds.
        2. Resolve these requests by communicating with the other nodes
        3. Sleep k*X seconds, with a lower- and upperbound.

        The size of every captured dataflow is multiplied by
        ``(sleep_time + elapsed) / elapsed`` of the previous iteration, to
        account for the traffic that was not captured while sleeping.
        """
        self.check_installation()
        command = self.get_tcpdump_command()
        multiplier = 1

        while self.running:
            # This method blocks (readline, time.sleep), so it cannot be
            # running on the analyser's event loop itself. Work is therefore
            # handed over with call_soon_threadsafe, the thread-safe way to
            # schedule on a loop from outside of it.
            loop = self.analyser.loop

            start_time = time.time()
            # The context manager closes the pipe and waits for tcpdump, so
            # no process or file descriptor is left behind per iteration.
            with subprocess.Popen(command, stdout=subprocess.PIPE) as p:
                # parse results
                for row in iter(p.stdout.readline, b''):
                    # Decode once and tolerate invalid bytes: decoding again
                    # inside the except-clause could raise from the handler
                    # and terminate this loop.
                    line = row.decode('utf-8', errors='replace')
                    try:
                        dataflow = parse_row(self.container_collector, line)
                        dataflow.size = (dataflow.size + HEADER_SIZE) * multiplier
                        loop.call_soon_threadsafe(
                            loop.create_task,
                            self.analyser.analyse_dataflow(dataflow))
                    except Exception as e:
                        log.error(e)
                        log.error('Cannot parse row: {}'.format(line.rstrip()))

            end_time = time.time()
            elapsed = end_time - start_time
            log.info('Monitoring {} packets in {} sec'.format(TRAFFIC_SAMPLE, elapsed))

            loop.call_soon_threadsafe(
                loop.create_task,
                self.analyser.cache.resolve(self.node_collector))

            # sleep K times the elapsed time. Minus the time spent since the
            # capture ended (the cache resolution is only scheduled here)
            sleep_time = TRAFFIC_K * elapsed - (time.time() - end_time)
            sleep_time = min(max(sleep_time, TRAFFIC_SLEEP_MIN), TRAFFIC_SLEEP_MAX)

            if elapsed != 0:
                multiplier = (sleep_time + elapsed) / elapsed
            else:
                multiplier = 1
            log.info(f'Multiplier: {multiplier}')

            log.info('Sleeping for: {} sec'.format(sleep_time))
            time.sleep(sleep_time)

        log.info('Inspector thread stopped')
Esempio n. 10
0
    async def run(self):
        """
        Keep the information about the containers on this host up to date.

        While ``self.running`` is true, every round:
            - waits SLEEP_TIME seconds
            - collects the own containers
            - validates the own containers
        An exception raised during a round is logged, after which the next
        round starts. A message is logged once the loop has ended.
        :return:
        """

        while self.running:
            try:
                # Wait first, then refresh and validate in that order
                await asyncio.sleep(SLEEP_TIME)
                await self.collect_own_containers()
                await self.validate_own_containers()
            except Exception as err:
                log.error(err)

        log.info('ContainerCollector stopped')
Esempio n. 11
0
    async def run(self):
        """
        Register this node and keep the list of other nodes up to date.

        1. registers ``self.my_node`` once, before the loop starts
        2. while ``self.running`` is true, every SLEEP_TIME seconds:
            - fetches all nodes and schedules ``add_nodes`` with them
            - every CHECK_REMOVE-th round also schedules ``check_nodes``
        An exception raised during a round is logged, after which the next
        round starts.
        :return:
        """
        register_node(self.loop, self.my_node)

        while self.running:
            try:
                await asyncio.sleep(SLEEP_TIME)

                nodes = await get_all_nodes()
                self.loop.create_task(self.add_nodes(nodes))

                self.check_removal_counter += 1
                if self.check_removal_counter != CHECK_REMOVE:
                    continue
                # Counter reached its limit: start over and check the nodes
                self.check_removal_counter = 0
                self.loop.create_task(self.check_nodes())
            except Exception as err:
                log.error(err)
Esempio n. 12
0
    async def resolve(self, nodes_collector):
        """
        Ask all nodes to resolve their ports into a container-hash.
        After this function has been called, the cache is empty

        For every ip in the cache, the mapping of the corresponding node is
        requested. Each cached entry ``(from_to, local_hash, port, size)`` is
        translated through ``mapping['ports']`` and ``mapping['containers']``
        into a remote container hash. When both hashes are known the counter
        is incremented by ``size``, in the direction given by ``from_to``.
        Entries of unknown nodes are skipped, and an exception for one node
        is logged without affecting the other nodes.

        :param nodes_collector: object providing ``is_other_node(ip)``
        """
        for ip, data_list in list(self.cache.items()):

            node = nodes_collector.is_other_node(ip)
            if not node:
                log.error(f'Node not found {ip}')
                continue

            try:
                mapping = await get_mapping(node)
                ports = mapping['ports']
                # port is encoded as string, therefore decode to int
                ports = {int(port): container_ip
                         for port, container_ip in ports.items()}
                containers = mapping['containers']

                for (from_to, local_hash, port, size) in data_list:
                    # Use a separate name here: `ip` has to keep identifying
                    # the cache entry that is removed below.
                    container_ip = ports.get(port, None)
                    remote_hash = containers.get(container_ip, None)
                    if local_hash and remote_hash:
                        if from_to == TO:
                            self.counter.labels(src=local_hash, dst=remote_hash, src_host=IP)\
                                .inc(size)
                        elif from_to == FROM:
                            self.counter.labels(src=remote_hash, dst=local_hash, src_host=IP)\
                                .inc(size)
                try:
                    del self.cache[ip]
                except KeyError:
                    log.debug(f'Cannot remove {ip} from self.cache')
            except Exception as e:
                log.error(e)
        # NOTE(review): this also discards entries that could not be
        # resolved, and anything that was added to the cache while awaiting
        # the mappings above -- confirm that this is intended.
        self.cache = {}
Esempio n. 13
0
 def check_installation():
     """
     Verify that tcpdump can be started, and exit the program otherwise.

     Runs ``tcpdump -D`` and discards its output. When the executable
     cannot be found an error is logged and the program exits with
     status -1.

     :raises SystemExit: when tcpdump is not installed.
     """
     import sys

     try:
         # run() waits for the process, so no unreaped child or open pipe is
         # left behind.
         subprocess.run(['tcpdump', '-D'], stdout=subprocess.DEVNULL)
     except (FileNotFoundError, ProcessLookupError):
         # A missing executable is reported as FileNotFoundError;
         # ProcessLookupError is kept for backward compatibility.
         log.error('tcpdump is not installed. Please install it before running this code.')
         sys.exit(-1)
Esempio n. 14
0
This file contains all configurable options.
In the future, set these values based on environment variables.
"""

import os
import socket
from datetime import datetime

from dadvisor.log import log

# INTERNAL PORTS AND ADDRESSES
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Detect the address of this host by resolving its own hostname.
try:
    ip = socket.gethostbyname(socket.gethostname())
except socket.gaierror as e:
    # The hostname could not be resolved: log it and fall back to 'localhost'.
    log.error(e)
    ip = 'localhost'

# The IP environment variable overrides the detected address.
IP = os.environ.get('IP', ip)

# True only when the TYPE environment variable equals 'SUPERNODE' (default 'NODE').
IS_SUPER_NODE = os.environ.get('TYPE', 'NODE') == 'SUPERNODE'
# Taken from DADVISOR_PORT (default 14100); int() raises ValueError when the
# variable is set to a non-numeric value.
PROXY_PORT = int(os.environ.get('DADVISOR_PORT', 14100))
# The remaining ports are fixed and cannot be configured through the environment.
INTERNAL_PORT = 14101
PROMETHEUS_PORT = 14102

CADVISOR_URL = 'http://localhost:14104'
# Prometheus is addressed through PROXY_PORT, under the /prometheus path.
PROMETHEUS_URL = f'http://localhost:{PROXY_PORT}/prometheus'
# Address of the tracker; the TRACKER environment variable overrides the default.
TRACKER = os.environ.get('TRACKER', 'http://35.204.250.252:14100')

FILTER_PORTS = os.environ.get(
    'FILTER_PORTS',