Beispiel #1
0
 def get_tcpdump_command():
     """Assemble the tcpdump argument list used to sample TCP traffic.

     The command captures ``TRAFFIC_SAMPLE`` packets on all interfaces,
     skips every port listed in ``FILTER_PORTS`` and keeps only TCP packets
     for which IP total length minus IP header length minus TCP header
     length is non-zero.

     :return: the command as a list of arguments
     """
     # One "not port <p> and" clause per filtered port, flattened into a
     # single argument list.
     port_filters = []
     for port in FILTER_PORTS:
         port_filters.extend(['not', 'port', str(port), 'and'])

     head = ['tcpdump', '-c', str(TRAFFIC_SAMPLE), '-i', 'any', '-nn', 'ip', 'and', '-l', '-t']
     payload_filter = ['tcp', 'and', '(((ip[2:2] - ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0)']

     command = head + port_filters + payload_filter
     log.info('Running command: {}'.format(' '.join(command)))
     return command
Beispiel #2
0
    async def compute_util_and_waste(self):
        """Update the per-container CPU/memory utilization and waste metrics.

        Fetches the current utilization info, derives one (cpu, mem) pair per
        container via ``self.get_util``, scales both series with
        ``self.scale_list``, computes the matching waste series with
        ``self.get_waste`` and increments the four labelled metrics by the
        resulting values multiplied by ``FACTOR``.

        Returns early (without touching any metric) when the input cannot be
        processed or when no container is left.
        """
        info = await get_container_utilization()
        try:
            # Keys are stripped of their '/docker/' prefix to obtain the id.
            containers = [docker_id[len('/docker/'):] for docker_id in info.keys()]
            util_list = [self.get_util(value) for value in info.values()]
            # NOTE(review): presumably drops dAdvisor's own containers from
            # both lists in place - confirm against filter_dadvisor.
            self.filter_dadvisor(containers, util_list)
        except Exception as e:
            log.error(e)
            return

        if not util_list:
            return

        # zip() yields tuples, but scale_list rescales its argument in place
        # through item assignment; materialise mutable lists so that scaling
        # does not fail with a TypeError.
        cpu_util_list, mem_util_list = (list(column) for column in zip(*util_list))

        self.scale_list(cpu_util_list)
        self.scale_list(mem_util_list)

        cpu_waste_list = self.get_waste(cpu_util_list)
        mem_waste_list = self.get_waste(mem_util_list)

        log.info(cpu_util_list)

        for i, container in enumerate(containers):
            self.cpu_util_container_sum.labels(src=container, src_host=IP) \
                .inc(cpu_util_list[i] * FACTOR)
            self.mem_util_container_sum.labels(src=container, src_host=IP) \
                .inc(mem_util_list[i] * FACTOR)
            self.cpu_waste_container_sum.labels(src=container, src_host=IP) \
                .inc(cpu_waste_list[i] * FACTOR)
            self.mem_waste_container_sum.labels(src=container, src_host=IP) \
                .inc(mem_waste_list[i] * FACTOR)
Beispiel #3
0
 async def compute_network_usage(self):
     """Add each container's network delta to its labelled metric.

     For every container the value returned by ``self.get_network`` is
     compared with the value remembered in ``self.prev_network_container``;
     the difference is added to ``self.network_container_sum`` and the new
     value is remembered for the next call. Any exception raised while
     processing is logged and swallowed.
     """
     data = await get_container_stats()
     try:
         # Keys are stripped of their '/docker/' prefix to obtain the id.
         containers = [key[len('/docker/'):] for key in data.keys()]
         network_values = [self.get_network(stats) for stats in data.values()]
         self.filter_dadvisor(containers, network_values)

         for idx, name in enumerate(containers):
             # Containers seen for the first time start from zero.
             previous = self.prev_network_container.get(name, 0)
             log.info(f'Container {name}: {previous}')

             current = network_values[idx]
             self.prev_network_container[name] = current

             metric = self.network_container_sum.labels(src=name, src_host=IP)
             metric.inc(current - previous)
     except Exception as exc:
         log.error(exc)
Beispiel #4
0
    async def run(self):
        """Refresh the network and utilization metrics once per ``SLEEP_TIME``.

        Loops while ``self.running`` is truthy. Each iteration sleeps for
        ``SLEEP_TIME`` minus the duration of the previous computation, then
        runs ``compute_network_usage`` followed by ``compute_util_and_waste``.
        Exceptions raised inside an iteration are logged and the loop
        continues.
        """
        # The event loop clock is monotonic, so the measured duration is not
        # affected by wall-clock adjustments.
        loop = asyncio.get_event_loop()
        elapsed = 0
        while self.running:
            try:
                # Never request a negative delay when the previous round
                # took longer than SLEEP_TIME.
                delay = max(SLEEP_TIME - elapsed, 0)
                log.info(f'Sleeping {delay} sec')
                await asyncio.sleep(delay)

                # Reset first so that a failing round does not leave a stale
                # measurement behind for the next sleep.
                elapsed = 0
                started = loop.time()
                # Execute once per SLEEP_TIME
                await self.compute_network_usage()
                await self.compute_util_and_waste()
                # Full duration, including the sub-second part.
                elapsed = loop.time() - started

            except Exception as e:
                log.error(e)
        log.info('StatsCollector stopped')
Beispiel #5
0
    async def run(self):
        """
        Periodically refresh the container bookkeeping.

        While ``self.running`` is truthy, every cycle:
            - waits ``SLEEP_TIME`` seconds
            - calls ``collect_own_containers``
            - calls ``validate_own_containers``

        An exception raised during a cycle is logged and the loop carries on
        with the next cycle.
        :return: None
        """

        while self.running:
            try:
                await asyncio.sleep(SLEEP_TIME)
                await self.collect_own_containers()
                await self.validate_own_containers()
            except Exception as exc:
                # Keep the collector alive; a failed cycle is only reported.
                log.error(exc)

        log.info('ContainerCollector stopped')
Beispiel #6
0
    def run(self):
        """Sample traffic with tcpdump in a loop until ``self.running`` is cleared.

        One iteration of the loop performs the following actions:

        1. Run the tcpdump command that captures TRAFFIC_SAMPLE packets.
           This is collected in X seconds.
        2. Schedule the analysis of every parsed row and the resolving of the
           cache on ``self.analyser.loop``.
        3. Sleep k*X seconds, with a lower- and upperbound.

        Because only a fraction of the time is spent capturing, the size of
        every dataflow is multiplied by (sleep + capture) / capture of the
        previous iteration.
        """
        self.check_installation()
        command = self.get_tcpdump_command()
        multiplier = 1

        while self.running:
            start_time = time.time()

            # The context manager closes the stdout pipe and waits for the
            # tcpdump process, so no zombie process or open file descriptor
            # is left behind after each iteration.
            with subprocess.Popen(command, stdout=subprocess.PIPE) as p:
                # parse results
                for row in iter(p.stdout.readline, b''):
                    # Decode once and leniently: a malformed byte must not
                    # raise again inside the error handler below.
                    line = row.decode('utf-8', errors='replace')
                    try:
                        dataflow = parse_row(self.container_collector, line)
                        dataflow.size = (dataflow.size + HEADER_SIZE) * multiplier
                        # NOTE(review): create_task is called from this
                        # method's thread; confirm whether the loop runs in
                        # another thread and needs a thread-safe hand-off.
                        self.analyser.loop.create_task(self.analyser.analyse_dataflow(dataflow))
                    except Exception as e:
                        log.error(e)
                        log.error('Cannot parse row: {}'.format(line.rstrip()))

            end_time = time.time()
            elapsed = end_time - start_time
            log.info('Monitoring {} packets in {} sec'.format(TRAFFIC_SAMPLE, elapsed))

            self.analyser.loop.create_task(self.analyser.cache.resolve(self.node_collector))

            # sleep K times the elapsed time. Minus the time it takes to resolve the cache
            sleep_time = TRAFFIC_K * elapsed - (time.time() - end_time)
            sleep_time = min(max(sleep_time, TRAFFIC_SLEEP_MIN), TRAFFIC_SLEEP_MAX)

            # Extrapolate the sampled sizes over the whole (sleep + capture)
            # period; guard against a zero-length capture.
            if elapsed != 0:
                multiplier = (sleep_time + elapsed) / elapsed
            else:
                multiplier = 1
            log.info(f'Multiplier: {multiplier}')

            log.info('Sleeping for: {} sec'.format(sleep_time))
            time.sleep(sleep_time)

        log.info('Inspector thread stopped')
Beispiel #7
0
 def stop(self):
     """Log the stop request and clear ``self.running``.

     Only the flag is changed here; code that polls ``self.running`` is
     expected to finish on its own afterwards.
     """
     log.info('Stopping InspectorThread')
     self.running = False
Beispiel #8
0
async def run_app(app):
    """Start serving ``app`` on ``INTERNAL_PORT``, bound to all interfaces.

    :param app: the application handed to ``web.AppRunner``
    """
    app_runner = web.AppRunner(app)
    await app_runner.setup()

    tcp_site = web.TCPSite(app_runner, '0.0.0.0', INTERNAL_PORT)
    log.info(f'Running on localhost:{INTERNAL_PORT}')
    await tcp_site.start()
Beispiel #9
0
# Address of this host; the fallback `ip` is defined outside this section.
IP = os.environ.get('IP', ip)

IS_SUPER_NODE = os.environ.get('TYPE', 'NODE') == 'SUPERNODE'
PROXY_PORT = int(os.environ.get('DADVISOR_PORT', 14100))
INTERNAL_PORT = 14101
PROMETHEUS_PORT = 14102

CADVISOR_URL = 'http://localhost:14104'
PROMETHEUS_URL = f'http://localhost:{PROXY_PORT}/prometheus'
TRACKER = os.environ.get('TRACKER', 'http://35.204.250.252:14100')

# Comma-separated list of ports. Entries are stripped and empty ones are
# dropped, so values such as "" or "22, 80," do not yield blank port names.
FILTER_PORTS = [
    port.strip()
    for port in os.environ.get(
        'FILTER_PORTS',
        f'22,{PROXY_PORT},{INTERNAL_PORT},{PROMETHEUS_PORT}').split(',')
    if port.strip()
]
log.info(f'Filtering internet traffic ports: {FILTER_PORTS}')

# INTERNET TRAFFIC
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TRAFFIC_SAMPLE = int(os.environ.get('TRAFFIC_SAMPLE', 1000))
TRAFFIC_K = int(os.environ.get('TRAFFIC_K', 9))
TRAFFIC_SLEEP_MIN = int(os.environ.get('TRAFFIC_SLEEP_MIN', 1))
TRAFFIC_SLEEP_MAX = int(os.environ.get('TRAFFIC_SLEEP_MAX', 150))

SLEEP_TIME = int(os.environ.get('SLEEP_TIME', 60))

PREFIX = '/dadvisor'

# Possible log values: 'CRITICAL', 'FATAL', 'ERROR', 'WARN', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'
LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO')
log.setLevel(LOG_LEVEL)
Beispiel #10
0
 def stop(self):
     """Clear ``self.running`` and log the stop request.

     Only the flag is changed here; code that polls ``self.running`` is
     expected to finish on its own afterwards.
     """
     self.running = False
     log.info('Stopping containerCollector')
Beispiel #11
0
 def scale_list(util_list):
     """Rescale ``util_list`` in place so that it sums to 1 when its sum exceeds 1.

     Sequences whose sum is at most 1 are left untouched. Nothing is
     returned; the argument must support item assignment.

     :param util_list: mutable sequence of numbers
     """
     total = sum(util_list)
     if total <= 1:
         return

     log.info(f'Scaling list: {util_list}')
     # Overwrite the contents of the caller's sequence instead of rebinding.
     util_list[:] = [value / total for value in util_list]
Beispiel #12
0
def remove_node(loop, node):
    """Schedule a POST of ``node`` to the tracker's ``/root/remove`` endpoint.

    :param loop: event loop on which the request task is created
    :param node: payload passed as ``data`` to ``_send_post``
    """
    log.info(f'Removing peer: {node}')
    endpoint = f'{TRACKER}/root/remove'
    request = _send_post(endpoint, data=node)
    loop.create_task(request)
Beispiel #13
0
def register_node(loop, node):
    """Schedule a POST of ``node`` to the tracker's ``/root/add`` endpoint.

    :param loop: event loop on which the request task is created
    :param node: payload passed as ``data`` to ``_send_post``
    """
    log.info(f'Registering peer: {node}')
    endpoint = f'{TRACKER}/root/add'
    request = _send_post(endpoint, data=node)
    loop.create_task(request)