def test_list_tasks_without_app_id(m):
    """list_tasks() with no app id returns every task in the mocked response."""
    fake_response = (
        '{ "tasks": [ { "appId": "/anapp", "healthCheckResults": '
        '[ { "alive": true, "consecutiveFailures": 0, "firstSuccess": "2014-10-03T22:57:02.246Z", "lastFailure": null, '
        '"lastSuccess": "2014-10-03T22:57:41.643Z", "taskId": "bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799" } ],'
        ' "host": "10.141.141.10", "id": "bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799", "ports": [ 31000 ], '
        '"servicePorts": [ 9000 ], "stagedAt": "2014-10-03T22:16:27.811Z", "startedAt": "2014-10-03T22:57:41.587Z", '
        '"version": "2014-10-03T22:16:23.634Z" }, { "appId": "/anotherapp", '
        '"healthCheckResults": [ { "alive": true, "consecutiveFailures": 0, "firstSuccess": "2014-10-03T22:57:02.246Z", '
        '"lastFailure": null, "lastSuccess": "2014-10-03T22:57:41.649Z", "taskId": "bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799" } ], '
        '"host": "10.141.141.10", "id": "bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799", "ports": [ 31001 ], "servicePorts": [ 9000 ], '
        '"stagedAt": "2014-10-03T22:16:33.814Z", "startedAt": "2014-10-03T22:57:41.593Z", "version": "2014-10-03T22:16:23.634Z" } ] }'
    )
    # Register the canned payload for the tasks endpoint.
    m.get('http://fake_server/v2/tasks', text=fake_response)
    client = MarathonClient(servers='http://fake_server')

    listed_tasks = client.list_tasks()

    # Expected model objects, one per task in the payload above.
    first_health = models.task.MarathonHealthCheckResult(
        alive=True,
        consecutive_failures=0,
        first_success="2014-10-03T22:57:02.246Z",
        last_failure=None,
        last_success="2014-10-03T22:57:41.643Z",
        task_id="bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799",
    )
    first_task = models.task.MarathonTask(
        app_id="/anapp",
        health_check_results=[first_health],
        host="10.141.141.10",
        id="bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799",
        ports=[31000],
        service_ports=[9000],
        staged_at="2014-10-03T22:16:27.811Z",
        started_at="2014-10-03T22:57:41.587Z",
        version="2014-10-03T22:16:23.634Z",
    )
    second_health = models.task.MarathonHealthCheckResult(
        alive=True,
        consecutive_failures=0,
        first_success="2014-10-03T22:57:02.246Z",
        last_failure=None,
        last_success="2014-10-03T22:57:41.649Z",
        task_id="bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799",
    )
    second_task = models.task.MarathonTask(
        app_id="/anotherapp",
        health_check_results=[second_health],
        host="10.141.141.10",
        id="bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799",
        ports=[31001],
        service_ports=[9000],
        staged_at="2014-10-03T22:16:33.814Z",
        started_at="2014-10-03T22:57:41.593Z",
        version="2014-10-03T22:16:23.634Z",
    )
    expected_tasks = [first_task, second_task]

    assert listed_tasks == expected_tasks
def test_list_tasks_with_app_id(m):
    """list_tasks(app_id=...) returns only the tasks of the requested app."""
    fake_response = (
        '{ "tasks": [ { "appId": "/anapp", "healthCheckResults": '
        '[ { "alive": true, "consecutiveFailures": 0, "firstSuccess": "2014-10-03T22:57:02.246Z", "lastFailure": null, '
        '"lastSuccess": "2014-10-03T22:57:41.643Z", "taskId": "bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799" } ],'
        ' "host": "10.141.141.10", "id": "bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799", "ports": [ 31000 ], '
        '"servicePorts": [ 9000 ], "stagedAt": "2014-10-03T22:16:27.811Z", "startedAt": "2014-10-03T22:57:41.587Z", '
        '"version": "2014-10-03T22:16:23.634Z" }, { "appId": "/anotherapp", '
        '"healthCheckResults": [ { "alive": true, "consecutiveFailures": 0, "firstSuccess": "2014-10-03T22:57:02.246Z", '
        '"lastFailure": null, "lastSuccess": "2014-10-03T22:57:41.649Z", "taskId": "bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799" } ], '
        '"host": "10.141.141.10", "id": "bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799", "ports": [ 31001 ], "servicePorts": [ 9000 ], '
        '"stagedAt": "2014-10-03T22:16:33.814Z", "startedAt": "2014-10-03T22:57:41.593Z", "version": "2014-10-03T22:16:23.634Z" } ] }'
    )
    # The payload holds two apps; only "/anapp" is expected back.
    m.get('http://fake_server/v2/tasks', text=fake_response)
    client = MarathonClient(servers='http://fake_server')

    listed_tasks = client.list_tasks(app_id='/anapp')

    anapp_health = models.task.MarathonHealthCheckResult(
        alive=True,
        consecutive_failures=0,
        first_success="2014-10-03T22:57:02.246Z",
        last_failure=None,
        last_success="2014-10-03T22:57:41.643Z",
        task_id="bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799",
    )
    anapp_task = models.task.MarathonTask(
        app_id="/anapp",
        health_check_results=[anapp_health],
        host="10.141.141.10",
        id="bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799",
        ports=[31000],
        service_ports=[9000],
        staged_at="2014-10-03T22:16:27.811Z",
        started_at="2014-10-03T22:57:41.587Z",
        version="2014-10-03T22:16:23.634Z",
    )
    expected_tasks = [anapp_task]

    assert listed_tasks == expected_tasks
def enable_logstash():
    """Point the logstash forwarder at the ELK host discovered via Marathon.

    Reads MARATHON_ENDPOINT, MARATHON_HTTP_USER and MARATHON_HTTP_PASSWORD
    from the environment. When an endpoint is set, the tasks of the
    'yroblaelk' app are listed and the host of the first task with a
    ``started_at`` value is used. If a host is found, every "ELK_HOST"
    placeholder in LOGSTASH_CONF_FILE is replaced in place and the
    logstash-forwarder service is restarted. Discovery is best effort: a
    failure is reported on stdout and the function returns without
    touching the configuration.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Checking ELK entries\n")
    endpoint = os.getenv('MARATHON_ENDPOINT')
    username = os.getenv('MARATHON_HTTP_USER')
    password = os.getenv('MARATHON_HTTP_PASSWORD')

    elk_host = None
    if endpoint:
        try:
            print('Discovering configuration from %s\n' % endpoint)
            c = MarathonClient('https://%s' % endpoint,
                               username=username, password=password)
            for task in c.list_tasks('yroblaelk'):
                if task.started_at:
                    elk_host = task.host
                    break
        except Exception as exc:
            # Still best effort, but no longer silent; unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
            print('Could not discover ELK host from %s: %s\n' % (endpoint, exc))

    # Substitute the placeholder in the logstash configuration.
    if elk_host:
        print('Found ELK address %s\n' % elk_host)
        # With inplace=True, stdout is redirected into the file being read.
        for line in fileinput.input(LOGSTASH_CONF_FILE, inplace=True):
            sys.stdout.write(line.replace("ELK_HOST", elk_host))

        # reboot logstash
        subprocess.call(["service", "logstash-forwarder", "restart"])
def _get_hosts_with_container(self, context, cluster):
    """Return the set of hosts that currently run a Marathon task.

    The Marathon endpoint is derived from ``cluster.api_address``;
    ``context`` is not used here.
    """
    marathon_url = 'http://' + cluster.api_address + '/marathon/'
    client = MarathonClient(marathon_url)
    # A set comprehension collapses repeated hosts.
    return {running_task.host for running_task in client.list_tasks()}
def main(args):
    """Migrate Marathon tasks away from the hosts named in ``args.hosts``.

    ``args.hosts`` is a comma-separated host list (surrounding quotes are
    dropped), ``args.url`` is the Marathon endpoint and ``args.force`` is
    forwarded unchanged to ``migrate_tasks``.
    """
    raw_hosts = args.hosts.replace('"', '').replace('\'', '')
    # Strip each entry so that "a, b" matches host "b" as well.
    migration_hosts = [host.strip() for host in raw_hosts.split(',')]
    marathon_client = MarathonClient(args.url)

    # Get the running marathon application dictionary
    running_instances = utils.dict_by_key_and_value(
        lambda x: x.id, lambda y: y.instances, marathon_client.list_apps())
    print(">>> Total Running Applications: ")
    print(json.dumps(running_instances, sort_keys=True, indent=4,
                     separators=(',', ': ')))

    # Get the running marathon applications for all hosts which are going
    # for maintenance
    host_lookup = set(migration_hosts)
    all_tasks = marathon_client.list_tasks()
    filtered_tasks = [task for task in all_tasks if task.host in host_lookup]
    dicted_tasks = utils.dict_by_key(lambda x: x.app_id, filtered_tasks)

    print(">>> Total Running Application: ")
    # A dict view is not JSON serialisable on Python 3; dump a sorted list.
    print(json.dumps(sorted(dicted_tasks), sort_keys=True, indent=4,
                     separators=(',', ': ')))

    # Tasks migration
    migrate_tasks(marathon_client, dicted_tasks, migration_hosts, args.force)
def ports_used(request):
    """Render the ports used by Marathon tasks, grouped by host.

    The template context holds ``used_ports``: a list of
    ``[host, sorted_ports]`` pairs ordered by host.
    """
    mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'],
                                              settings.MARATHON['port']))

    used_ports = {}
    for app in mc.list_apps():
        for task in mc.list_tasks(app.id):
            # Collect into a list owned by this function: storing task.ports
            # directly would alias the task's own list, which later extend()
            # calls would then mutate. ``or []`` tolerates a task with no
            # port information.
            used_ports.setdefault(task.host, []).extend(task.ports or [])

    # Hosts are unique keys, so sorting the items orders by host only.
    list_host_ports = [[host, sorted(ports)]
                       for host, ports in sorted(used_ports.items())]

    data = {'used_ports': list_host_ports}
    return render(request, 'marathon_mgmt/ports_used.html', data)
class Scaler:
    """Scale one Marathon application up or down from InfluxDB metrics.

    The application record and its policies are fetched from the REST
    service configured under ``config["MARIA_RESTFUL"]``; metrics come from
    InfluxDB and scaling is done through Marathon.
    """

    # A single scale-down step never exceeds this many instances.
    _MAX_SCALE_DOWN_STEP = 10

    def __init__(self, app_name, config):
        """Load the app and its policies and connect to InfluxDB/Marathon.

        @param string app_name name of the application to scale
        @param dict config settings with the MARIA_RESTFUL, INFLUXDB,
            MARATHON and TIME sections read by this class
        """
        self.logger = logging.getLogger("autoscaling")
        self.logger.setLevel(logging.DEBUG)
        self.logger.debug("Init object scaler...")
        self.config = config

        self.logger.debug("Connect RESTful mariadb and get policies...")
        conn = http.client.HTTPConnection(config["MARIA_RESTFUL"]['host'],
                                          config["MARIA_RESTFUL"]['port'])
        try:
            conn.request("GET", "/app/name/" + app_name)
            json_app = conn.getresponse().read().decode("utf-8")
            self.app = json.loads(json_app)
            conn.request("GET", "/app/name/" + app_name + "/policies")
            json_policies = conn.getresponse().read().decode("utf-8")
            self.app["policies"] = json.loads(json_policies)
        finally:
            # Release the socket even when a request or the parsing fails.
            conn.close()

        self.logger.debug("Connect influxdb and marathon...")
        influx_cfg = config["INFLUXDB"]
        self.influx_client = InfluxDBClient(influx_cfg["host"],
                                            influx_cfg["port"],
                                            influx_cfg["username"],
                                            influx_cfg["password"],
                                            influx_cfg["db_name"])
        self.marathon_client = MarathonClient(self._marathon_url())
        # One lookup instead of three identical requests.
        marathon_app = self.marathon_client.get_app(app_name)
        self.app["instance"] = marathon_app.instances
        self.app["mem"] = marathon_app.mem
        self.app["cpus"] = marathon_app.cpus

        self.logger.debug("Reconfig haproxy.cfg...")
        self._reconfigure_haproxy()

    def _marathon_url(self):
        """Return the Marathon base URL built from the configuration."""
        marathon_cfg = self.config["MARATHON"]
        # str() keeps this working when the port is configured as an int.
        return "http://" + str(marathon_cfg["host"]) + ":" + str(marathon_cfg["port"])

    def _reconfigure_haproxy(self):
        """Run servicerouter.py to regenerate /etc/haproxy/haproxy.cfg."""
        os.system("sudo ./servicerouter.py --marathon " + self._marathon_url()
                  + " --haproxy-config /etc/haproxy/haproxy.cfg")

    def setup_logging(self, log_file="autoscaling.log", level=logging.INFO,
                      formatter=None):
        """Attach a file handler to the scaler's logger.

        @param string log_file path of the log file
        @param int level minimum level written to the file
        @param logging.Formatter formatter optional formatter; a
            timestamp/name/level/message format is used when omitted
        """
        if formatter is None:
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh = logging.FileHandler(log_file)
        fh.setLevel(level)
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

    def get_cpu_usage(self, container_name):
        """Return cpu usage (percent) of container_name

        @param string container_name container name
        """
        query = ("select DERIVATIVE(cpu_cumulative_usage) as cpu_usage from stats where container_name = '"
                 + container_name + "' and time > now()-5m group by time(2s) ")
        result = self.influx_client.query(query)
        points = result[0]["points"]
        # NOTE(review): the 1e9 divisor presumably converts ns/s - confirm.
        return (points[0][1] / 1000000000 / self.app["cpus"]) * 100

    def get_container_name(self, mesos_task_id):
        """Return the container name mapped to mesos_task_id

        @param string mesos_task_id
        """
        query = ("select container_name from " + self.config["INFLUXDB"]["ts_mapping"]
                 + " where time>now() - 5m and mesos_task_id = '" + mesos_task_id + "' limit 1")
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return points[0][2]

    def get_containers_name(self):
        """Return the container names of all tasks of this application

        @return list container names, one per Marathon task
        """
        tasks = self.marathon_client.list_tasks(self.app["name"])
        return [self.get_container_name(task.id) for task in tasks]

    def avg_mem_usage(self, containers_name):
        """Return avg memory usage (percent) of the given containers

        @param list containers_name list containers name
        @return float avg mem usage
        @raise ValueError when containers_name is empty
        """
        number_container = len(containers_name)
        if number_container == 0:
            # Avoid sending "in ()" to InfluxDB and dividing by zero.
            raise ValueError("no containers to compute memory usage for")
        quoted_names = ",".join("'" + x + "'" for x in containers_name)
        query = ("select memory_usage,container_name from stats where time > now()-5m and container_name in ("
                 + quoted_names + ") limit " + str(number_container * 2))
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_memory_usage = 0
        for point in points:
            if point[3] is not None:
                # NOTE(review): assumes app["mem"] is in MiB - confirm.
                sum_memory_usage += point[3] / (self.app["mem"] * 1048576) * 100
        return sum_memory_usage / number_container

    def avg_cpu_usage(self, containers_name):
        """Return avg cpu usage (percent) of the given containers

        @param list containers_name list containers name
        @return float avg cpu usage
        @raise ValueError when containers_name is empty
        """
        number_container = len(containers_name)
        if number_container == 0:
            # Avoid sending "in ()" to InfluxDB and dividing by zero.
            raise ValueError("no containers to compute cpu usage for")
        quoted_names = ",".join("'" + x + "'" for x in containers_name)
        query = ("select DERIVATIVE(cpu_cumulative_usage) as cpu_usage,container_name from stats where time > now()-5m and container_name in ("
                 + quoted_names + ") group by time(10s),container_name limit " + str(number_container))
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_cpu_usage = 0
        for point in points:
            sum_cpu_usage += point[1] / 1000000000 / self.app["cpus"] * 100
        return sum_cpu_usage / number_container

    def scale(self, delta):
        """Scale the application by delta instances (add or remove)

        The target is clamped to the app's min_instances/max_instances.
        Nothing happens when the clamped target equals the current count.

        @param int delta number of instances to add (positive) or remove
        """
        new_instance = self.app["instance"] + delta
        if new_instance > self.app['max_instances']:
            new_instance = self.app['max_instances']
        if new_instance < self.app['min_instances']:
            new_instance = self.app['min_instances']
        if new_instance != self.app["instance"]:
            self.marathon_client.scale_app(self.app["name"], new_instance)
            self.logger.debug("Scaling " + self.app["name"] + " to: " + str(new_instance))
            self.logger.debug("Waiting for config file haproxy.cfg...")
            time.sleep(self.config["TIME"]['w_config_ha'])
            self.logger.debug("Config file haproxy.cfg...")
            self._reconfigure_haproxy()
            # Re-read the count Marathon reports after scaling.
            self.app["instance"] = self.marathon_client.get_app(
                self.app["name"]).instances
            self.logger.debug("Sleep " + str(self.config["TIME"]['after_scale']) + "s...")
            time.sleep(self.config["TIME"]['after_scale'])

    def check_rule(self, policie, value):
        """Check one policy and return the instances it asks to add/remove

        @param dict policie policy with metric_type, upper_threshold,
            lower_threshold, instances_in and instances_out
        @param tuple value metric values indexed by policie["metric_type"]
        @return dict {"up": n, "down": m}
        """
        delta = {"up": 0, "down": 0}
        metric = value[policie["metric_type"]]
        # Check upper_threshold
        if metric > policie["upper_threshold"]:
            delta['up'] = policie["instances_in"]
        # Check lower_threshold
        if metric < policie["lower_threshold"]:
            delta['down'] = policie["instances_out"]
        return delta

    def _decide_delta(self, avg_cpu, avg_mem):
        """Combine all policies into one signed instance delta.

        Scale up by the largest "up" any policy asks for; otherwise scale
        down by the smallest "down" over all policies (capped at
        _MAX_SCALE_DOWN_STEP). Returns 0 when nothing should change,
        including when there are no policies at all.
        """
        policies = self.app["policies"]
        if not policies:
            # Without this guard the cap itself would be used as a
            # scale-down request.
            return 0
        deltas = [self.check_rule(policie, (avg_cpu, avg_mem))
                  for policie in policies]
        up = max([0] + [d['up'] for d in deltas])
        if up > 0:
            return up
        down = min([self._MAX_SCALE_DOWN_STEP] + [d['down'] for d in deltas])
        if down > 0:
            return 0 - down
        return 0

    def autoscaling(self):
        """Monitor the application forever and scale it per its policies.

        Each cycle sleeps config["TIME"]["monitor"] seconds. Errors are
        logged with a traceback and the loop continues.
        """
        while True:
            try:
                containers_name = self.get_containers_name()
                if not containers_name:
                    self.logger.warning("No containers found for %s, skipping cycle",
                                        self.app["name"])
                    continue
                avg_cpu = self.avg_cpu_usage(containers_name)
                avg_mem = self.avg_mem_usage(containers_name)
                self.logger.info(
                    "Avg cpu usage, avg memmory usage, current instance: %f %f %d",
                    avg_cpu, avg_mem, self.app["instance"])
                delta = self._decide_delta(avg_cpu, avg_mem)
                if delta != 0:
                    self.scale(delta)
            except Exception:
                # Keep the monitor alive, but record the failure visibly.
                self.logger.exception("Autoscaling cycle failed")
            finally:
                time.sleep(self.config["TIME"]['monitor'])
# Append this node's name and ports to the RabbitMQ environment file.
with open('/etc/rabbitmq/rabbitmq-env.conf', 'a') as file:
    file.write('NODENAME=rabbit@%s\n' % current_host)
    file.write('NODE_PORT=31672\n')
    file.write('DIST_PORT=31673\n')

# start rabbit
# Single-argument print(...) behaves the same on Python 2 and 3.
print("Starting cluster")
endpoint = os.getenv('MARATHON_ENDPOINT')
username = os.getenv('MARATHON_HTTP_USER')
password = os.getenv('MARATHON_HTTP_PASSWORD')

# Hosts of other started tasks of APP_ID, discovered through Marathon.
peers = []
if endpoint:
    try:
        print('Discovering configuration from %s' % endpoint)
        c = MarathonClient('http://%s' % endpoint, username=username, password=password)
        tasks = c.list_tasks(APP_ID)
        for task in tasks:
            # NOTE(review): compares against `host` while the env file above
            # uses `current_host` - confirm both names are intended.
            if task.started_at and task.host != host:
                peers.append(task.host)
    except Exception as exc:
        # Discovery stays best effort, but the failure is reported; unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        print('Could not discover peers from %s: %s' % (endpoint, exc))

cluster = None
if peers:
    cluster = peers[0]
    print('Found cluster %s' % cluster)

if not cluster:
    # set ha policy
    subprocess.call(['sudo', '-E', 'service', 'rabbitmq-server', 'start'])
    time.sleep(10)
class HealthCheckBencher(object):
    """Benchmark Marathon health checks with throw-away test applications.

    Apps named ``health-check-test-<suffix>`` are created, their tasks are
    asked over HTTP to collect health-check timestamps, and the timestamps
    are written to a CSV report.
    """

    def __init__(self, marathon_url, image, tasks):
        """Store the benchmark settings and connect to Marathon.

        :param marathon_url: Marathon endpoint passed to MarathonClient
        :param image: docker image used for the test apps
        :param tasks: total number of tasks to run (int or numeric string)
        """
        self.concurrency = 20
        self.docker_image = image
        self.app_base_name = 'health-check-test-'
        self.total_tasks_cout = int(tasks)
        # Plan from the parsed integer; comparing the raw ``tasks`` argument
        # misbehaves when it is a string.
        self.instances_per_app, self.app_count = self._plan_apps(
            self.total_tasks_cout, 50)
        self.heath_check_interval = 30
        self.test_duration = 20
        self.marathon_cluster = MarathonClient(marathon_url, timeout=240)
        self.work_queue = Queue()
        self.result_queue = Queue()
        self.app_list_queue = Queue()
        # Callables run once per task; strings are directives ('sleep=N').
        self.action_list = [self.start_collect,
                            'sleep={}'.format(self.test_duration),
                            self.get_stats]

    @staticmethod
    def _plan_apps(total_tasks, max_per_app):
        """Return (instances_per_app, app_count) for the requested total."""
        if total_tasks < max_per_app:
            return total_tasks, 1
        # Integer division so that range(app_count) works on Python 3.
        return max_per_app, total_tasks // max_per_app

    @staticmethod
    def _task_url(task, endpoint):
        """Return the URL of ``endpoint`` on the given task dict."""
        return 'http://' + task['host'] + ':' + str(task['port']) + '/' + endpoint

    def remove_apps(self):
        """Delete every test app and wait until Marathon lists none."""
        prefix = "/" + self.app_base_name
        for app in self.marathon_cluster.list_apps():
            if app.id.startswith(prefix):
                self.marathon_cluster.delete_app(app.id)
        # Recount on every poll, using the same prefix as the deletion.
        while any(app.id.startswith(prefix)
                  for app in self.marathon_cluster.list_apps()):
            sleep(1)

    def create_app(self, id):
        """Create one test app with a random name and queue its name.

        ``id`` is the work item handed over by ``repeat``; it is not used.
        """
        port_mapping = MarathonContainerPortMapping(container_port=80,
                                                    protocol="tcp")
        app_docker = MarathonDockerContainer(
            image=self.docker_image,
            network="BRIDGE",
            force_pull_image=True,
            port_mappings=[port_mapping])
        app_container = MarathonContainer(docker=app_docker)
        http_health_check = MarathonHealthCheck(
            protocol="HTTP",
            path="/status",
            grace_period_seconds=300,
            interval_seconds=self.heath_check_interval,
            timeout_seconds=20,
            max_consecutive_failures=0
        )

        # md5 needs bytes on Python 3; encoding is harmless on Python 2.
        app_suffix = str(md5(str(random()).encode('utf-8')).hexdigest())
        app_name = self.app_base_name + app_suffix
        new_app = MarathonApp(cpus=CPUS, mem=MEM, disk=DISK,
                              container=app_container,
                              health_checks=[http_health_check],
                              instances=self.instances_per_app,
                              max_launch_delay_seconds=5)
        print("Creating {}".format(app_name))
        self.marathon_cluster.create_app(app_id=app_name, app=new_app)
        self.app_list_queue.put(app_name)
        return None

    def wait_instances(self, app_name):
        """Block until enough tasks of app_name report health results."""
        health_ok = 0
        while health_ok < self.instances_per_app:
            tasks = self.marathon_cluster.list_tasks(app_name)
            health_ok = sum(1 for task in tasks if task.health_check_results)
            if health_ok < self.instances_per_app:
                # Poll gently instead of hammering Marathon.
                sleep(1)

    def start_collect(self, task):
        """Ask the task to start collecting health-check timestamps."""
        res = urlopen(self._task_url(task, 'start_collect'))
        if res.getcode() == 200:
            print(task['id'] + ': collecter was started')
        else:
            print(task['id'] + ': failed to start collecter')

    def stop_collect(self, task):
        """Ask the task to stop collecting health-check timestamps."""
        res = urlopen(self._task_url(task, 'stop_collect'))
        if res.getcode() == 200:
            print(task['id'] + ': collecter was stopped')
        else:
            print(task['id'] + ': failed to stop collecter')

    def clear_stats(self, task):
        """Ask the task to drop its collected statistics."""
        res = urlopen(self._task_url(task, 'clear_stats'))
        if res.getcode() == 200:
            print(task['id'] + ': stats was dropped')
        else:
            # Previously this branch reported success as well.
            print(task['id'] + ': failed to drop stats')

    def get_stats(self, task):
        """Fetch the task's timestamps and put a result dict on result_queue."""
        try:
            res = urlopen(self._task_url(task, 'get_timestamps'))
        except Exception:
            print("URL req failed")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Failed',
                                   'data': []})
            return
        code = res.getcode()
        if code == 200:
            data = res.read()
            if isinstance(data, bytes):
                # urlopen returns bytes on Python 3; split on text instead.
                data = data.decode('utf-8')
            timestamps = data.split(',')
            self.result_queue.put({'id': task['id'],
                                   'status': 'ok',
                                   'data': timestamps})
        elif code == 202:
            print("Collecting is not enabled")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Collecting is not enabled',
                                   'data': []})
        else:
            print("Unknown response code")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Unknown response code',
                                   'data': []})

    def repeat(self, action):
        """Run ``action`` on work_queue items until the queue is empty.

        A failing action is reported and the item is still marked done, so
        ``work_queue.join()`` cannot hang on a crashed worker.
        """
        while not self.work_queue.empty():
            try:
                iteration = self.work_queue.get_nowait()
            except Empty:
                continue
            try:
                action(iteration)
            except Exception as exc:
                print("Action failed for {!r}: {}".format(iteration, exc))
            finally:
                self.work_queue.task_done()

    def fill_queue(self, iterations):
        """Put every item of ``iterations`` on the work queue."""
        for iteration in iterations:
            self.work_queue.put(iteration)

    def get_tasks(self):
        """Return id/host/first-port dicts for all test-app tasks."""
        res = []
        for task in self.marathon_cluster.list_tasks():
            if not task.id.startswith(self.app_base_name):
                continue
            res.append({'id': str(task.id),
                        'host': str(task.host),
                        'port': str(task.ports[0])})
        return res

    def _run_workers(self, action):
        """Start up to ``concurrency`` workers on the queue and wait."""
        for thread_num in range(self.concurrency):
            if self.work_queue.empty():
                break
            worker = Thread(target=self.repeat, args=(action,))
            worker.start()
        self.work_queue.join()

    def create_apps(self):
        """Create all test apps, then wait for their tasks' health results."""
        self.fill_queue(range(self.app_count))
        self._run_workers(self.create_app)

        # Move the created app names onto the work queue for the wait phase.
        while not self.app_list_queue.empty():
            try:
                app_name = self.app_list_queue.get_nowait()
            except Empty:
                continue
            self.work_queue.put(app_name)
        self._run_workers(self.wait_instances)

    def start_test(self):
        """Run every entry of action_list against the current test tasks."""
        task_list = self.get_tasks()
        for action in self.action_list:
            if not callable(action):
                # String directive; callable() works for str and unicode on
                # both Python 2 and 3 without a string-type import.
                if action.startswith('sleep='):
                    amount = int(action.split('=')[1])
                    sleep(60 * amount)
                continue
            self.fill_queue(task_list)
            self._run_workers(action)

    def generate_report(self):
        """Write the collected timestamps to a time-stamped CSV file."""
        today = datetime.today()
        file_prefix = "{:%Y-%m-%d_%H_%M_%S-}".format(today)
        file_name = (file_prefix + 'health_check_result-' +
                     str(self.total_tasks_cout) + 'tasks.csv')

        # ``with`` closes the file even if a write fails.
        with open(file_name, "w") as f:
            f.write("Task ID,Health check timestamp")
            while not self.result_queue.empty():
                try:
                    result = self.result_queue.get_nowait()
                except Empty:
                    continue
                for timestamp in result['data']:
                    f.write("\n%s,%s" % (result['id'], timestamp))
from marathon import MarathonClient
from marathon.models import MarathonApp
import time
import csv
import json

c = MarathonClient('http://localhost:8080')
# Fetch the task list once and reuse it for printing and scanning.
_all_tasks = c.list_tasks()
print(_all_tasks)
task_name = ""
# Defined up front so the name exists even when no task matches.
task_id = ""
# Cache of the sequence; Fibonacci(n) is fibo_array[n - 1].
fibo_array = [0, 1]
for k in _all_tasks:
    # NOTE(review): this parses the task's string form; the values keep
    # whatever quoting that form uses. task.app_id / task.id would be more
    # robust - confirm what later code expects before switching.
    new_string = str(k)
    app_name = new_string.split("'app_id': ")[1].split(
        ", 'health_check_results'")[0]
    app_id = new_string.split("'id': ")[1].split(", 'ports'")[0]
    if 'fibonaccitest' in app_name:
        task_name = app_name
        task_id = app_id


def Fibonacci(num):
    """Return the num-th Fibonacci number, counting from Fibonacci(1) == 0.

    Results are cached in the module-level ``fibo_array``. For ``num``
    below 1, "Invalid input" is printed and None is returned.
    """
    if num <= 0:
        # num == 0 used to fall through and return the last cached value.
        print("Invalid input")
        return None
    # Extend the cache iteratively, so large inputs cannot hit the
    # recursion limit.
    while len(fibo_array) < num:
        fibo_array.append(fibo_array[-1] + fibo_array[-2])
    return fibo_array[num - 1]
class Scaler:
    """Scale one Marathon application up or down from InfluxDB metrics.

    The application record and its policies are fetched from the REST
    service configured under ``config["MARIA_RESTFUL"]``; metrics come from
    InfluxDB and scaling is done through Marathon.
    """

    # A single scale-down step never exceeds this many instances.
    _MAX_SCALE_DOWN_STEP = 10

    def __init__(self, app_name, config):
        """Load the app and its policies and connect to InfluxDB/Marathon.

        @param string app_name name of the application to scale
        @param dict config settings with the MARIA_RESTFUL, INFLUXDB,
            MARATHON and TIME sections read by this class
        """
        self.logger = logging.getLogger("autoscaling")
        self.logger.setLevel(logging.DEBUG)
        self.logger.debug("Init object scaler...")
        self.config = config

        self.logger.debug("Connect RESTful mariadb and get policies...")
        conn = http.client.HTTPConnection(config["MARIA_RESTFUL"]['host'],
                                          config["MARIA_RESTFUL"]['port'])
        try:
            conn.request("GET", "/app/name/" + app_name)
            json_app = conn.getresponse().read().decode("utf-8")
            self.app = json.loads(json_app)
            conn.request("GET", "/app/name/" + app_name + "/policies")
            json_policies = conn.getresponse().read().decode("utf-8")
            self.app["policies"] = json.loads(json_policies)
        finally:
            # Release the socket even when a request or the parsing fails.
            conn.close()

        self.logger.debug("Connect influxdb and marathon...")
        influx_cfg = config["INFLUXDB"]
        self.influx_client = InfluxDBClient(influx_cfg["host"],
                                            influx_cfg["port"],
                                            influx_cfg["username"],
                                            influx_cfg["password"],
                                            influx_cfg["db_name"])
        self.marathon_client = MarathonClient(self._marathon_url())
        # One lookup instead of three identical requests.
        marathon_app = self.marathon_client.get_app(app_name)
        self.app["instance"] = marathon_app.instances
        self.app["mem"] = marathon_app.mem
        self.app["cpus"] = marathon_app.cpus

        self.logger.debug("Reconfig haproxy.cfg...")
        self._reconfigure_haproxy()

    def _marathon_url(self):
        """Return the Marathon base URL built from the configuration."""
        marathon_cfg = self.config["MARATHON"]
        # str() keeps this working when the port is configured as an int.
        return "http://" + str(marathon_cfg["host"]) + ":" + str(marathon_cfg["port"])

    def _reconfigure_haproxy(self):
        """Run servicerouter.py to regenerate /etc/haproxy/haproxy.cfg."""
        os.system("sudo ./servicerouter.py --marathon " + self._marathon_url()
                  + " --haproxy-config /etc/haproxy/haproxy.cfg")

    def setup_logging(self, log_file="autoscaling.log", level=logging.INFO,
                      formatter=None):
        """Attach a file handler to the scaler's logger.

        @param string log_file path of the log file
        @param int level minimum level written to the file
        @param logging.Formatter formatter optional formatter; a
            timestamp/name/level/message format is used when omitted
        """
        if formatter is None:
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh = logging.FileHandler(log_file)
        fh.setLevel(level)
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

    def get_cpu_usage(self, container_name):
        """Return cpu usage (percent) of container_name

        @param string container_name container name
        """
        query = ("select DERIVATIVE(cpu_cumulative_usage) as cpu_usage from stats where container_name = '"
                 + container_name + "' and time > now()-5m group by time(2s) ")
        result = self.influx_client.query(query)
        points = result[0]["points"]
        # NOTE(review): the 1e9 divisor presumably converts ns/s - confirm.
        return (points[0][1] / 1000000000 / self.app["cpus"]) * 100

    def get_container_name(self, mesos_task_id):
        """Return the container name mapped to mesos_task_id

        @param string mesos_task_id
        """
        query = ("select container_name from " + self.config["INFLUXDB"]["ts_mapping"]
                 + " where time>now() - 5m and mesos_task_id = '" + mesos_task_id + "' limit 1")
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return points[0][2]

    def get_containers_name(self):
        """Return the container names of all tasks of this application

        @return list container names, one per Marathon task
        """
        tasks = self.marathon_client.list_tasks(self.app["name"])
        return [self.get_container_name(task.id) for task in tasks]

    def avg_mem_usage(self, containers_name):
        """Return avg memory usage (percent) of the given containers

        @param list containers_name list containers name
        @return float avg mem usage
        @raise ValueError when containers_name is empty
        """
        number_container = len(containers_name)
        if number_container == 0:
            # Avoid sending "in ()" to InfluxDB and dividing by zero.
            raise ValueError("no containers to compute memory usage for")
        quoted_names = ",".join("'" + x + "'" for x in containers_name)
        query = ("select memory_usage,container_name from stats where time > now()-5m and container_name in ("
                 + quoted_names + ") limit " + str(number_container * 2))
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_memory_usage = 0
        for point in points:
            if point[3] is not None:
                # NOTE(review): assumes app["mem"] is in MiB - confirm.
                sum_memory_usage += point[3] / (self.app["mem"] * 1048576) * 100
        return sum_memory_usage / number_container

    def avg_cpu_usage(self, containers_name):
        """Return avg cpu usage (percent) of the given containers

        @param list containers_name list containers name
        @return float avg cpu usage
        @raise ValueError when containers_name is empty
        """
        number_container = len(containers_name)
        if number_container == 0:
            # Avoid sending "in ()" to InfluxDB and dividing by zero.
            raise ValueError("no containers to compute cpu usage for")
        quoted_names = ",".join("'" + x + "'" for x in containers_name)
        query = ("select DERIVATIVE(cpu_cumulative_usage) as cpu_usage,container_name from stats where time > now()-5m and container_name in ("
                 + quoted_names + ") group by time(10s),container_name limit " + str(number_container))
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_cpu_usage = 0
        for point in points:
            sum_cpu_usage += point[1] / 1000000000 / self.app["cpus"] * 100
        return sum_cpu_usage / number_container

    def scale(self, delta):
        """Scale the application by delta instances (add or remove)

        The target is clamped to the app's min_instances/max_instances.
        Nothing happens when the clamped target equals the current count.

        @param int delta number of instances to add (positive) or remove
        """
        new_instance = self.app["instance"] + delta
        if new_instance > self.app['max_instances']:
            new_instance = self.app['max_instances']
        if new_instance < self.app['min_instances']:
            new_instance = self.app['min_instances']
        if new_instance != self.app["instance"]:
            self.marathon_client.scale_app(self.app["name"], new_instance)
            self.logger.debug("Scaling " + self.app["name"] + " to: " + str(new_instance))
            self.logger.debug("Waiting for config file haproxy.cfg...")
            time.sleep(self.config["TIME"]['w_config_ha'])
            self.logger.debug("Config file haproxy.cfg...")
            self._reconfigure_haproxy()
            # Re-read the count Marathon reports after scaling.
            self.app["instance"] = self.marathon_client.get_app(
                self.app["name"]).instances
            self.logger.debug("Sleep " + str(self.config["TIME"]['after_scale']) + "s...")
            time.sleep(self.config["TIME"]['after_scale'])

    def check_rule(self, policie, value):
        """Check one policy and return the instances it asks to add/remove

        @param dict policie policy with metric_type, upper_threshold,
            lower_threshold, instances_in and instances_out
        @param tuple value metric values indexed by policie["metric_type"]
        @return dict {"up": n, "down": m}
        """
        delta = {"up": 0, "down": 0}
        metric = value[policie["metric_type"]]
        # Check upper_threshold
        if metric > policie["upper_threshold"]:
            delta['up'] = policie["instances_in"]
        # Check lower_threshold
        if metric < policie["lower_threshold"]:
            delta['down'] = policie["instances_out"]
        return delta

    def _decide_delta(self, avg_cpu, avg_mem):
        """Combine all policies into one signed instance delta.

        Scale up by the largest "up" any policy asks for; otherwise scale
        down by the smallest "down" over all policies (capped at
        _MAX_SCALE_DOWN_STEP). Returns 0 when nothing should change,
        including when there are no policies at all.
        """
        policies = self.app["policies"]
        if not policies:
            # Without this guard the cap itself would be used as a
            # scale-down request.
            return 0
        deltas = [self.check_rule(policie, (avg_cpu, avg_mem))
                  for policie in policies]
        up = max([0] + [d['up'] for d in deltas])
        if up > 0:
            return up
        down = min([self._MAX_SCALE_DOWN_STEP] + [d['down'] for d in deltas])
        if down > 0:
            return 0 - down
        return 0

    def autoscaling(self):
        """Monitor the application forever and scale it per its policies.

        Each cycle sleeps config["TIME"]["monitor"] seconds. Errors are
        logged with a traceback and the loop continues.
        """
        while True:
            try:
                containers_name = self.get_containers_name()
                if not containers_name:
                    self.logger.warning("No containers found for %s, skipping cycle",
                                        self.app["name"])
                    continue
                avg_cpu = self.avg_cpu_usage(containers_name)
                avg_mem = self.avg_mem_usage(containers_name)
                self.logger.info(
                    "Avg cpu usage, avg memmory usage, current instance: %f %f %d",
                    avg_cpu, avg_mem, self.app["instance"])
                delta = self._decide_delta(avg_cpu, avg_mem)
                if delta != 0:
                    self.scale(delta)
            except Exception:
                # Keep the monitor alive, but record the failure visibly.
                self.logger.exception("Autoscaling cycle failed")
            finally:
                time.sleep(self.config["TIME"]['monitor'])
class HealthCheckBencher(object):
    """Benchmark Marathon health checks with throw-away test applications.

    Apps named ``health-check-test-<suffix>`` are created, their tasks are
    asked over HTTP to collect health-check timestamps, and the timestamps
    are written to a CSV report.
    """

    def __init__(self, marathon_url, image, tasks):
        """Store the benchmark settings and connect to Marathon.

        :param marathon_url: Marathon endpoint passed to MarathonClient
        :param image: docker image used for the test apps
        :param tasks: total number of tasks to run (int or numeric string)
        """
        self.concurrency = 20
        self.docker_image = image
        self.app_base_name = 'health-check-test-'
        self.total_tasks_cout = int(tasks)
        # Plan from the parsed integer; comparing the raw ``tasks`` argument
        # misbehaves when it is a string.
        self.instances_per_app, self.app_count = self._plan_apps(
            self.total_tasks_cout, 50)
        self.heath_check_interval = 30
        self.test_duration = 20
        self.marathon_cluster = MarathonClient(marathon_url, timeout=240)
        self.work_queue = Queue()
        self.result_queue = Queue()
        self.app_list_queue = Queue()
        # Callables run once per task; strings are directives ('sleep=N').
        self.action_list = [self.start_collect,
                            'sleep={}'.format(self.test_duration),
                            self.get_stats]

    @staticmethod
    def _plan_apps(total_tasks, max_per_app):
        """Return (instances_per_app, app_count) for the requested total."""
        if total_tasks < max_per_app:
            return total_tasks, 1
        # Integer division so that range(app_count) works on Python 3.
        return max_per_app, total_tasks // max_per_app

    @staticmethod
    def _task_url(task, endpoint):
        """Return the URL of ``endpoint`` on the given task dict."""
        return 'http://' + task['host'] + ':' + str(task['port']) + '/' + endpoint

    def remove_apps(self):
        """Delete every test app and wait until Marathon lists none."""
        prefix = "/" + self.app_base_name
        for app in self.marathon_cluster.list_apps():
            if app.id.startswith(prefix):
                self.marathon_cluster.delete_app(app.id)
        # Recount on every poll, using the same prefix as the deletion.
        while any(app.id.startswith(prefix)
                  for app in self.marathon_cluster.list_apps()):
            sleep(1)

    def create_app(self, id):
        """Create one test app with a random name and queue its name.

        ``id`` is the work item handed over by ``repeat``; it is not used.
        """
        port_mapping = MarathonContainerPortMapping(container_port=80,
                                                    protocol="tcp")
        app_docker = MarathonDockerContainer(
            image=self.docker_image,
            network="BRIDGE",
            force_pull_image=True,
            port_mappings=[port_mapping])
        app_container = MarathonContainer(docker=app_docker)
        http_health_check = MarathonHealthCheck(
            protocol="HTTP",
            path="/status",
            grace_period_seconds=300,
            interval_seconds=self.heath_check_interval,
            timeout_seconds=20,
            max_consecutive_failures=0
        )

        # md5 needs bytes on Python 3; encoding is harmless on Python 2.
        app_suffix = str(md5(str(random()).encode('utf-8')).hexdigest())
        app_name = self.app_base_name + app_suffix
        new_app = MarathonApp(cpus=CPUS, mem=MEM, disk=DISK,
                              container=app_container,
                              health_checks=[http_health_check],
                              instances=self.instances_per_app,
                              max_launch_delay_seconds=5)
        print("Creating {}".format(app_name))
        self.marathon_cluster.create_app(app_id=app_name, app=new_app)
        self.app_list_queue.put(app_name)
        return None

    def wait_instances(self, app_name):
        """Block until enough tasks of app_name report health results."""
        health_ok = 0
        while health_ok < self.instances_per_app:
            tasks = self.marathon_cluster.list_tasks(app_name)
            health_ok = sum(1 for task in tasks if task.health_check_results)
            if health_ok < self.instances_per_app:
                # Poll gently instead of hammering Marathon.
                sleep(1)

    def start_collect(self, task):
        """Ask the task to start collecting health-check timestamps."""
        res = urlopen(self._task_url(task, 'start_collect'))
        if res.getcode() == 200:
            print(task['id'] + ': collecter was started')
        else:
            print(task['id'] + ': failed to start collecter')

    def stop_collect(self, task):
        """Ask the task to stop collecting health-check timestamps."""
        res = urlopen(self._task_url(task, 'stop_collect'))
        if res.getcode() == 200:
            print(task['id'] + ': collecter was stopped')
        else:
            print(task['id'] + ': failed to stop collecter')

    def clear_stats(self, task):
        """Ask the task to drop its collected statistics."""
        res = urlopen(self._task_url(task, 'clear_stats'))
        if res.getcode() == 200:
            print(task['id'] + ': stats was dropped')
        else:
            # Previously this branch reported success as well.
            print(task['id'] + ': failed to drop stats')

    def get_stats(self, task):
        """Fetch the task's timestamps and put a result dict on result_queue."""
        try:
            res = urlopen(self._task_url(task, 'get_timestamps'))
        except Exception:
            print("URL req failed")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Failed',
                                   'data': []})
            return
        code = res.getcode()
        if code == 200:
            data = res.read()
            if isinstance(data, bytes):
                # urlopen returns bytes on Python 3; split on text instead.
                data = data.decode('utf-8')
            timestamps = data.split(',')
            self.result_queue.put({'id': task['id'],
                                   'status': 'ok',
                                   'data': timestamps})
        elif code == 202:
            print("Collecting is not enabled")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Collecting is not enabled',
                                   'data': []})
        else:
            print("Unknown response code")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Unknown response code',
                                   'data': []})

    def repeat(self, action):
        """Run ``action`` on work_queue items until the queue is empty.

        A failing action is reported and the item is still marked done, so
        ``work_queue.join()`` cannot hang on a crashed worker.
        """
        while not self.work_queue.empty():
            try:
                iteration = self.work_queue.get_nowait()
            except Empty:
                continue
            try:
                action(iteration)
            except Exception as exc:
                print("Action failed for {!r}: {}".format(iteration, exc))
            finally:
                self.work_queue.task_done()

    def fill_queue(self, iterations):
        """Put every item of ``iterations`` on the work queue."""
        for iteration in iterations:
            self.work_queue.put(iteration)

    def get_tasks(self):
        """Return id/host/first-port dicts for all test-app tasks."""
        res = []
        for task in self.marathon_cluster.list_tasks():
            if not task.id.startswith(self.app_base_name):
                continue
            res.append({'id': str(task.id),
                        'host': str(task.host),
                        'port': str(task.ports[0])})
        return res

    def _run_workers(self, action):
        """Start up to ``concurrency`` workers on the queue and wait."""
        for thread_num in range(self.concurrency):
            if self.work_queue.empty():
                break
            worker = Thread(target=self.repeat, args=(action,))
            worker.start()
        self.work_queue.join()

    def create_apps(self):
        """Create all test apps, then wait for their tasks' health results."""
        self.fill_queue(range(self.app_count))
        self._run_workers(self.create_app)

        # Move the created app names onto the work queue for the wait phase.
        while not self.app_list_queue.empty():
            try:
                app_name = self.app_list_queue.get_nowait()
            except Empty:
                continue
            self.work_queue.put(app_name)
        self._run_workers(self.wait_instances)

    def start_test(self):
        """Run every entry of action_list against the current test tasks."""
        task_list = self.get_tasks()
        for action in self.action_list:
            if not callable(action):
                # String directive; callable() avoids the Python-2-only
                # ``basestring`` name and works on both major versions.
                if action.startswith('sleep='):
                    amount = int(action.split('=')[1])
                    sleep(60 * amount)
                continue
            self.fill_queue(task_list)
            self._run_workers(action)

    def generate_report(self):
        """Write the collected timestamps to a time-stamped CSV file."""
        today = datetime.today()
        file_prefix = "{:%Y-%m-%d_%H_%M_%S-}".format(today)
        file_name = (file_prefix + 'health_check_result-' +
                     str(self.total_tasks_cout) + 'tasks.csv')

        # ``with`` closes the file even if a write fails.
        with open(file_name, "w") as f:
            f.write("Task ID,Health check timestamp")
            while not self.result_queue.empty():
                try:
                    result = self.result_queue.get_nowait()
                except Empty:
                    continue
                for timestamp in result['data']:
                    f.write("\n%s,%s" % (result['id'], timestamp))