def in_place_restart(client: MarathonClient, appid: str):
    """Restart an app by scaling it to zero and back to its previous size.

    The current instance count is read first; each scale request is then
    passed to ``wait_for_deployment`` before the next step runs.

    :param client: Marathon client used for every API call.
    :param appid: ID of the app to restart.
    """
    original_instances = client.get_app(appid).instances

    def _scale_and_wait(target):
        # Issue the scale request, then block on the deployment it returned.
        wait_for_deployment(client, client.scale_app(appid, target))

    _scale_and_wait(0)
    print('Scaled {} down to 0'.format(appid))

    _scale_and_wait(original_instances)
    print('{} back at {} again'.format(appid, original_instances))
def launch_elsa(marathon, stats_file, scale_window):
    """Deploy the 'elsa' app on Marathon and autoscale it from a traffic counter.

    Every ``scale_window`` seconds the integer stored in ``stats_file`` is
    read and compared with the previous reading. A rise above
    ``TRAFFIC_INCREASE_THRESHOLD`` multiplies the instance count; a drop
    halves it (never below one instance). The loop runs until the user
    interrupts it, at which point the app is deleted.

    :param marathon: value passed straight to ``MarathonClient``.
    :param stats_file: path of the file holding the traffic counter; created
        with the content ``0`` when it does not exist.
    :param scale_window: seconds to sleep between two readings.
    """
    logging.info('Start monitoring the inbound traffic on topics using %s',
                 stats_file)

    # make sure the stats file is properly initialized; the context manager
    # closes the handle even when the write fails
    if not os.path.exists(stats_file):
        with open(stats_file, 'w') as new_stats_file:
            new_stats_file.write('0')

    # launch the Elsa app via Marathon
    c = MarathonClient(marathon)
    c.create_app(
        'elsa',
        MarathonApp(cmd='/home/vagrant/elsa/launch-elsa.sh',
                    mem=200,
                    cpus=1,
                    user='******'))
    print(
        'ElSA is deployed and running, waiting now 5 sec before starting auto-scale ...'
    )
    time.sleep(5)  # allow time to deploy before autoscaling sets in

    # kick off traffic monitoring and trigger autoscaling:
    previous_topic_traffic = 0
    try:
        while True:
            with open(stats_file, 'r') as elsa_file:
                topic_traffic = int(elsa_file.read())
            topic_traffic_diff = topic_traffic - previous_topic_traffic
            print('Difference in traffic in the past %d seconds: %d' %
                  (scale_window, topic_traffic_diff))
            previous_topic_traffic = topic_traffic

            current_instance_num = c.get_app('elsa').instances
            if topic_traffic_diff > TRAFFIC_INCREASE_THRESHOLD:
                # we see a surge of traffic above threshold ...
                instance_multiplier = int(topic_traffic_diff / SCALE_FACTOR)
                # ... increase number of instances
                # NOTE(review): a multiplier of 0 (diff < SCALE_FACTOR) would
                # scale the app to zero - confirm the constants rule that out.
                target_instance_num = current_instance_num * instance_multiplier
                c.scale_app('elsa', target_instance_num)
                print('Increasing number of instances to %d' %
                      target_instance_num)
            elif topic_traffic_diff < 0:
                # negative, back off exponentially
                target_instance_num = current_instance_num // 2
                if target_instance_num > 1:
                    c.scale_app('elsa', target_instance_num)
                    print('Decreasing number of instances to %d' %
                          target_instance_num)
                else:
                    c.scale_app('elsa', 1)
                    print('Resetting number of instances to 1')
            time.sleep(scale_window)
    except KeyboardInterrupt:
        print(
            'ElSA has been stopped by user, halting app and rolling back deployment. Thanks and bye!'
        )
        c.delete_app('elsa', force=True)
def send_to_marathon(request):
    """Apply an action posted by the UI to a Marathon app and report the result.

    The POST fields ``action`` and ``id`` select what to do with which app:
    ``stop`` (scale to 0, forced), ``start`` (scale to 1), ``destroy``
    (requires the ``auth.can_init_app`` permission), ``restart``, ``scale``
    (uses ``number_instance``), ``update`` (uses ``cpus``, ``mem`` and
    ``version``) and ``stop-deployment``. Any other action performs no
    Marathon call and is still reported as a success.

    :param request: request object exposing ``method``, ``POST`` and ``user``.
    :return: ``HttpResponse`` whose body is a JSON object with the keys
        ``status`` (``"success"`` or ``"error"``) and ``msg``.
    """
    import json  # local import: only needed to serialise the response body

    # Bound before the try so the error path can always format its message,
    # even when the failure happens before the POST data is read.
    action = None
    try:
        if request.method != 'POST':
            raise ValueError('only POST requests are supported')
        action = request.POST.get('action', None)
        app_id = request.POST.get('id', None)
        mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'],
                                                  settings.MARATHON['port']))
        if action == 'stop':
            mc.scale_app(app_id, 0, force=True)
        elif action == 'start':
            mc.scale_app(app_id, 1)
        elif action == 'destroy':
            if request.user.has_perm("auth.can_init_app"):
                mc.delete_app(app_id)
            else:
                raise PermissionDenied
        elif action == 'restart':
            mc.restart_app(app_id)
        elif action == 'scale':
            mc.scale_app(app_id, int(request.POST.get('number_instance')))
        elif action == 'update':
            app = mc.get_app(app_id)
            app.cpus = float(request.POST.get('cpus'))
            app.mem = float(request.POST.get('mem'))
            app.container.docker.image = request.POST.get('version')
            mc.update_app(app_id, app)
        elif action == "stop-deployment":
            mc.delete_deployment(app_id)
        payload = {
            "status": "success",
            "msg": "%s success" % html.escape('%s' % action),
        }
    except Exception as e:
        # Every failure (including PermissionDenied) becomes an error payload.
        payload = {
            "status": "error",
            "msg": "%s fail: %s" % (html.escape('%s' % action),
                                    html.escape(str(e))),
        }
    # json.dumps escapes quotes, backslashes and newlines, so the body stays
    # valid JSON whatever the action or the exception text contains.
    return HttpResponse(json.dumps(payload))
def launch_elsa(marathon, stats_file, scale_window):
    """Deploy the 'elsa' app on Marathon and autoscale it from a traffic counter.

    Every ``scale_window`` seconds the integer stored in ``stats_file`` is
    read and compared with the previous reading. A rise above
    ``TRAFFIC_INCREASE_THRESHOLD`` multiplies the instance count; a drop
    halves it (never below one instance). The loop runs until the user
    interrupts it, at which point the app is deleted.

    :param marathon: value passed straight to ``MarathonClient``.
    :param stats_file: path of the file holding the traffic counter; created
        with the content ``0`` when it does not exist.
    :param scale_window: seconds to sleep between two readings.
    """
    logging.info('Start monitoring the inbound traffic on topics using %s', stats_file)

    # make sure the stats file is properly initialized; the context manager
    # closes the handle even when the write fails
    if not os.path.exists(stats_file):
        with open(stats_file, 'w') as new_stats_file:
            new_stats_file.write('0')

    # launch the Elsa app via Marathon
    c = MarathonClient(marathon)
    c.create_app('elsa', MarathonApp(cmd='/home/vagrant/elsa/launch-elsa.sh', mem=200, cpus=1, user='******'))
    print('ElSA is deployed and running, waiting now 5 sec before starting auto-scale ...')
    time.sleep(5)  # allow time to deploy before autoscaling sets in

    # kick off traffic monitoring and trigger autoscaling:
    previous_topic_traffic = 0
    try:
        while True:
            with open(stats_file, 'r') as elsa_file:
                topic_traffic = int(elsa_file.read())
            topic_traffic_diff = topic_traffic - previous_topic_traffic
            print('Difference in traffic in the past %d seconds: %d' %(scale_window, topic_traffic_diff))
            previous_topic_traffic = topic_traffic

            current_instance_num = c.get_app('elsa').instances
            if topic_traffic_diff > TRAFFIC_INCREASE_THRESHOLD:
                # we see a surge of traffic above threshold ...
                instance_multiplier = int(topic_traffic_diff / SCALE_FACTOR)
                # ... increase number of instances
                # NOTE(review): a multiplier of 0 (diff < SCALE_FACTOR) would
                # scale the app to zero - confirm the constants rule that out.
                target_instance_num = current_instance_num * instance_multiplier
                c.scale_app('elsa', target_instance_num)
                print('Increasing number of instances to %d' %(target_instance_num))
            elif topic_traffic_diff < 0:
                # negative, back off exponentially
                target_instance_num = current_instance_num // 2
                if target_instance_num > 1:
                    c.scale_app('elsa', target_instance_num)
                    print('Decreasing number of instances to %d' %(target_instance_num))
                else:
                    c.scale_app('elsa', 1)
                    print('Resetting number of instances to 1')
            time.sleep(scale_window)
    except KeyboardInterrupt:
        print('ElSA has been stopped by user, halting app and rolling back deployment. Thanks and bye!')
        c.delete_app('elsa', force=True)
def send_to_marathon(request):
    """Apply an action posted by the UI to a Marathon app and report the result.

    The POST fields ``action`` and ``id`` select what to do with which app:
    ``stop`` (scale to 0), ``start`` (scale to 1), ``destroy``, ``restart``
    and ``scale`` (uses ``number_instance``). Any other action performs no
    Marathon call and is still reported as a success.

    :param request: request object exposing ``method`` and ``POST``.
    :return: ``HttpResponse`` whose body is a JSON object with the keys
        ``status`` (``"success"`` or ``"error"``) and ``msg``.
    """
    import json  # local import: only needed to serialise the response body

    # Bound before the try so the error path can always format its message,
    # even when the failure happens before the POST data is read.
    action = None
    try:
        if request.method != "POST":
            raise ValueError("only POST requests are supported")
        action = request.POST.get("action", None)
        app_id = request.POST.get("id", None)
        mc = MarathonClient("http://{}:{}".format(settings.MARATHON["host"], settings.MARATHON["port"]))
        if action == "stop":
            mc.scale_app(app_id, 0)
        elif action == "start":
            mc.scale_app(app_id, 1)
        elif action == "destroy":
            mc.delete_app(app_id)
        elif action == "restart":
            # Previously a no-op that still reported success.
            mc.restart_app(app_id)
        elif action == "scale":
            mc.scale_app(app_id, int(request.POST.get("number_instance")))
        payload = {
            "status": "success",
            "msg": "%s success" % html.escape("%s" % action),
        }
    except Exception as e:
        payload = {
            "status": "error",
            "msg": "%s fail: %s" % (html.escape("%s" % action), html.escape(str(e))),
        }
    # json.dumps escapes quotes, backslashes and newlines, so the body stays
    # valid JSON whatever the action or the exception text contains.
    return HttpResponse(json.dumps(payload))
def send_to_marathon(request):
    """Apply an action posted by the UI to a Marathon app and report the result.

    The POST fields ``action`` and ``id`` select what to do with which app:
    ``stop`` (scale to 0, forced), ``start`` (scale to 1), ``destroy``
    (requires the ``auth.can_init_app`` permission), ``restart``, ``scale``
    (uses ``number_instance``), ``update`` (uses ``cpus``, ``mem`` and
    ``version``) and ``stop-deployment``. Any other action performs no
    Marathon call and is still reported as a success.

    :param request: request object exposing ``method``, ``POST`` and ``user``.
    :return: ``HttpResponse`` whose body is a JSON object with the keys
        ``status`` (``"success"`` or ``"error"``) and ``msg``.
    """
    import json  # local import: only needed to serialise the response body

    # Bound before the try so the error path can always format its message,
    # even when the failure happens before the POST data is read.
    action = None
    try:
        if request.method != 'POST':
            raise ValueError('only POST requests are supported')
        action = request.POST.get('action', None)
        app_id = request.POST.get('id', None)
        mc = MarathonClient('http://{}:{}'.format(
            settings.MARATHON['host'], settings.MARATHON['port']))
        if action == 'stop':
            mc.scale_app(app_id, 0, force=True)
        elif action == 'start':
            mc.scale_app(app_id, 1)
        elif action == 'destroy':
            if request.user.has_perm("auth.can_init_app"):
                mc.delete_app(app_id)
            else:
                raise PermissionDenied
        elif action == 'restart':
            mc.restart_app(app_id)
        elif action == 'scale':
            mc.scale_app(app_id, int(request.POST.get('number_instance')))
        elif action == 'update':
            app = mc.get_app(app_id)
            app.cpus = float(request.POST.get('cpus'))
            app.mem = float(request.POST.get('mem'))
            app.container.docker.image = request.POST.get('version')
            mc.update_app(app_id, app)
        elif action == "stop-deployment":
            mc.delete_deployment(app_id)
        payload = {
            "status": "success",
            "msg": "%s success" % html.escape('%s' % action),
        }
    except Exception as e:
        # Every failure (including PermissionDenied) becomes an error payload.
        payload = {
            "status": "error",
            "msg": "%s fail: %s" % (html.escape('%s' % action),
                                    html.escape(str(e))),
        }
    # json.dumps escapes quotes, backslashes and newlines, so the body stays
    # valid JSON whatever the action or the exception text contains.
    return HttpResponse(json.dumps(payload))
def deploy(app_definition, marathon_url, instances, auth_token, zero, force):
    """Create a new app version on Marathon and shift the service over to it.

    The app is created from the JSON definition file, existing running apps of
    the same service are located by name, and the new app is then scaled up
    while the old ones are scaled to zero.

    :param app_definition: path to the Marathon app definition (JSON).
    :param marathon_url: Marathon endpoint handed to ``MarathonClient``.
    :param instances: target instance count; ``None`` means 2 for a first
        deployment, otherwise the instance count of an existing app.
    :param auth_token: token handed to ``MarathonClient``.
    :param zero: ``'Yes'`` selects the zero-downtime path (``scale`` helper);
        anything else stops the old apps before starting the new one.
    :param force: ``'Yes'`` updates the existing app when creation fails;
        anything else aborts the process.
    :raises SystemExit: with a non-zero status when the app cannot be created
        and ``force`` is not ``'Yes'``.
    """
    old_appids = []

    # Connect to Marathon
    print("\nConnecting to Marathon...")
    c = MarathonClient(marathon_url, auth_token=auth_token)
    print("Connected to", marathon_url)

    # Pick up the Marathon App Definition file
    with open(app_definition) as definition_file:
        app = MarathonApp.from_json(json.load(definition_file))
    new_app_id = app.id
    service_name = new_app_id.split("/")[-1].split(".")[0]

    # Instantiate the new application on DC/OS but don't launch it yet
    # The application definition instances field should be 0 by default
    # If forced, the application will be relaunched even if the ID already exists
    print("\nInstantiating new application on Marathon with", app.instances,
          "instances...")
    try:
        c.create_app(new_app_id, app)
    except Exception as create_error:
        if force != 'Yes':
            # Exit with a message and a non-zero status instead of silently
            # reporting success to the calling shell.
            sys.exit("Could not create app %s: %s" % (new_app_id, create_error))
        print("\nForcing redeploy of the same app id...", new_app_id)
        c.update_app(new_app_id, app, force=True, minimal=True)
        check_deployment(c, new_app_id)
    print("Created app", new_app_id)

    # List and find currently running apps of the same service
    # This assumes the naming convention (id): /some/group/service_name.uniquevalue
    print("\nFinding any existing apps for service:", service_name)
    for existing_app in c.list_apps():
        existing_service_name = existing_app.id.split("/")[-1].split(".")[0]
        if service_name == existing_service_name and existing_app.instances > 0:
            print("Found up and running application id:", existing_app.id)
            old_appids.append(existing_app.id)

    # If it's the first deployment ever, just launch the desired number of instances
    # Otherwise perform a hybrid release
    # Finally clean up any older app instances running
    if not old_appids:
        if instances is None:
            instances = 2
        print("No current apps found. Launching brand new service with",
              instances, "instances...")
        c.scale_app(new_app_id, instances=instances)
        check_deployment(c, new_app_id)
        check_health(c, new_app_id)
    else:
        old_appids.reverse()
        if zero == 'Yes':
            print("\nStarting zero downtime deployment for...", new_app_id)
            for old_appid in old_appids:
                if instances is None:
                    instances = c.get_app(old_appid).instances
                if (old_appid == '' or old_appid == new_app_id
                        or old_appid == '/' + new_app_id):
                    # The "old" app is the new app itself: just resize it.
                    print("Scaling existing app_id", new_app_id, "to",
                          instances, "instances...")
                    c.scale_app(new_app_id, instances=instances)
                    check_deployment(c, new_app_id)
                    check_health(c, new_app_id)
                else:
                    print("Target number of total instances:", instances)
                    # Move half of the target (at least one) per step.
                    delta = int(round(instances * .50))
                    delta = (delta if delta > 0 else 1)
                    scale(c, new_app_id, old_appid, delta)
                    if c.get_app(new_app_id).instances != instances:
                        print("\nLaunch", instances - delta,
                              "remaining instance(s) of the new version...")
                        c.scale_app(new_app_id, instances=instances)
                        check_deployment(c, new_app_id)
                        check_health(c, new_app_id)
                    if c.get_app(old_appid).instances > 0:
                        print(
                            "Finish shutting down remaining instances of the old version..."
                        )
                        c.scale_app(old_appid, instances=0)
                        check_deployment(c, old_appid)
        else:
            print("Started deployment with downtime...")
            if instances is None:
                # Read the size before the old apps are stopped; otherwise the
                # new app would be scaled with instances=None.
                instances = c.get_app(old_appids[0]).instances
            for old_appid in old_appids:
                c.scale_app(old_appid, instances=0)
                check_deployment(c, old_appid)
            c.scale_app(new_app_id, instances=instances)
            check_deployment(c, new_app_id)
            check_health(c, new_app_id)

    print("\nSUCCESS:\nNew application ID:", new_app_id,
          "\nRunning instances:", instances)
class MarathonIF(object):
    """Wrapper around ``MarathonClient`` that retries failing calls.

    ``get_app``, ``delete_app`` and ``scale_app`` retry every 10 seconds until
    their timeout expires and then raise a generic ``Exception``.
    """

    def __init__(self, marathon_addr, my_addr, mesos):
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def get_apps(self):
        """Return whatever ``MarathonClient.list_apps`` returns."""
        return self.mcli.list_apps()

    def get_app(self, app_id, timeout=300):
        """Fetch an app, retrying on errors for up to ``timeout`` seconds.

        :return: the app, or ``None`` when Marathon reports it as not found.
        :raises Exception: when no attempt succeeded within ``timeout``.
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                return self.mcli.get_app(app_id)
            except marathon.exceptions.NotFoundError:
                return None
            except Exception:
                # Exception (not a bare except) so Ctrl-C still stops the loop.
                l.info("mcli: get_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception("mcli get_app timed out, possible zookeper/marathon/mesos malfunction")

    def delete_app(self, app_id, force=False, timeout=200):
        """Delete an app, retrying on errors for up to ``timeout`` seconds.

        :raises Exception: when no attempt succeeded within ``timeout``.
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                self.mcli.delete_app(app_id, force)
                return
            except Exception:
                l.info("mcli: delete_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception("mcli delete_app timed out, possible zookeper/marathon/mesos malfunction")

    def delete_deployment(self, dep_id):
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` when it exists, trying up to ``trys`` times.

        :return: the result of ``delete_app``, or ``None`` when the app does
            not exist.
        :raises Exception: the error of the last attempt when all fail.
        """
        last_error = None
        for _ in range(0, trys):
            try:
                a = self.get_app(app_id)
                if a:
                    return self.delete_app(app_id)
                return None
            except Exception:
                err_type, last_error = sys.exc_info()[:2]
                pprint("<p>Error: %s</p>" % err_type)
                time.sleep(10)
        # A bare `raise` here has no active exception on Python 3, so the
        # remembered error is raised explicitly.
        if last_error is None:
            raise Exception("delete_app_ifexisting: no attempt made (trys=%d)" % trys)
        raise last_error

    @staticmethod
    def is_valid_app_id(app_id):
        """Return True when ``app_id`` only contains accepted characters.

        The pattern accepts ASCII letters of either case, digits, hyphens,
        slashes and dots, and also matches the empty string.
        NOTE(review): the error logged by ``create_app`` speaks of lowercase
        letters only - confirm whether uppercase should be rejected here.
        """
        return bool(re.match("^[A-Za-z0-9-/.]*$", app_id))

    def create_app(self, app_id, attr):
        """
            Create and start an app.
            :param app_id: (str) - Application ID
            :param attr: marathon.models.app.MarathonApp application to create.
            :return: the created app
            :raises Exception: when ``app_id`` fails validation.
            :raises marathon.exceptions.MarathonHttpError: immediately for any
                error other than "app is locked", or after 10 locked attempts.
        """
        # Validate that app_id conforms to allowed naming scheme.
        if not self.is_valid_app_id(app_id):
            l.error("Error: Only lowercase letters, digits, hyphens are allowed in app_id. %s" % app_id)
            raise Exception("Invalid app_id")
        last_error = None
        for _ in range(0, 10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as e:
                if str(e).find('App is locked by one or more deployments. Override with the option') >= 0:
                    # Another deployment holds the app: wait and try again.
                    last_error = e
                    time.sleep(1)
                else:
                    raise
        # Still locked after every retry: surface the last lock error.
        raise last_error

    def wait_app_removal(self, app):
        """Poll every 0.2 s until ``get_app`` no longer returns the app."""
        cnt = 0
        while True:
            if not self.get_app(app):
                break
            time.sleep(0.2)
            cnt += 1
        if cnt > 0:
            l.info("Stuck waiting for %s to be deleted CNT=%d" % (app, cnt))
        return True

    def wait_app_ready(self, app, running_count, sleep_before_next_try=1):
        """Poll until exactly ``running_count`` tasks are running.

        :return: the app object from the poll that matched.
        """
        cnt = 0
        while True:
            a1 = self.get_app(app)
            # if tasks_running are greater (due to whatever reason, scale down accordingly)
            if a1.tasks_running > running_count:
                delta = a1.tasks_running - running_count
                l.info("Found [%d] more apps, scaling down to [%d]", delta, running_count)
                self.scale_app(app, running_count)
                # Allow for some time before next poll
                time.sleep(1)
                continue
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(sleep_before_next_try)
            if (cnt % 30) == 29:
                l.info("[%d]Waiting for task to move to running stage, " % cnt +
                       "current stat staged=%d running=%d expected Running=%d" %
                       (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale, timeout=300):
        """Scale an app, retrying on errors for up to ``timeout`` seconds.

        :raises Exception: when no attempt succeeded within ``timeout``.
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                self.mcli.scale_app(app, scale)
                return
            except Exception:
                l.info("mcli: scale_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception("mcli scale_app timed out, possible zookeper/marathon/mesos malfunction")

    def ping(self):
        return self.mcli.ping()

    def kill_task(self, app_id, task_id):
        return self.mcli.kill_task(app_id, task_id)
import sys
import time
from optparse import OptionParser

from marathon import MarathonClient
from marathon.models import MarathonApp

# Small interactive driver: creates the 'zmq-client' app on a local Marathon,
# scales it to 400 instances and deletes it, pausing for Enter between steps.
if __name__ == '__main__':
    usage = ('python %prog')
    parser = OptionParser(description='Simple marathon-python based master to launch apps',
                          version="0.1 ", usage=usage)
    (options, args) = parser.parse_args()
    if (len(args) != 0):
        # The script takes no positional arguments.
        parser.print_help()
        sys.exit(2)

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    print("Initiating marathonclient...")
    c = MarathonClient('http://localhost:8080')
    app_cmd = "python /home/abdullah/cosmic-space/test-mesos/py-zmq/sub_client.py --server_ip_ports 10.10.0.2:5556"

    # launch app
    print("Initiating zmq-client app")
    c.create_app('zmq-client', MarathonApp(cmd=app_cmd, mem=16, cpus=0.01))

    # scale
    prompt("scale_apps upto 400")
    c.scale_app('zmq-client', instances=400)

    # delete
    prompt("delete apps")
    c.delete_app('zmq-client')
# NOTE(review): this fragment starts mid-script. `filter_read` is not opened in
# the visible code (presumably by an enclosing `with open(...)` - confirm), and
# the nesting below was reconstructed from a collapsed line - verify it.
filter_inject = filter_read.read()
# Read the whole security config once to see whether it was already patched.
with open('%s/security/config.xml' % GEOSERVER_DATA_DIR) as config_read:
    full_config = config_read.read()
    if 'anonReload' in full_config:
        logging.info('Configuration already supports anonymous REST reloads.')
    # Only shim in anonymous reload and restart GeoServer if it hasn't been done before
    else:
        # Rewind: the read() above left the file position at the end.
        config_read.seek(0)
        # Copy the config line by line into a sibling "-output" file, writing
        # filter_inject after every line that contains '<filterChain'.
        with open('%s/security/config.xml-output' % GEOSERVER_DATA_DIR, 'w') as config_write:
            line_value = config_read.readline()
            while len(line_value):
                config_write.write('%s' % line_value)
                if '<filterChain' in line_value:
                    config_write.write('%s' % filter_inject)
                line_value = config_read.readline()
        # Replace the original config with the patched copy.
        shutil.move('%s/security/config.xml-output' % GEOSERVER_DATA_DIR, '%s/security/config.xml' % GEOSERVER_DATA_DIR)
        # Kill the app's tasks; anything but a one-element response is
        # treated as a failed restart and aborts the script with status 1.
        response = MARATHON_CLIENT.kill_tasks(GEOSERVER_APP)
        if not len(response) == 1:
            logging.critical('Error restarting GeoServer')
            sys.exit(1)
        # Scale to the configured instance count and wait via the helper.
        MARATHON_CLIENT.scale_app(GEOSERVER_APP, GEOSERVER_INSTANCES)
        block_for_healthy_app(MARATHON_CLIENT, GEOSERVER_APP, GEOSERVER_INSTANCES)
logging.info('Bootstrap complete.')
class MarathonIF(object):
    """Thin convenience wrapper around ``MarathonClient``."""

    def __init__(self, marathon_addr, my_addr, mesos):
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def get_apps(self):
        """Return whatever ``MarathonClient.list_apps`` returns."""
        return self.mcli.list_apps()

    def get_app(self, app_id):
        """Return the app, or ``None`` when Marathon reports it as not found."""
        try:
            return self.mcli.get_app(app_id)
        except marathon.exceptions.NotFoundError:
            return None

    def delete_app(self, app_id, force=False):
        return self.mcli.delete_app(app_id, force)

    def delete_deployment(self, dep_id):
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` when it exists, trying up to ``trys`` times.

        :return: the result of ``delete_app``, or ``None`` when the app does
            not exist.
        :raises Exception: the error of the last attempt when all fail.
        """
        last_error = None
        for _ in range(0, trys):
            try:
                a = self.get_app(app_id)
                if a:
                    return self.delete_app(app_id)
                return None
            except Exception:
                # Exception (not a bare except) so Ctrl-C still stops the loop.
                err_type, last_error = sys.exc_info()[:2]
                pprint("<p>Error: %s</p>" % err_type)
                time.sleep(10)
        # A bare `raise` here has no active exception on Python 3, so the
        # remembered error is raised explicitly.
        if last_error is None:
            raise Exception("delete_app_ifexisting: no attempt made (trys=%d)" % trys)
        raise last_error

    @staticmethod
    def is_valid_app_id(app_id):
        """Return True when ``app_id`` only contains accepted characters.

        The pattern accepts ASCII letters of either case, digits, hyphens,
        slashes and dots, and also matches the empty string.
        NOTE(review): the error logged by ``create_app`` speaks of lowercase
        letters only - confirm whether uppercase should be rejected here.
        """
        return bool(re.match("^[A-Za-z0-9-/.]*$", app_id))

    def create_app(self, app_id, attr):
        """
            Create and start an app.
            :param app_id: (str) - Application ID
            :param attr: marathon.models.app.MarathonApp application to create.
            :return: the created app
            :raises Exception: when ``app_id`` fails validation.
            :raises marathon.exceptions.MarathonHttpError: immediately for any
                error other than "app is locked", or after 10 locked attempts.
        """
        # Validate that app_id conforms to allowed naming scheme.
        if not self.is_valid_app_id(app_id):
            l.error("Error: Only lowercase letters, digits, hyphens are allowed in app_id. %s" % app_id)
            raise Exception("Invalid app_id")
        last_error = None
        for _ in range(0, 10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as e:
                if str(e).find('App is locked by one or more deployments. Override with the option') >= 0:
                    # Another deployment holds the app: wait and try again.
                    last_error = e
                    time.sleep(1)
                else:
                    raise
        # Still locked after every retry: surface the last lock error.
        raise last_error

    def wait_app_removal(self, app):
        """Poll every 0.2 s until ``get_app`` no longer returns the app."""
        cnt = 0
        while True:
            if not self.get_app(app):
                break
            time.sleep(0.2)
            cnt += 1
        if cnt > 0:
            l.info("Stuck waiting for %s to be deleted CNT=%d" % (app, cnt))
        return True

    def wait_app_ready(self, app, running_count):
        """Poll once a second until exactly ``running_count`` tasks run.

        :return: the app object from the poll that matched.
        """
        cnt = 0
        while True:
            a1 = self.get_app(app)
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(1)
            if (cnt % 30) == 29:
                l.info("[%d]Waiting for task to move to running stage, " % cnt +
                       "current stat staged=%d running=%d expected Running=%d" %
                       (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale):
        return self.mcli.scale_app(app, scale)

    def ping(self):
        return self.mcli.ping()
class MarathonManager(object):
    """Helper around ``MarathonClient`` for apps tagged with tonomi labels.

    Apps are selected through the ``_tonomi_application`` and
    ``_tonomi_environment`` labels.
    """

    def __init__(self, server):
        # Kept so __repr__ has something to show; it used to read an
        # attribute that was never assigned.
        self.server = server
        self._client = MarathonClient(server)

    def __repr__(self):
        return str(self.server)

    def create(self, app):
        """Let ``app`` create itself through this manager's client."""
        app._create(self._client)

    def discover(self, app_filter=None, env_filter=False):
        """List application names, or environment names, known to Marathon.

        :param app_filter: when truthy, only apps whose
            ``_tonomi_application`` label equals it are considered.
        :param env_filter: when falsy the reduced app names are returned;
            otherwise ``'/<environment>'`` for apps carrying the
            ``_tonomi_environment`` label.
        :return: list of unique names, in no particular order.
        """
        found = set()
        for app in self._client.list_apps():
            if app_filter and app.labels.get('_tonomi_application') != app_filter:
                continue
            if not env_filter:
                found.add(reduce_app_name(app.id))
            elif '_tonomi_environment' in app.labels:
                found.add('/{}'.format(app.labels['_tonomi_environment']))
        return list(found)

    @staticmethod
    def _has_label(app, key, value):
        # True when the app carries `key` with exactly `value`.
        return key in app.labels and app.labels[key] == value

    def get_apps(self, app_type, env_name):
        """Return the full app objects of one type within one environment.

        Slashes are stripped from ``env_name`` before it is compared with the
        ``_tonomi_environment`` label.
        """
        env_name = env_name.replace('/', '')
        matching = [
            app for app in self._client.list_apps()
            if self._has_label(app, '_tonomi_environment', env_name)
            and self._has_label(app, '_tonomi_application', app_type)
        ]
        # list_apps results are re-fetched one by one through get_app.
        return [self._client.get_app(app.id) for app in matching]

    def get_app_host(self, app_type, env_name):
        """Block until a matching app has a task, then return that task's host.

        Polls every 5 seconds, with no upper bound.
        """
        while True:
            for app in self.get_apps(app_type=app_type, env_name=env_name):
                for task in app.tasks:
                    return task.host
            time.sleep(5)

    def health_check(self):
        pass

    def destroy(self, name):
        """Best-effort forced removal of the group ``name``."""
        try:
            self._client.delete_group(name, force=True)
        except Exception:
            # Deliberately ignored, but no longer swallows KeyboardInterrupt
            # or SystemExit as the bare except did.
            pass

    def update(self):
        pass

    def restart(self):
        pass

    def scale_app(self, app_name, num):
        self._client.scale_app(app_name, num, force=True)

    def free_ports(self, num=1):
        return get_free_ports(self._client, num)
class MarathonIF(object):
    """Wrapper around ``MarathonClient`` that retries failing calls.

    ``get_app``, ``delete_app`` and ``scale_app`` retry every 10 seconds until
    their timeout expires and then raise a generic ``Exception``.
    """

    def __init__(self, marathon_addr, my_addr, mesos):
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def get_apps(self):
        """Return whatever ``MarathonClient.list_apps`` returns."""
        return self.mcli.list_apps()

    def get_app(self, app_id, timeout=300):
        """Fetch an app, retrying on errors for up to ``timeout`` seconds.

        :return: the app, or ``None`` when Marathon reports it as not found.
        :raises Exception: when no attempt succeeded within ``timeout``.
        """
        st_time = time.time()
        while (time.time() - st_time < timeout):
            try:
                return self.mcli.get_app(app_id)
            except marathon.exceptions.NotFoundError:
                return None
            except Exception:
                # Exception (not a bare except) so Ctrl-C still stops the loop.
                l.info("mcli: get_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli get_app timed out, possible zookeper/marathon/mesos malfunction"
        )

    def delete_app(self, app_id, force=False, timeout=200):
        """Delete an app, retrying on errors for up to ``timeout`` seconds.

        :raises Exception: when no attempt succeeded within ``timeout``.
        """
        st_time = time.time()
        while (time.time() - st_time < timeout):
            try:
                self.mcli.delete_app(app_id, force)
                return
            except Exception:
                l.info("mcli: delete_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli delete_app timed out, possible zookeper/marathon/mesos malfunction"
        )

    def delete_deployment(self, dep_id):
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` when it exists, trying up to ``trys`` times.

        :return: the result of ``delete_app``, or ``None`` when the app does
            not exist.
        :raises Exception: the error of the last attempt when all fail.
        """
        last_error = None
        for _ in range(0, trys):
            try:
                a = self.get_app(app_id)
                if a:
                    return self.delete_app(app_id)
                return None
            except Exception:
                err_type, last_error = sys.exc_info()[:2]
                pprint("<p>Error: %s</p>" % err_type)
                time.sleep(10)
        # A bare `raise` here has no active exception on Python 3, so the
        # remembered error is raised explicitly.
        if last_error is None:
            raise Exception(
                "delete_app_ifexisting: no attempt made (trys=%d)" % trys)
        raise last_error

    @staticmethod
    def is_valid_app_id(app_id):
        """Return True when ``app_id`` only contains accepted characters.

        The pattern accepts ASCII letters of either case, digits, hyphens,
        slashes and dots, and also matches the empty string.
        NOTE(review): the error logged by ``create_app`` speaks of lowercase
        letters only - confirm whether uppercase should be rejected here.
        """
        return bool(re.match("^[A-Za-z0-9-/.]*$", app_id))

    def create_app(self, app_id, attr):
        """
            Create and start an app.
            :param app_id: (str) - Application ID
            :param attr: marathon.models.app.MarathonApp application to create.
            :return: the created app
            :raises Exception: when ``app_id`` fails validation.
            :raises marathon.exceptions.MarathonHttpError: immediately for any
                error other than "app is locked", or after 10 locked attempts.
        """
        # Validate that app_id conforms to allowed naming scheme.
        if not self.is_valid_app_id(app_id):
            l.error(
                "Error: Only lowercase letters, digits, hyphens are allowed in app_id. %s"
                % app_id)
            raise Exception("Invalid app_id")
        last_error = None
        for _ in range(0, 10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as e:
                if str(e).find(
                        'App is locked by one or more deployments. Override with the option'
                ) >= 0:
                    # Another deployment holds the app: wait and try again.
                    last_error = e
                    time.sleep(1)
                else:
                    raise
        # Still locked after every retry: surface the last lock error.
        raise last_error

    def wait_app_removal(self, app):
        """Poll every 0.2 s until ``get_app`` no longer returns the app."""
        cnt = 0
        while True:
            if not self.get_app(app):
                break
            time.sleep(0.2)
            cnt += 1
        if cnt > 0:
            l.info("Stuck waiting for %s to be deleted CNT=%d" % (app, cnt))
        return True

    def wait_app_ready(self, app, running_count, sleep_before_next_try=1):
        """Poll until exactly ``running_count`` tasks are running.

        :return: the app object from the poll that matched.
        """
        cnt = 0
        while True:
            a1 = self.get_app(app)
            # if tasks_running are greater (due to whatever reason, scale down accordingly)
            if a1.tasks_running > running_count:
                delta = a1.tasks_running - running_count
                l.info("Found [%d] more apps, scaling down to [%d]", delta,
                       running_count)
                self.scale_app(app, running_count)
                # Allow for some time before next poll
                time.sleep(1)
                continue
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(sleep_before_next_try)
            if (cnt % 30) == 29:
                l.info(
                    "[%d]Waiting for task to move to running stage, " % cnt +
                    "current stat staged=%d running=%d expected Running=%d" %
                    (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale, timeout=300):
        """Scale an app, retrying on errors for up to ``timeout`` seconds.

        :raises Exception: when no attempt succeeded within ``timeout``.
        """
        st_time = time.time()
        while (time.time() - st_time < timeout):
            try:
                self.mcli.scale_app(app, scale)
                return
            except Exception:
                l.info("mcli: scale_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli scale_app timed out, possible zookeper/marathon/mesos malfunction"
        )

    def ping(self):
        return self.mcli.ping()

    def kill_task(self, app_id, task_id):
        return self.mcli.kill_task(app_id, task_id)
class MarathonAPI(object):
    """Marathon connection plus a registry of apps that may be autoscaled.

    ``findAppsWithAutoscaleLabels`` fills ``dict_apps`` (app id -> app object
    carrying the autoscale labels as attributes); ``scaleApps`` then asks each
    registered app whether it needs scaling and applies the delta.
    """

    # Class-level defaults; the real values are assigned per instance.
    user = None
    password = None
    host = None
    use_https = False
    port = None
    url = None
    marathon_cli = None
    dict_apps = {}

    def __init__(self, host, port=80, use_https=False, user=None, password=None):
        self.user = user
        self.password = password
        self.host = host
        self.use_https = use_https
        self.port = str(port)
        self.url = '{}://{}:{}/'.format('https' if use_https else 'http', host, port)
        # Per-instance registry: without this every MarathonAPI object would
        # read and write the single dict defined on the class.
        self.dict_apps = {}
        try:
            self.marathon_cli = MarathonClient([self.url], username=self.user, password=self.password)
        except Exception as e:
            logger.critical(e)
            raise

    def scaleOneApp(self, app_id, delta=None):
        """Scale a registered app by ``delta`` instances.

        A ``MarathonHttpError`` is logged and not propagated. Without a delta
        there is nothing to apply, so the call only logs a warning.
        """
        if delta is None:
            logger.warning('App: [{}] :: No delta given, nothing to scale'.format(app_id))
            return
        app = self.dict_apps[app_id]
        logger.info(
            'App: [{}] :: Scale {} Delta:[{}] Atual:[{}] Staged:[{}]'.format(
                app_id, 'up' if delta > 0 else 'down', delta,
                app.tasksRunning, app.tasksStaged))
        try:
            self.marathon_cli.scale_app(app_id=app_id, delta=delta)
        except MarathonHttpError as e:
            logger.error(e.error_message)

    @staticmethod
    def _label_value(raw):
        # Numeric label strings become ints; everything else stays a string.
        return int(raw) if raw.isnumeric() else raw

    def findAppsWithAutoscaleLabels(self):
        """Register every app that carries the autoscale-enable label.

        For each such app a ``MarathonApp`` is built with the running/staged
        task counts and one attribute per mandatory or optional label present.
        A missing mandatory label is logged as an error; the app is still
        registered.
        """
        apps = self.marathon_cli.list_apps(embed_counts=True, embed_task_stats=True)
        logger.debug('Lista recebida {}'.format(apps))
        if len(apps) == 0:
            logger.warning('0 apps loaded. Your marathon have apps?')
        for app in apps:
            if LABEL_FOR_AUTOSCALE_ENABLE not in app.labels:
                logger.debug(
                    'App: [{}] :: dont have {} = True. If you want to scale, please add labels.'
                    .format(app.id, LABEL_FOR_AUTOSCALE_ENABLE))
                continue
            new_app = MarathonApp(app.id)
            new_app.tasksRunning = app.tasks_running
            new_app.tasksStaged = app.tasks_staged
            for label in MANDATORY_LABELS_APP:
                if label in app.labels:
                    setattr(new_app, label, self._label_value(app.labels[label]))
                else:
                    logger.error(
                        'App: [{}] :: dont have MANDATORY_LABELS :: {}'.
                        format(app.id, label))
            for label in OPTIONAL_LABELS_APP:
                if label in app.labels:
                    setattr(new_app, label, self._label_value(app.labels[label]))
            self.dict_apps[app.id] = new_app

    def scaleApps(self, rabbitmq):
        """Scale every registered app whose ``scaneRequired`` asks for it."""
        for app_id in self.dict_apps:
            app = self.dict_apps[app_id]
            required, delta = app.scaneRequired(rabbitmq)
            if required:
                self.scaleOneApp(app_id=app_id, delta=delta)
            else:
                logger.info('App: [{}] :: Not Required Scale'.format(app_id))
class MarathonHTTPClient(AbstractSchedulerClient):
    """Scheduler client that runs containers as Marathon apps.

    Each container name maps to one Marathon app whose command is a
    ``docker run``; one-off commands are delegated to a fleet client.
    """

    def __init__(self, target, auth, options, pkey):
        super(MarathonHTTPClient, self).__init__(target, auth, options, pkey)
        self.target = settings.MARATHON_HOST
        self.registry = settings.REGISTRY_HOST + ':' + settings.REGISTRY_PORT
        self.client = MarathonClient('http://' + self.target + ':8180')
        self.fleet = FleetHTTPClient('/var/run/fleet.sock', auth, options, pkey)

    # helpers

    def _app_id(self, name):
        # App ids use '.' wherever the container name has '_'.
        return name.replace('_', '.')

    # container api

    def create(self, name, image, command='', **kwargs):
        """Create a new container"""
        app_id = self._app_id(name)
        # Only the container type parsed out of the name is needed here.
        c_type = re.match(MATCH, name).groupdict()['c_type']
        image = self.registry + '/' + image
        mems = kwargs.get('memory', {}).get(c_type)
        m = 0
        if mems:
            mems = mems.lower()
            # Drop a two-letter unit down to one letter, then strip that
            # letter and keep the number.
            # NOTE(review): the unit itself is discarded and a value without
            # any unit letter loses its last digit - confirm intended.
            if mems[-2:-1].isalpha() and mems[-1].isalpha():
                mems = mems[:-1]
            m = int(mems[:-1])
        c = 0.5
        cpu = kwargs.get('cpu', {}).get(c_type)
        if cpu:
            c = cpu
        cmd = "docker run --name {name} -P {image} {command}".format(
            name=name, image=image, command=command)
        self.client.create_app(
            app_id, MarathonApp(cmd=cmd, mem=m, cpus=c, instances=0))
        # Wait (up to POLL_ATTEMPTS seconds) for the app to report 0 tasks.
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 0:
                return
            time.sleep(1)

    def start(self, name):
        """Start a container."""
        app_id = self._app_id(name)
        self.client.scale_app(app_id, 1, force=True)
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 1:
                break
            time.sleep(1)
        host = self.client.get_app(app_id).tasks[0].host
        self._waitforcontainer(host, name)

    def destroy(self, name):
        """Destroy a container."""
        app_id = self._app_id(name)
        try:
            host = self.client.get_app(app_id).tasks[0].host
            self.client.delete_app(app_id, force=True)
            self._delete_container(host, name)
        except Exception:
            # Fall back to a plain forced delete of the app. Exception (not a
            # bare except) keeps KeyboardInterrupt/SystemExit propagating.
            self.client.delete_app(app_id, force=True)

    def _get_container_state(self, host, name):
        """Return ``JobState.up`` when running, ``JobState.destroyed`` when
        the inspection fails, and ``None`` otherwise."""
        docker_cli = Client("tcp://{}:2375".format(host),
                            timeout=1200,
                            version='1.17')
        try:
            if docker_cli.inspect_container(name)['State']['Running']:
                return JobState.up
        except Exception:
            return JobState.destroyed

    def _waitforcontainer(self, host, name):
        for _ in xrange(POLL_WAIT):
            if self._get_container_state(host, name) == JobState.up:
                return
            time.sleep(1)
        raise RuntimeError("App container Not Started")

    def _delete_container(self, host, name):
        docker_cli = Client("tcp://{}:2375".format(host),
                            timeout=1200,
                            version='1.17')
        if docker_cli.inspect_container(name)['State']:
            docker_cli.remove_container(name, force=True)

    def run(self, name, image, entrypoint, command):  # noqa
        """Run a one-off command."""
        return self.fleet.run(name, image, entrypoint, command)

    def state(self, name):
        """Display the given job's running state."""
        try:
            for _ in xrange(POLL_ATTEMPTS):
                # One query per poll so both comparisons see the same value.
                running = self.client.get_app(self._app_id(name)).tasks_running
                if running == 1:
                    return JobState.up
                elif running == 0:
                    return JobState.created
                time.sleep(1)
        except Exception:
            return JobState.destroyed
class MarathonIF(object):
    """Thin convenience wrapper around ``MarathonClient``."""

    def __init__(self, marathon_addr, my_addr, mesos):
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def get_apps(self):
        """Return whatever ``MarathonClient.list_apps`` returns."""
        return self.mcli.list_apps()

    def get_app(self, app_id):
        """Return the app, or ``None`` when Marathon reports it as not found."""
        try:
            return self.mcli.get_app(app_id)
        except marathon.exceptions.NotFoundError:
            return None

    def delete_app(self, app_id, force=False):
        return self.mcli.delete_app(app_id, force)

    def delete_deployment(self, dep_id):
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` when it exists, trying up to ``trys`` times.

        :return: the result of ``delete_app``, or ``None`` when the app does
            not exist.
        :raises Exception: the error of the last attempt when all fail.
        """
        last_error = None
        for _ in range(0, trys):
            try:
                a = self.get_app(app_id)
                if a:
                    return self.delete_app(app_id)
                return None
            except Exception:
                # Exception (not a bare except) so Ctrl-C still stops the loop.
                err_type, last_error = sys.exc_info()[:2]
                pprint("<p>Error: %s</p>" % err_type)
                time.sleep(10)
        # A bare `raise` here has no active exception on Python 3, so the
        # remembered error is raised explicitly.
        if last_error is None:
            raise Exception("delete_app_ifexisting: no attempt made (trys=%d)" % trys)
        raise last_error

    def create_app(self, app_id, attr):
        """Create an app, retrying up to 10 times while it is locked.

        :return: the result of ``MarathonClient.create_app``.
        :raises marathon.exceptions.MarathonHttpError: immediately for any
            error other than "app is locked", or after 10 locked attempts.
        """
        last_error = None
        for _ in range(0, 10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as e:
                if str(e).find('App is locked by one or more deployments. Override with the option') >= 0:
                    # Another deployment holds the app: wait and try again.
                    last_error = e
                    time.sleep(1)
                else:
                    raise
        # Still locked after every retry: surface the last lock error.
        raise last_error

    def wait_app_removal(self, app):
        """Poll every 0.2 s until ``get_app`` no longer returns the app."""
        cnt = 0
        while True:
            if not self.get_app(app):
                break
            time.sleep(0.2)
            cnt += 1
        if cnt > 0:
            l.info("Stuck waiting for %s to be deleted CNT=%d" % (app, cnt))
        return True

    def wait_app_ready(self, app, running_count):
        """Poll once a second until exactly ``running_count`` tasks run.

        :return: the app object from the poll that matched.
        """
        cnt = 0
        while True:
            a1 = self.get_app(app)
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(1)
            if (cnt % 30) == 29:
                l.info("[%d]Waiting for task to move to running stage, " % cnt +
                       "current stat staged=%d running=%d expected Running=%d" %
                       (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale):
        return self.mcli.scale_app(app, scale)

    def ping(self):
        return self.mcli.ping()
class MarathonHTTPClient(AbstractSchedulerClient):
    """Scheduler client that runs containers as Marathon apps.

    One-off commands are delegated to a ``FleetHTTPClient``.
    """

    def __init__(self, target, auth, options, pkey):
        super(MarathonHTTPClient, self).__init__(target, auth, options, pkey)
        # The configured Marathon host overrides whatever target was passed in.
        self.target = settings.MARATHON_HOST
        self.registry = settings.REGISTRY_HOST + ":" + settings.REGISTRY_PORT
        self.client = MarathonClient("http://" + self.target + ":8180")
        self.fleet = FleetHTTPClient("/var/run/fleet.sock", auth, options, pkey)

    # helpers

    def _app_id(self, name):
        """Return the Marathon app id for *name* (underscores replaced by dots)."""
        return name.replace("_", ".")

    # container api

    def create(self, name, image, command="", **kwargs):
        """Create a new container.

        Registers a Marathon app with zero instances whose command is a
        ``docker run`` of *image* from the configured registry, then polls
        (up to POLL_ATTEMPTS times, one second apart) until the app reports
        no running tasks.

        ``kwargs["memory"]`` and ``kwargs["cpu"]`` are dicts keyed by the
        ``c_type`` group that MATCH extracts from *name*.
        """
        app_id = self._app_id(name)
        c_type = re.match(MATCH, name).groupdict()["c_type"]
        image = self.registry + "/" + image

        m = 0
        mems = kwargs.get("memory", {}).get(c_type)
        if mems:
            mems = mems.lower()
            # A two-letter suffix is first cut to one letter, then the
            # remaining letter is dropped before parsing the number.
            # NOTE(review): the unit letter is discarded without any
            # conversion - confirm every caller passes a value in the unit
            # Marathon expects for ``mem``.
            if mems[-2:-1].isalpha() and mems[-1].isalpha():
                mems = mems[:-1]
            m = int(mems[:-1])

        c = kwargs.get("cpu", {}).get(c_type) or 0.5

        cmd = "docker run --name {name} -P {image} {command}".format(
            name=name, image=image, command=command)
        self.client.create_app(app_id, MarathonApp(cmd=cmd, mem=m, cpus=c, instances=0))
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 0:
                return
            time.sleep(1)

    def start(self, name):
        """Start a container.

        Scales the app to one instance, polls until one task is running (or
        POLL_ATTEMPTS is exhausted), then waits for the container on the
        first task's host.
        """
        app_id = self._app_id(name)
        self.client.scale_app(app_id, 1, force=True)
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 1:
                break
            time.sleep(1)
        host = self.client.get_app(app_id).tasks[0].host
        self._waitforcontainer(host, name)

    def destroy(self, name):
        """Destroy a container.

        Deletes the Marathon app and removes the container from the host of
        its first task.  If any step fails, the app deletion is issued once
        more and errors from that second call propagate.
        """
        app_id = self._app_id(name)
        try:
            host = self.client.get_app(app_id).tasks[0].host
            self.client.delete_app(app_id, force=True)
            self._delete_container(host, name)
        except Exception:
            # Best-effort fallback; KeyboardInterrupt/SystemExit are no
            # longer swallowed here.
            self.client.delete_app(app_id, force=True)

    def _get_container_state(self, host, name):
        """Return ``JobState.up`` if the container is running on *host*.

        Returns ``JobState.destroyed`` when inspecting it fails, and ``None``
        when the container exists but is not running.
        """
        docker_cli = Client("tcp://{}:2375".format(host), timeout=1200, version="1.17")
        try:
            if docker_cli.inspect_container(name)["State"]["Running"]:
                return JobState.up
        except Exception:
            return JobState.destroyed

    def _waitforcontainer(self, host, name):
        """Poll once per second, up to POLL_WAIT times, until the container is up.

        Raises ``RuntimeError`` if it never reaches ``JobState.up``.
        """
        for _ in xrange(POLL_WAIT):
            if self._get_container_state(host, name) == JobState.up:
                return
            time.sleep(1)
        raise RuntimeError("App container Not Started")

    def _delete_container(self, host, name):
        """Force-remove the container *name* on *host* if inspecting it yields a state."""
        docker_cli = Client("tcp://{}:2375".format(host), timeout=1200, version="1.17")
        if docker_cli.inspect_container(name)["State"]:
            docker_cli.remove_container(name, force=True)

    def run(self, name, image, entrypoint, command):  # noqa
        """Run a one-off command."""
        return self.fleet.run(name, image, entrypoint, command)

    def state(self, name):
        """Display the given job's running state.

        Returns ``JobState.up`` for one running task, ``JobState.created``
        for none, ``JobState.destroyed`` if querying Marathon fails, and
        ``None`` if neither count is seen within POLL_ATTEMPTS polls.
        """
        try:
            app_id = self._app_id(name)
            for _ in xrange(POLL_ATTEMPTS):
                # Fetch once per poll so both comparisons see the same snapshot.
                running = self.client.get_app(app_id).tasks_running
                if running == 1:
                    return JobState.up
                if running == 0:
                    return JobState.created
                time.sleep(1)
        except Exception:
            return JobState.destroyed
class Scaler:
    """Autoscaler for one Marathon application.

    Policies are loaded from a REST service, usage metrics are read from
    InfluxDB, and scaling is performed through Marathon followed by a
    regeneration of the haproxy configuration.
    """

    # Largest number of instances a single scale-down step may remove.
    _MAX_SCALE_DOWN_STEP = 10

    def __init__(self, app_name, config):
        """Load the app record and its policies, connect the clients, refresh haproxy.

        @param string app_name name of application
        @param dict config settings; the MARIA_RESTFUL, INFLUXDB, MARATHON
               and TIME sections are read
        """
        self.logger = logging.getLogger("autoscaling")
        self.logger.setLevel(logging.DEBUG)
        self.logger.debug("Init object scaler...")
        self.config = config

        self.logger.debug("Connect RESTful mariadb and get policies...")
        conn = http.client.HTTPConnection(config["MARIA_RESTFUL"]['host'],
                                          config["MARIA_RESTFUL"]['port'])
        try:
            conn.request("GET", "/app/name/" + app_name)
            self.app = json.loads(conn.getresponse().read().decode("utf-8"))
            conn.request("GET", "/app/name/" + app_name + "/policies")
            self.app["policies"] = json.loads(
                conn.getresponse().read().decode("utf-8"))
        finally:
            # Release the socket even when a request or the JSON parsing fails.
            conn.close()

        self.logger.debug("Connect influxdb and marathon...")
        influx = config["INFLUXDB"]
        self.influx_client = InfluxDBClient(influx["host"], influx["port"],
                                            influx["username"],
                                            influx["password"],
                                            influx["db_name"])
        self.marathon_client = MarathonClient(
            'http://' + config["MARATHON"]['host'] + ':' + config["MARATHON"]['port'])
        # One request gives a consistent snapshot of all three values.
        marathon_app = self.marathon_client.get_app(app_name)
        self.app["instance"] = marathon_app.instances
        self.app["mem"] = marathon_app.mem
        self.app["cpus"] = marathon_app.cpus

        self.logger.debug("Reconfig haproxy.cfg...")
        self._reload_haproxy()

    def _reload_haproxy(self):
        """Run servicerouter.py against Marathon to rewrite /etc/haproxy/haproxy.cfg."""
        # NOTE(review): host and port are spliced into a shell command line;
        # they must come from trusted configuration.
        os.system("sudo ./servicerouter.py --marathon http://" +
                  self.config["MARATHON"]["host"] + ":" +
                  self.config["MARATHON"]["port"] +
                  " --haproxy-config /etc/haproxy/haproxy.cfg")

    def setup_logging(self, log_file="autoscaling.log", level=logging.INFO, formatter=None):
        """Attach a file handler to the autoscaling logger.

        @param string log_file path of the log file
        @param int level minimum level written by the handler
        @param logging.Formatter formatter optional; a timestamped default is used when omitted
        """
        if formatter is None:
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh = logging.FileHandler(log_file)
        fh.setLevel(level)
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

    def get_cpu_usage(self, container_name):
        """Return the cpu usage of one container, in percent of the app's cpus

        @param string container_name container name
        """
        # NOTE(review): the name is spliced into the query unescaped.
        query = ("select DERIVATIVE(cpu_cumulative_usage) as cpu_usage from stats where container_name = '" +
                 container_name + "' and time > now()-5m group by time(2s) ")
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return (points[0][1] / 1000000000 / self.app["cpus"]) * 100

    def get_container_name(self, mesos_task_id):
        """Return the container name recorded for a mesos task id

        @param string mesos_task_id
        """
        query = ("select container_name from " + self.config["INFLUXDB"]["ts_mapping"] +
                 " where time>now() - 5m and mesos_task_id = '" + mesos_task_id + "' limit 1")
        result = self.influx_client.query(query)
        points = result[0]["points"]
        # Column 2 is read as the container name.
        return points[0][2]

    def get_containers_name(self):
        """Return the container names of all Marathon tasks of this application

        @return list container names, one per task
        """
        tasks = self.marathon_client.list_tasks(self.app["name"])
        return [self.get_container_name(task.id) for task in tasks]

    def avg_mem_usage(self, containers_name):
        """Return the average memory usage of the given containers, in percent

        @param list containers_name list containers name
        @return float avg mem usage
        """
        number_container = len(containers_name)
        quoted_names = ",".join("'" + x + "'" for x in containers_name)
        query = ("select memory_usage,container_name from stats where time > now()-5m and container_name in (" +
                 quoted_names + ") limit " + str(number_container * 2))
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_memory_usage = 0
        for point in points:
            # Column 3 is read as the memory usage; rows without one are skipped.
            if point[3] is not None:
                sum_memory_usage += point[3] / (self.app["mem"] * 1048576) * 100
        return sum_memory_usage / number_container

    def avg_cpu_usage(self, containers_name):
        """Return the average cpu usage of the given containers, in percent

        @param list containers_name list containers name
        @return float avg cpu usage
        """
        number_container = len(containers_name)
        quoted_names = ",".join("'" + x + "'" for x in containers_name)
        query = ("select DERIVATIVE(cpu_cumulative_usage) as cpu_usage,container_name from stats where time > now()-5m and container_name in (" +
                 quoted_names + ") group by time(10s),container_name limit " + str(number_container))
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_cpu_usage = 0
        for point in points:
            sum_cpu_usage += point[1] / 1000000000 / self.app["cpus"] * 100
        return sum_cpu_usage / number_container

    def scale(self, delta):
        """Add (or, for a negative delta, remove) instances of the application

        The target is clamped to the app's min_instances/max_instances.
        Nothing happens when the clamped target equals the current count.

        @param int delta number of instances to add or remove
        """
        new_instance = self.app["instance"] + delta
        new_instance = min(new_instance, self.app['max_instances'])
        new_instance = max(new_instance, self.app['min_instances'])
        if new_instance == self.app["instance"]:
            return

        self.marathon_client.scale_app(self.app["name"], new_instance)
        self.logger.debug("Scaling " + self.app["name"] + " to: " + str(new_instance))
        self.logger.debug("Waiting for config file haproxy.cfg...")
        time.sleep(self.config["TIME"]['w_config_ha'])
        self.logger.debug("Config file haproxy.cfg...")
        self._reload_haproxy()
        # Re-read the count from Marathon instead of trusting the request.
        self.app["instance"] = self.marathon_client.get_app(self.app["name"]).instances
        self.logger.debug("Sleep " + str(self.config["TIME"]['after_scale']) + "s...")
        time.sleep(self.config["TIME"]['after_scale'])

    def check_rule(self, policie, value):
        """Evaluate one policy against the current metric values

        @param dict policie policy; metric_type selects the entry of value
        @param tuple value values of metric
        @return dict {"up": n, "down": m} instances the policy asks to add / remove
        """
        metric = value[policie["metric_type"]]
        delta = {"up": 0, "down": 0}
        # Check upper_threshold
        if metric > policie["upper_threshold"]:
            delta['up'] = policie["instances_in"]
        # Check lower_threshold
        if metric < policie["lower_threshold"]:
            delta['down'] = policie["instances_out"]
        return delta

    def _decide_delta(self, avg_cpu, avg_mem):
        """Combine all policies: largest "up" request, smallest "down" request."""
        policies = self.app["policies"]
        if not policies:
            # No policy means no opinion; the scale-down cap must not be
            # mistaken for a request to remove instances.
            return {"up": 0, "down": 0}
        deltas = [self.check_rule(policie, (avg_cpu, avg_mem)) for policie in policies]
        return {
            "up": max(0, max(d["up"] for d in deltas)),
            "down": min(self._MAX_SCALE_DOWN_STEP, min(d["down"] for d in deltas)),
        }

    def autoscaling(self):
        """Monitor the application forever and scale it according to its policies

        Each cycle reads the average cpu and memory usage, scales up if any
        policy asks for it, otherwise scales down if every policy asks for
        it, then sleeps for the configured monitor interval.  A failing
        cycle is logged and the loop carries on.
        """
        while True:
            try:
                containers_name = self.get_containers_name()
                if not containers_name:
                    # Averages are undefined without containers; skip the cycle.
                    self.logger.warning("No containers found for %s, skipping cycle",
                                        self.app["name"])
                    continue
                avg_cpu = self.avg_cpu_usage(containers_name)
                avg_mem = self.avg_mem_usage(containers_name)
                self.logger.info(
                    "Avg cpu usage, avg memmory usage, current instance: %f %f %d",
                    avg_cpu, avg_mem, self.app["instance"])
                rs_detal = self._decide_delta(avg_cpu, avg_mem)
                if rs_detal['up'] > 0:
                    self.scale(rs_detal['up'])
                elif rs_detal['down'] > 0:
                    self.scale(0 - rs_detal['down'])
            except Exception:
                # Logged with traceback at ERROR so the failure is not
                # filtered out by the handler level.
                self.logger.exception("Autoscaling cycle failed")
            finally:
                time.sleep(self.config["TIME"]['monitor'])
full_config = config_read.read() if 'anonReload' in full_config: logging.info('Configuration already supports anonymous REST reloads.') # Only shim in anonymous reload and restart GeoServer if it hasn't been done before else: config_read.seek(0) with open('%s/security/config.xml-output' % GEOSERVER_DATA_DIR, 'w') as config_write: line_value = config_read.readline() while len(line_value): config_write.write('%s' % line_value) if '<filterChain' in line_value: config_write.write('%s' % filter_inject) line_value = config_read.readline() shutil.move('%s/security/config.xml-output' % GEOSERVER_DATA_DIR, '%s/security/config.xml' % GEOSERVER_DATA_DIR) response = MARATHON_CLIENT.kill_tasks(GEOSERVER_APP) if not len(response) == 1: logging.critical('Error restarting GeoServer') sys.exit(1) wait_for_deployment(MARATHON_CLIENT, GEOSERVER_APP) MARATHON_CLIENT.scale_app(GEOSERVER_APP, GEOSERVER_INSTANCES) block_for_healthy_app(MARATHON_CLIENT, GEOSERVER_APP, GEOSERVER_INSTANCES) logging.info('Bootstrap complete.')
class MarathonHTTPClient(object):
    """Scheduler client that runs containers as Marathon apps.

    One-off commands are delegated to a ``FleetHTTPClient``.
    """

    def __init__(self, target, auth, options, pkey):
        # The configured Marathon host is used; the *target* argument is not.
        self.target = settings.MARATHON_HOST
        self.auth = auth
        self.options = options
        self.pkey = pkey
        self.registry = settings.REGISTRY_HOST + ':' + settings.REGISTRY_PORT
        self.client = MarathonClient('http://' + self.target + ':8180')
        self.fleet = FleetHTTPClient('/var/run/fleet.sock', auth, options, pkey)

    # helpers

    def _app_id(self, name):
        """Return the Marathon app id for *name* (underscores replaced by dots)"""
        return name.replace('_', '.')

    # container api

    def create(self, name, image, command='', **kwargs):
        """Create a container

        Registers a Marathon app whose command is a ``docker run`` of
        *image* from the configured registry, scales it to zero, then polls
        (up to POLL_ATTEMPTS times, one second apart) until the app reports
        no running tasks.

        ``kwargs['memory']`` and ``kwargs['cpu']`` are dicts keyed by the
        ``c_type`` group that MATCH extracts from *name*.
        """
        app_id = self._app_id(name)
        c_type = re.match(MATCH, name).groupdict()['c_type']
        image = self.registry + '/' + image

        m = 0
        mems = kwargs.get('memory', {}).get(c_type)
        if mems:
            mems = mems.lower()
            # A two-letter suffix is first cut to one letter, then the
            # remaining letter is dropped before parsing the number.
            # NOTE(review): the unit letter is discarded without any
            # conversion - confirm every caller passes a value in the unit
            # Marathon expects for ``mem``.
            if mems[-2:-1].isalpha() and mems[-1].isalpha():
                mems = mems[:-1]
            m = int(mems[:-1])

        c = kwargs.get('cpu', {}).get(c_type) or 0.5

        cmd = "docker run --name {name} -P {image} {command}".format(
            name=name, image=image, command=command)
        self.client.create_app(app_id, MarathonApp(cmd=cmd, mem=m, cpus=c))
        self.client.scale_app(app_id, 0, force=True)
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 0:
                return
            time.sleep(1)

    def start(self, name):
        """Start a container

        Scales the app to one instance, polls until one task is running (or
        POLL_ATTEMPTS is exhausted), then waits for the container on the
        first task's host.
        """
        app_id = self._app_id(name)
        self.client.scale_app(app_id, 1, force=True)
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 1:
                break
            time.sleep(1)
        host = self.client.get_app(app_id).tasks[0].host
        self._waitforcontainer(host, name)

    def stop(self, name):
        """Stop a container"""
        raise NotImplementedError

    def destroy(self, name):
        """Destroy a container

        Deletes the Marathon app and removes the container from the host of
        its first task.  If any step fails, the app deletion is issued once
        more and errors from that second call propagate.
        """
        app_id = self._app_id(name)
        try:
            host = self.client.get_app(app_id).tasks[0].host
            self.client.delete_app(app_id, force=True)
            self._delete_container(host, name)
        except Exception:
            # Best-effort fallback; KeyboardInterrupt/SystemExit are no
            # longer swallowed here.
            self.client.delete_app(app_id, force=True)

    def _get_container_state(self, host, name):
        """Return ``JobState.up`` if the container is running on *host*

        Returns ``JobState.destroyed`` when inspecting it fails, and ``None``
        when the container exists but is not running.
        """
        docker_cli = Client("tcp://{}:2375".format(host), timeout=1200, version='1.17')
        try:
            if docker_cli.inspect_container(name)['State']['Running']:
                return JobState.up
        except Exception:
            return JobState.destroyed

    def _waitforcontainer(self, host, name):
        """Poll once per second, up to POLL_WAIT times, until the container is up

        Raises ``RuntimeError`` if it never reaches ``JobState.up``.
        """
        for _ in xrange(POLL_WAIT):
            if self._get_container_state(host, name) == JobState.up:
                return
            time.sleep(1)
        raise RuntimeError("App container Not Started")

    def _delete_container(self, host, name):
        """Force-remove the container *name* on *host* if inspecting it yields a state"""
        docker_cli = Client("tcp://{}:2375".format(host), timeout=1200, version='1.17')
        if docker_cli.inspect_container(name)['State']:
            docker_cli.remove_container(name, force=True)

    def run(self, name, image, entrypoint, command):  # noqa
        """Run a one-off command"""
        return self.fleet.run(name, image, entrypoint, command)

    def state(self, name):
        """Return the job's running state

        Returns ``JobState.up`` for one running task, ``JobState.created``
        for none, ``JobState.destroyed`` if querying Marathon fails, and
        ``None`` if neither count is seen within POLL_ATTEMPTS polls.
        """
        try:
            app_id = self._app_id(name)
            for _ in xrange(POLL_ATTEMPTS):
                # Fetch once per poll so both comparisons see the same snapshot.
                running = self.client.get_app(app_id).tasks_running
                if running == 1:
                    return JobState.up
                if running == 0:
                    return JobState.created
                time.sleep(1)
        except Exception:
            return JobState.destroyed

    def attach(self, name):
        """
        Attach to a job's stdin, stdout and stderr
        """
        raise NotImplementedError
class Scaler:
    """Autoscaler for one Marathon application.

    Policies are loaded from a REST service, usage metrics are read from
    InfluxDB, and scaling is performed through Marathon followed by a
    regeneration of the haproxy configuration.
    """

    # Largest number of instances a single scale-down step may remove.
    _MAX_SCALE_DOWN_STEP = 10

    def __init__(self, app_name, config):
        """Load the app record and its policies, connect the clients, refresh haproxy.

        @param string app_name name of application
        @param dict config settings; the MARIA_RESTFUL, INFLUXDB, MARATHON
               and TIME sections are read
        """
        self.logger = logging.getLogger("autoscaling")
        self.logger.setLevel(logging.DEBUG)
        self.logger.debug("Init object scaler...")
        self.config = config

        self.logger.debug("Connect RESTful mariadb and get policies...")
        rest_cfg = config["MARIA_RESTFUL"]
        conn = http.client.HTTPConnection(rest_cfg['host'], rest_cfg['port'])
        try:
            conn.request("GET", "/app/name/"+app_name)
            json_app = conn.getresponse().read().decode("utf-8")
            self.app = json.loads(json_app)
            conn.request("GET", "/app/name/"+app_name+"/policies")
            json_policies = conn.getresponse().read().decode("utf-8")
            self.app["policies"] = json.loads(json_policies)
        finally:
            # Release the socket even when a request or the JSON parsing fails.
            conn.close()

        self.logger.debug("Connect influxdb and marathon...")
        influx_cfg = config["INFLUXDB"]
        self.influx_client = InfluxDBClient(
            influx_cfg["host"], influx_cfg["port"], influx_cfg["username"],
            influx_cfg["password"], influx_cfg["db_name"])
        self.marathon_client = MarathonClient(
            'http://'+config["MARATHON"]['host']+':'+config["MARATHON"]['port'])
        # One request gives a consistent snapshot of all three values.
        snapshot = self.marathon_client.get_app(app_name)
        self.app["instance"] = snapshot.instances
        self.app["mem"] = snapshot.mem
        self.app["cpus"] = snapshot.cpus

        self.logger.debug("Reconfig haproxy.cfg...")
        self._reload_haproxy()

    def _reload_haproxy(self):
        """Run servicerouter.py against Marathon to rewrite /etc/haproxy/haproxy.cfg."""
        # NOTE(review): host and port are spliced into a shell command line;
        # they must come from trusted configuration.
        marathon_cfg = self.config["MARATHON"]
        os.system("sudo ./servicerouter.py --marathon http://"+marathon_cfg["host"]+":"+marathon_cfg["port"]+" --haproxy-config /etc/haproxy/haproxy.cfg")

    def setup_logging(self, log_file="autoscaling.log", level=logging.INFO, formatter=None):
        """Attach a file handler to the autoscaling logger.

        @param string log_file path of the log file
        @param int level minimum level written by the handler
        @param logging.Formatter formatter optional; a timestamped default is used when omitted
        """
        if formatter is None:
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh = logging.FileHandler(log_file)
        fh.setLevel(level)
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

    def get_cpu_usage(self, container_name):
        """Return the cpu usage of one container, in percent of the app's cpus

        @param string container_name container name
        """
        # NOTE(review): the name is spliced into the query unescaped.
        query = "select DERIVATIVE(cpu_cumulative_usage) as cpu_usage from stats where container_name = '"+container_name+"' and time > now()-5m group by time(2s) "
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return (points[0][1]/1000000000/self.app["cpus"])*100

    def get_container_name(self, mesos_task_id):
        """Return the container name recorded for a mesos task id

        @param string mesos_task_id
        """
        query = "select container_name from "+self.config["INFLUXDB"]["ts_mapping"]+" where time>now() - 5m and mesos_task_id = '" +mesos_task_id+"' limit 1"
        result = self.influx_client.query(query)
        points = result[0]["points"]
        # Column 2 is read as the container name.
        return points[0][2]

    def get_containers_name(self):
        """Return the container names of all Marathon tasks of this application

        @return list container names, one per task
        """
        tasks = self.marathon_client.list_tasks(self.app["name"])
        return [self.get_container_name(task.id) for task in tasks]

    def avg_mem_usage(self, containers_name):
        """Return the average memory usage of the given containers, in percent

        @param list containers_name list containers name
        @return float avg mem usage
        """
        number_container = len(containers_name)
        name_list = ",".join("'"+x+"'" for x in containers_name)
        query = "select memory_usage,container_name from stats where time > now()-5m and container_name in ("+name_list+") limit "+str(number_container*2)
        result = self.influx_client.query(query)
        points = result[0]["points"]
        mem_bytes = self.app["mem"]*1048576
        # Column 3 is read as the memory usage; rows without one are skipped.
        sum_memory_usage = sum(
            point[3]/mem_bytes*100 for point in points if point[3] is not None)
        return sum_memory_usage / number_container

    def avg_cpu_usage(self, containers_name):
        """Return the average cpu usage of the given containers, in percent

        @param list containers_name list containers name
        @return float avg cpu usage
        """
        number_container = len(containers_name)
        name_list = ",".join("'"+x+"'" for x in containers_name)
        query = "select DERIVATIVE(cpu_cumulative_usage) as cpu_usage,container_name from stats where time > now()-5m and container_name in ("+name_list+") group by time(10s),container_name limit "+str(number_container)
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_cpu_usage = 0
        for point in points:
            sum_cpu_usage += point[1]/1000000000/self.app["cpus"]*100
        return sum_cpu_usage / number_container

    def scale(self, delta):
        """Add (or, for a negative delta, remove) instances of the application

        The target is clamped to the app's min_instances/max_instances.
        Nothing happens when the clamped target equals the current count.

        @param int delta number of instances to add or remove
        """
        new_instance = self.app["instance"] + delta
        if new_instance > self.app['max_instances']:
            new_instance = self.app['max_instances']
        if new_instance < self.app['min_instances']:
            new_instance = self.app['min_instances']
        if new_instance == self.app["instance"]:
            return

        self.marathon_client.scale_app(self.app["name"], new_instance)
        self.logger.debug("Scaling "+self.app["name"]+" to: "+str(new_instance))
        self.logger.debug("Waiting for config file haproxy.cfg...")
        time.sleep(self.config["TIME"]['w_config_ha'])
        self.logger.debug("Config file haproxy.cfg...")
        self._reload_haproxy()
        # Re-read the count from Marathon instead of trusting the request.
        self.app["instance"] = self.marathon_client.get_app(self.app["name"]).instances
        self.logger.debug("Sleep "+str(self.config["TIME"]['after_scale'])+"s...")
        time.sleep(self.config["TIME"]['after_scale'])

    def check_rule(self, policie, value):
        """Evaluate one policy against the current metric values

        @param dict policie policy; metric_type selects the entry of value
        @param tuple value values of metric
        @return dict {"up": n, "down": m} instances the policy asks to add / remove
        """
        current = value[policie["metric_type"]]
        delta = {"up": 0, "down": 0}
        # Check upper_threshold
        if current > policie["upper_threshold"]:
            delta['up'] = policie["instances_in"]
        # Check lower_threshold
        if current < policie["lower_threshold"]:
            delta['down'] = policie["instances_out"]
        return delta

    def _decide_delta(self, avg_cpu, avg_mem):
        """Combine all policies: largest "up" request, smallest "down" request."""
        policies = self.app["policies"]
        if not policies:
            # No policy means no opinion; the scale-down cap must not be
            # mistaken for a request to remove instances.
            return {"up": 0, "down": 0}
        rs_detal = {"up": 0, "down": self._MAX_SCALE_DOWN_STEP}
        for policie in policies:
            delta = self.check_rule(policie, (avg_cpu, avg_mem))
            rs_detal['up'] = max(rs_detal['up'], delta['up'])
            rs_detal['down'] = min(rs_detal['down'], delta['down'])
        return rs_detal

    def autoscaling(self):
        """Monitor the application forever and scale it according to its policies

        Each cycle reads the average cpu and memory usage, scales up if any
        policy asks for it, otherwise scales down if every policy asks for
        it, then sleeps for the configured monitor interval.  A failing
        cycle is logged and the loop carries on.
        """
        while True:
            try:
                containers_name = self.get_containers_name()
                if not containers_name:
                    # Averages are undefined without containers; skip the cycle.
                    self.logger.warning("No containers found for %s, skipping cycle",
                                        self.app["name"])
                    continue
                avg_cpu = self.avg_cpu_usage(containers_name)
                avg_mem = self.avg_mem_usage(containers_name)
                self.logger.info("Avg cpu usage, avg memmory usage, current instance: %f %f %d", avg_cpu, avg_mem, self.app["instance"])
                rs_detal = self._decide_delta(avg_cpu, avg_mem)
                if rs_detal['up'] > 0:
                    self.scale(rs_detal['up'])
                elif rs_detal['down'] > 0:
                    self.scale(0-rs_detal['down'])
            except Exception:
                # Logged with traceback at ERROR so the failure is not
                # filtered out by the handler level.
                self.logger.exception("Autoscaling cycle failed")
            finally:
                time.sleep(self.config["TIME"]['monitor'])
def scale_application(client: MarathonClient, appid: str, instances: int):
    """Force-scale *appid* to *instances* and wait on the resulting deployment.

    The value returned by ``client.scale_app`` is handed straight to
    ``wait_for_deployment`` together with the client.
    """
    wait_for_deployment(
        client,
        client.scale_app(appid, instances, force=True),
    )
# Polling loop: move the Marathon app one instance per pass towards the
# length of the "celery" list, bounded by min_instances and max_instances.
while True:
    print("Loop!")

    # Keep asking Marathon for the app, pausing a second after every failure.
    app = None
    while app is None:
        try:
            app = c.get_app(marathon_app)
        except MarathonError as err:
            print(err)
            app = None
            time.sleep(1)

    waitingDocs = r.llen("celery")

    # At most one instance is added or removed per pass.
    if waitingDocs > app.instances:
        step = 1
    elif waitingDocs < app.instances:
        step = -1
    else:
        step = 0
    instances = min(max_instances, max(min_instances, app.instances + step))

    print("App instances: ", app.instances, " - New value: ", instances)
    if instances != app.instances:
        print("Delta: ", (app.instances - instances))
        c.scale_app(marathon_app, instances=instances, force=True)

    sys.stdout.flush()
    time.sleep(2)