def rolling_replace_app(service_name, app1_id, app2_id, app2_config, labels):
    """Replace a running Marathon app with a new one, one task at a time.

    The new app (``app2_id``) is launched with zero instances.  Then, for
    every task the old app (``app1_id``) had when this function started, one
    extra instance of the new app is requested, the function waits until the
    started-task count of the new app has grown, and one old task is killed
    (with ``scale=True``).  Finally the old app is deleted.

    The Marathon endpoint is built from the ``marathon_host`` and
    ``marathon_port`` names, which are defined outside this function.

    :param service_name: passed through to ``launcher.launch_app``
    :param app1_id: id of the app being replaced
    :param app2_id: id of the replacement app
    :param app2_config: configuration handed to ``launcher.launch_app`` and
        ``launcher.update_app`` for the replacement app
    :param labels: passed through to ``launcher.launch_app``

    NOTE: the wait loop has no timeout; if the new instance never starts this
    function never returns.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(' replacing ' + app1_id + ' with ' + app2_id)
    marathon_client = MarathonClient('http://' + str(marathon_host) + ':' + str(marathon_port))

    # Snapshot the old tasks up front: this list drives the replacement loop.
    app1 = marathon_client.get_app(app1_id)
    old_tasks = app1.tasks

    launcher.launch_app(service_name, app2_id, app2_config, labels, instances=0)
    # The result is not used; the call is kept so the lookup of the freshly
    # launched app still happens (presumably it fails early if the app was
    # not registered -- TODO confirm against the Marathon client).
    marathon_client.get_app(app2_id)

    for old_task in old_tasks:
        # Replace each old task with a new task of the new app:
        # first add one instance of the new app ...
        num_started = num_started_tasks(app2_id)
        new_instances = num_started + 1
        launcher.update_app(app2_id, app2_config, new_instances)

        # ... wait for it to be counted as started ...
        while num_started < new_instances:
            time.sleep(1)
            print('waiting for app to start ' + str(num_started))
            num_started = num_started_tasks(app2_id)

        # ... then take down one old task.
        marathon_client.kill_task(app1_id, old_task.id, scale=True)

    marathon_client.delete_app(app1_id)
class MarathonIF(object):
    """Wrapper around a ``MarathonClient`` adding retry and polling helpers.

    ``get_app``, ``delete_app`` and ``scale_app`` retry the underlying client
    call every 10 seconds until it succeeds or the given timeout elapses.
    """

    def __init__(self, marathon_addr, my_addr, mesos):
        """Create the Marathon client and remember the caller-supplied context.

        :param marathon_addr: address handed to ``MarathonClient``
        :param my_addr: stored as ``self.myAddr``; not used inside this class
        :param mesos: stored as ``self.mesos``; not used inside this class
        """
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def _retry(self, op_name, operation, timeout):
        """Call ``operation()`` until it succeeds or ``timeout`` seconds pass.

        :param op_name: client method name, used only in log/error messages
        :param operation: zero-argument callable performing the client call
        :param timeout: overall time budget in seconds
        :return: whatever ``operation()`` returns
        :raises Exception: when the time budget is exhausted (including a
            ``timeout`` of 0 or less, in which case no call is made)
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                return operation()
            except Exception:
                # Deliberately not a bare ``except``: KeyboardInterrupt and
                # SystemExit must be able to stop a retry loop that can
                # otherwise run for minutes.
                l.info("mcli: %s returned error", op_name)
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli %s timed out, possible zookeper/marathon/mesos malfunction" % op_name)

    def get_apps(self):
        """Return the result of the client's ``list_apps()``."""
        return self.mcli.list_apps()

    def get_app(self, app_id, timeout=300):
        """Fetch an app, retrying on errors.

        :param app_id: id of the app to look up
        :param timeout: seconds to keep retrying failed calls
        :return: the app as returned by the client, or ``None`` when the
            client raises ``NotFoundError``
        :raises Exception: when the call keeps failing until ``timeout``
        """
        def fetch():
            try:
                return self.mcli.get_app(app_id)
            except marathon.exceptions.NotFoundError:
                # "Not found" is an answer, not a failure: do not retry.
                return None

        return self._retry("get_app", fetch, timeout)

    def delete_app(self, app_id, force=False, timeout=200):
        """Delete an app, retrying on errors.  Returns ``None``.

        :param app_id: id of the app to delete
        :param force: forwarded to the client's ``delete_app``
        :param timeout: seconds to keep retrying failed calls
        :raises Exception: when the call keeps failing until ``timeout``
        """
        # The client's return value is intentionally discarded.
        self._retry("delete_app", lambda: self.mcli.delete_app(app_id, force), timeout)

    def delete_deployment(self, dep_id):
        """Delete a deployment; returns the client's result unchanged."""
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        """Return the result of the client's ``list_deployments()``."""
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete the app if ``get_app`` finds it; otherwise do nothing.

        Each failed attempt is printed and followed by a 10 second pause.

        :param app_id: id of the app to delete
        :param trys: maximum number of attempts
        :return: ``None``
        :raises ValueError: when ``trys`` allows no attempt at all
        :raises Exception: the error of the last attempt when all fail
        """
        last_error = None
        for _ in range(trys):
            try:
                if self.get_app(app_id):
                    return self.delete_app(app_id)
                return None
            except Exception as err:
                # Keep a reference: a bare ``raise`` after the loop has no
                # active exception to re-raise on Python 3.
                last_error = err
                pprint("<p>Error: %s</p>" % type(err))
                time.sleep(10)
        if last_error is None:
            raise ValueError("delete_app_ifexisting needs trys >= 1, got %r" % (trys,))
        raise last_error

    @staticmethod
    def is_valid_app_id(app_id):
        """Return True when ``app_id`` only uses the accepted characters.

        Accepted: ASCII letters (upper and lower case), digits, hyphen,
        slash and dot.  The empty string also passes because the pattern
        allows zero characters.
        """
        return bool(re.match(r"^[A-Za-z0-9-/.]*$", app_id))

    def create_app(self, app_id, attr):
        """
        Create and start an app.
        :param app_id: (str) - Application ID
        :param attr: marathon.models.app.MarathonApp application to create.
        :return: the created app
        :raises Exception: when ``app_id`` fails ``is_valid_app_id``
        :raises marathon.exceptions.MarathonHttpError: immediately for any
            error other than "app is locked", or the last "locked" error
            after 10 attempts spaced one second apart
        """
        # Validate that app_id conforms to allowed naming scheme.
        # NOTE(review): the message below says "lowercase", but the check
        # also accepts uppercase letters, slashes and dots; the text is left
        # unchanged here -- confirm which of the two is intended.
        if not self.is_valid_app_id(app_id):
            l.error("Error: Only lowercase letters, digits, hyphens are allowed in app_id. %s",
                    app_id)
            raise Exception("Invalid app_id")

        locked_marker = 'App is locked by one or more deployments. Override with the option'
        last_error = None
        for _ in range(10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as err:
                if locked_marker not in str(err):
                    raise
                # Locked by a running deployment: remember the error and
                # try again shortly.
                last_error = err
                time.sleep(1)
        # All attempts hit the lock; surface the real error rather than
        # relying on a bare ``raise`` outside an ``except`` block.
        raise last_error

    def wait_app_removal(self, app):
        """Block until ``get_app`` no longer finds ``app``; returns True.

        Polls every 0.2 seconds with no upper bound on the wait.
        """
        cnt = 0
        while self.get_app(app):
            time.sleep(0.2)
            cnt += 1
        if cnt > 0:
            l.info("Stuck waiting for %s to be deleted CNT=%d", app, cnt)
        return True

    def wait_app_ready(self, app, running_count, sleep_before_next_try=1):
        """Block until the app reports exactly ``running_count`` running tasks.

        If more tasks than requested are running, the app is scaled down to
        ``running_count`` and polled again after one second.

        :param app: id of the app to watch
        :param running_count: expected value of ``tasks_running``
        :param sleep_before_next_try: seconds between polls while waiting
        :return: the app object from the poll that matched

        NOTE: there is no timeout.  ``get_app`` returns ``None`` for an
        unknown app, in which case the attribute access below raises
        ``AttributeError``.
        """
        cnt = 0
        while True:
            a1 = self.get_app(app)
            # if tasks_running are greater (due to whatever reason, scale down accordingly)
            if a1.tasks_running > running_count:
                delta = a1.tasks_running - running_count
                l.info("Found [%d] more apps, scaling down to [%d]", delta, running_count)
                self.scale_app(app, running_count)
                # Allow for some time before next poll
                time.sleep(1)
                continue
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(sleep_before_next_try)
            # Progress message once every 30 polls.
            if (cnt % 30) == 29:
                l.info("[%d]Waiting for task to move to running stage, "
                       "current stat staged=%d running=%d expected Running=%d",
                       cnt, a1.tasks_staged, a1.tasks_running, running_count)

    def scale_app(self, app, scale, timeout=300):
        """Scale an app, retrying on errors.  Returns ``None``.

        :param app: id of the app to scale
        :param scale: forwarded to the client's ``scale_app``
        :param timeout: seconds to keep retrying failed calls
        :raises Exception: when the call keeps failing until ``timeout``
        """
        # The client's return value is intentionally discarded.
        self._retry("scale_app", lambda: self.mcli.scale_app(app, scale), timeout)

    def ping(self):
        """Return the result of the client's ``ping()``."""
        return self.mcli.ping()

    def kill_task(self, app_id, task_id):
        """Kill one task of an app; returns the client's result unchanged."""
        return self.mcli.kill_task(app_id, task_id)
class MarathonIF(object):
    """Wrapper around a ``MarathonClient`` adding retry and polling helpers.

    ``get_app``, ``delete_app`` and ``scale_app`` retry the underlying client
    call every 10 seconds until it succeeds or the given timeout elapses.
    """

    def __init__(self, marathon_addr, my_addr, mesos):
        """Create the Marathon client and remember the caller-supplied context.

        :param marathon_addr: address handed to ``MarathonClient``
        :param my_addr: stored as ``self.myAddr``; not used inside this class
        :param mesos: stored as ``self.mesos``; not used inside this class
        """
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def _retry(self, op_name, operation, timeout):
        """Call ``operation()`` until it succeeds or ``timeout`` seconds pass.

        :param op_name: client method name, used only in log/error messages
        :param operation: zero-argument callable performing the client call
        :param timeout: overall time budget in seconds
        :return: whatever ``operation()`` returns
        :raises Exception: when the time budget is exhausted (including a
            ``timeout`` of 0 or less, in which case no call is made)
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                return operation()
            except Exception:
                # Deliberately not a bare ``except``: KeyboardInterrupt and
                # SystemExit must be able to stop a retry loop that can
                # otherwise run for minutes.
                l.info("mcli: %s returned error", op_name)
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli %s timed out, possible zookeper/marathon/mesos malfunction" % op_name)

    def get_apps(self):
        """Return the result of the client's ``list_apps()``."""
        return self.mcli.list_apps()

    def get_app(self, app_id, timeout=300):
        """Fetch an app, retrying on errors.

        :param app_id: id of the app to look up
        :param timeout: seconds to keep retrying failed calls
        :return: the app as returned by the client, or ``None`` when the
            client raises ``NotFoundError``
        :raises Exception: when the call keeps failing until ``timeout``
        """
        def fetch():
            try:
                return self.mcli.get_app(app_id)
            except marathon.exceptions.NotFoundError:
                # "Not found" is an answer, not a failure: do not retry.
                return None

        return self._retry("get_app", fetch, timeout)

    def delete_app(self, app_id, force=False, timeout=200):
        """Delete an app, retrying on errors.  Returns ``None``.

        :param app_id: id of the app to delete
        :param force: forwarded to the client's ``delete_app``
        :param timeout: seconds to keep retrying failed calls
        :raises Exception: when the call keeps failing until ``timeout``
        """
        # The client's return value is intentionally discarded.
        self._retry("delete_app", lambda: self.mcli.delete_app(app_id, force), timeout)

    def delete_deployment(self, dep_id):
        """Delete a deployment; returns the client's result unchanged."""
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        """Return the result of the client's ``list_deployments()``."""
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete the app if ``get_app`` finds it; otherwise do nothing.

        Each failed attempt is printed and followed by a 10 second pause.

        :param app_id: id of the app to delete
        :param trys: maximum number of attempts
        :return: ``None``
        :raises ValueError: when ``trys`` allows no attempt at all
        :raises Exception: the error of the last attempt when all fail
        """
        last_error = None
        for _ in range(trys):
            try:
                if self.get_app(app_id):
                    return self.delete_app(app_id)
                return None
            except Exception as err:
                # Keep a reference: a bare ``raise`` after the loop has no
                # active exception to re-raise on Python 3.
                last_error = err
                pprint("<p>Error: %s</p>" % type(err))
                time.sleep(10)
        if last_error is None:
            raise ValueError("delete_app_ifexisting needs trys >= 1, got %r" % (trys,))
        raise last_error

    @staticmethod
    def is_valid_app_id(app_id):
        """Return True when ``app_id`` only uses the accepted characters.

        Accepted: ASCII letters (upper and lower case), digits, hyphen,
        slash and dot.  The empty string also passes because the pattern
        allows zero characters.
        """
        return bool(re.match(r"^[A-Za-z0-9-/.]*$", app_id))

    def create_app(self, app_id, attr):
        """
        Create and start an app.
        :param app_id: (str) - Application ID
        :param attr: marathon.models.app.MarathonApp application to create.
        :return: the created app
        :raises Exception: when ``app_id`` fails ``is_valid_app_id``
        :raises marathon.exceptions.MarathonHttpError: immediately for any
            error other than "app is locked", or the last "locked" error
            after 10 attempts spaced one second apart
        """
        # Validate that app_id conforms to allowed naming scheme.
        # NOTE(review): the message below says "lowercase", but the check
        # also accepts uppercase letters, slashes and dots; the text is left
        # unchanged here -- confirm which of the two is intended.
        if not self.is_valid_app_id(app_id):
            l.error("Error: Only lowercase letters, digits, hyphens are allowed in app_id. %s",
                    app_id)
            raise Exception("Invalid app_id")

        locked_marker = 'App is locked by one or more deployments. Override with the option'
        last_error = None
        for _ in range(10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as err:
                if locked_marker not in str(err):
                    raise
                # Locked by a running deployment: remember the error and
                # try again shortly.
                last_error = err
                time.sleep(1)
        # All attempts hit the lock; surface the real error rather than
        # relying on a bare ``raise`` outside an ``except`` block.
        raise last_error

    def wait_app_removal(self, app):
        """Block until ``get_app`` no longer finds ``app``; returns True.

        Polls every 0.2 seconds with no upper bound on the wait.
        """
        cnt = 0
        while self.get_app(app):
            time.sleep(0.2)
            cnt += 1
        if cnt > 0:
            l.info("Stuck waiting for %s to be deleted CNT=%d", app, cnt)
        return True

    def wait_app_ready(self, app, running_count, sleep_before_next_try=1):
        """Block until the app reports exactly ``running_count`` running tasks.

        If more tasks than requested are running, the app is scaled down to
        ``running_count`` and polled again after one second.

        :param app: id of the app to watch
        :param running_count: expected value of ``tasks_running``
        :param sleep_before_next_try: seconds between polls while waiting
        :return: the app object from the poll that matched

        NOTE: there is no timeout.  ``get_app`` returns ``None`` for an
        unknown app, in which case the attribute access below raises
        ``AttributeError``.
        """
        cnt = 0
        while True:
            a1 = self.get_app(app)
            # if tasks_running are greater (due to whatever reason, scale down accordingly)
            if a1.tasks_running > running_count:
                delta = a1.tasks_running - running_count
                l.info("Found [%d] more apps, scaling down to [%d]", delta, running_count)
                self.scale_app(app, running_count)
                # Allow for some time before next poll
                time.sleep(1)
                continue
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(sleep_before_next_try)
            # Progress message once every 30 polls.
            if (cnt % 30) == 29:
                l.info("[%d]Waiting for task to move to running stage, "
                       "current stat staged=%d running=%d expected Running=%d",
                       cnt, a1.tasks_staged, a1.tasks_running, running_count)

    def scale_app(self, app, scale, timeout=300):
        """Scale an app, retrying on errors.  Returns ``None``.

        :param app: id of the app to scale
        :param scale: forwarded to the client's ``scale_app``
        :param timeout: seconds to keep retrying failed calls
        :raises Exception: when the call keeps failing until ``timeout``
        """
        # The client's return value is intentionally discarded.
        self._retry("scale_app", lambda: self.mcli.scale_app(app, scale), timeout)

    def ping(self):
        """Return the result of the client's ``ping()``."""
        return self.mcli.ping()

    def kill_task(self, app_id, task_id):
        """Kill one task of an app; returns the client's result unchanged."""
        return self.mcli.kill_task(app_id, task_id)