def and_we_wait_a_bit_for_the_app_to_disappear(context, which):
    """Poll Marathon until the selected app is no longer reported as running.

    Marathon will not make the app disappear until after all the tasks have
    died: https://github.com/mesosphere/marathon/issues/1431

    Checks up to 10 times, sleeping 0.5 seconds after every check that still
    reports the app as running.

    :param context: test context object; its ``marathon_client`` is queried
    :param which: selector handed to ``which_id`` to resolve the app id
    :returns: True as soon as a check does not report the app as running
    :raises AssertionError: if the final check does not report False
    """
    # ``range`` rather than the Python-2-only ``xrange`` so the step runs on
    # both interpreters; ten iterations make the list cost irrelevant.
    for _ in range(10):
        if marathon_tools.is_app_id_running(which_id(context, which), context.marathon_client) is True:
            time.sleep(0.5)
        else:
            return True
    # It better not be running by now!
    assert marathon_tools.is_app_id_running(which_id(context, which), context.marathon_client) is False
def status_marathon_job_verbose(service, instance, client, cluster, soa_dir):
    """Gather verbose status for every marathon app matching a service instance.

    No assumption is made about the *exact* app id: any app returned by the
    fuzzy ``service.instance`` match is reported.

    :param service: service name
    :param instance: instance name
    :param client: marathon client used for the lookups
    :param cluster: cluster name, forwarded to the per-app status helper
    :param soa_dir: soa config directory, forwarded to the per-app status helper
    :returns: tuple of (all tasks found, newline-joined status text)
    """
    collected_tasks = []
    report_chunks = []

    # Verbose mode deliberately shows *any* matching app, since it may not be
    # the one we believe should be deployed (e.g. old and new apps mid-bounce).
    for matching_id in marathon_tools.get_matching_appids(service, instance, client):
        if not marathon_tools.is_app_id_running(matching_id, client):
            report_chunks.append("Warning: App %s is not running yet." % matching_id)
            continue

        marathon_app = client.get_app(matching_id)
        app_tasks, app_report = get_verbose_status_of_marathon_app(
            marathon_client=client,
            app=marathon_app,
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
        )
        collected_tasks.extend(app_tasks)
        report_chunks.append(app_report)

    return collected_tasks, "\n".join(report_chunks)
def _clean_up_marathon_apps(context):
    """Delete all apps from every marathon client on the context and wait for them to die.

    Does nothing when ``context`` has no ``marathon_clients`` attribute.
    Otherwise keeps sweeping all clients until none of them lists any app,
    then blocks until no client reports a deployment in progress.
    """
    if not hasattr(context, "marathon_clients"):
        return

    apps_remain = True
    while apps_remain:
        # Assume we are done; any client that still lists apps flips this back.
        apps_remain = False
        for marathon_client in context.marathon_clients.get_all_clients():
            app_ids = marathon_tools.list_all_marathon_app_ids(marathon_client)
            if not app_ids:
                continue
            apps_remain = True
            paasta_print(
                "after_scenario: Deleting %d apps to prep for the next scenario. %s"
                % (len(app_ids), ",".join(app_ids))
            )
            for app_id in app_ids:
                if not marathon_tools.is_app_id_running(app_id, marathon_client):
                    paasta_print(
                        "after_scenario: %s showed up in the app_list, but doesn't look like it is running?"
                        % app_id
                    )
                    continue
                paasta_print(
                    "after_scenario: %s does look like it is running. Scaling down and killing it..."
                    % app_id
                )
                marathon_client.scale_app(app_id, instances=0, force=True)
                time.sleep(1)
                marathon_client.delete_app(app_id, force=True)
        time.sleep(0.5)

    for marathon_client in context.marathon_clients.get_all_clients():
        while marathon_client.list_deployments():
            paasta_print(
                "after_scenario: There are still marathon deployments in progress. sleeping."
            )
            time.sleep(0.5)
def then_the_which_app_should_be_gone(context, which):
    """Assert that the selected app is reported as not running.

    :param context: test context object; its ``current_client`` is queried
    :param which: selector handed to ``which_id`` to resolve the app id
    :raises AssertionError: if the running check returns anything but False
    """
    target_app_id = which_id(context, which)
    running = marathon_tools.is_app_id_running(target_app_id, context.current_client)
    assert running is False
def status_marathon_job(service, instance, app_id, normal_instance_count, client):
    """Build a one-line, colorized summary of a marathon app's state.

    :param service: service name, used for the displayed job id
    :param instance: instance name, used for the displayed job id
    :param app_id: marathon app id to look up
    :param normal_instance_count: number of instances expected to be running
    :param client: marathon client used for the lookups
    :returns: a status string starting with "Marathon: "
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if not marathon_tools.is_app_id_running(app_id, client):
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon: %s - %s (app %s) is %s running in Marathon." % (
            status, name, app_id, red_not)

    app = client.get_app(app_id)
    running_instances = app.tasks_running

    # No pending deployments means the app is settled.
    deploy_status = (
        PaastaColors.bold("Running")
        if len(app.deployments) == 0
        else PaastaColors.yellow("Deploying")
    )

    # Pick the label and the two colorizers first, then render them.
    if running_instances >= normal_instance_count:
        paint_status, label, paint_count = PaastaColors.green, "Healthy", PaastaColors.green
    elif running_instances == 0:
        paint_status, label, paint_count = PaastaColors.yellow, "Critical", PaastaColors.red
    else:
        paint_status, label, paint_count = PaastaColors.yellow, "Warning", PaastaColors.yellow

    status = paint_status(label)
    instance_count = paint_count("(%d/%d)" % (running_instances, normal_instance_count))
    return "Marathon: %s - up with %s instances. Status: %s." % (
        status, instance_count, deploy_status)
def kill_marathon_app(full_appid, cluster, client, soa_dir):
    """Bounce a marathon app down, draining its tasks, until it is gone.

    Repeats a 'down' bounce followed by a 10 second sleep for as long as the
    app is still reported as running.

    :param full_appid: full marathon app id; decomposed into service/instance
    :param cluster: cluster name used to load the service config
    :param client: marathon client used to query and bounce the app
    :param soa_dir: soa config directory used to load the service config
    """
    # Job ids carry '--' where the original names had '_'; undo that.
    service, instance, _, __ = (s.replace('--', '_') for s in decompose_job_id(full_appid))
    service_instance_config = marathon_tools.load_marathon_service_config(
        service=service,
        instance=instance,
        cluster=cluster,
        soa_dir=soa_dir,
    )
    complete_config = service_instance_config.format_marathon_app_dict()
    nerve_ns = service_instance_config.get_nerve_namespace()
    service_namespace_config = marathon_tools.load_service_namespace_config(service=service, namespace=nerve_ns)
    drain_method = drain_lib.get_drain_method(
        service_instance_config.get_drain_method(service_namespace_config),
        service=service,
        instance=instance,
        nerve_ns=nerve_ns,
        drain_method_params=service_instance_config.get_drain_method_params(service_namespace_config),
    )

    bounce_func = bounce_lib.get_bounce_method_func('down')

    while marathon_tools.is_app_id_running(app_id=full_appid, client=client):
        # Re-fetch the app on every pass so task states are current.
        app_to_kill = client.get_app(full_appid)
        (
            old_app_live_happy_tasks,
            old_app_live_unhappy_tasks,
            old_app_draining_tasks,
            old_app_at_risk_tasks,
        ) = get_tasks_by_state(
            other_apps=[app_to_kill],
            drain_method=drain_method,
            service=service,
            nerve_ns=nerve_ns,
            bounce_health_params=service_instance_config.get_bounce_health_params(service_namespace_config),
        )
        # There is no new app: everything is "old" and gets bounced down.
        do_bounce(
            bounce_func=bounce_func,
            drain_method=drain_method,
            config=complete_config,
            new_app_running='',
            happy_new_tasks=[],
            old_app_live_happy_tasks=old_app_live_happy_tasks,
            old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
            old_app_draining_tasks=old_app_draining_tasks,
            old_app_at_risk_tasks=old_app_at_risk_tasks,
            serviceinstance="{}.{}".format(service, instance),
            bounce_method='down',
            service=service,
            cluster=cluster,
            instance=instance,
            marathon_jobid=full_appid,
            client=client,
            soa_dir=soa_dir,
        )

        paasta_print("Sleeping for 10 seconds to give the tasks time to drain")
        time.sleep(10)

    # Spelling fixed: the message previously read "Sucessfully".
    paasta_print("Successfully killed {}".format(full_appid))
def kill_marathon_app(full_appid, cluster, client, soa_dir):
    """Bounce a marathon app down, draining its tasks, until it is gone.

    Repeats a "down" bounce followed by a 10 second sleep for as long as the
    app is still reported as running.

    :param full_appid: full marathon app id; decomposed into service/instance
    :param cluster: cluster name used to load the service config
    :param client: marathon client used to query and bounce the app
    :param soa_dir: soa config directory used to load the service config
    """
    # Job ids carry "--" where the original names had "_"; undo that.
    service, instance, _, __ = (
        part.replace("--", "_") for part in decompose_job_id(full_appid)
    )

    job_config = marathon_tools.load_marathon_service_config(
        service=service, instance=instance, cluster=cluster, soa_dir=soa_dir)
    app_dict = job_config.format_marathon_app_dict()
    registrations = job_config.get_registrations()
    namespace_config = marathon_tools.load_service_namespace_config(
        service=service, namespace=registrations[0])

    drain_method_name = job_config.get_drain_method(namespace_config)
    drainer = drain_lib.get_drain_method(
        drain_method_name,
        service=service,
        instance=instance,
        registrations=registrations,
        drain_method_params=job_config.get_drain_method_params(namespace_config),
    )
    bounce_down = bounce_lib.get_bounce_method_func("down")

    while marathon_tools.is_app_id_running(app_id=full_appid, client=client):
        # Re-fetch the app on every pass so task states are current.
        doomed_app = client.get_app(full_appid)
        tasks_by_state = get_tasks_by_state(
            other_apps=[doomed_app],
            drain_method=drainer,
            service=service,
            nerve_ns=registrations[0],
            bounce_health_params=job_config.get_bounce_health_params(namespace_config),
        )
        live_happy, live_unhappy, draining, at_risk = tasks_by_state

        # There is no new app: everything is "old" and gets bounced down.
        do_bounce(
            bounce_func=bounce_down,
            drain_method=drainer,
            config=app_dict,
            new_app_running="",
            happy_new_tasks=[],
            old_app_live_happy_tasks=live_happy,
            old_app_live_unhappy_tasks=live_unhappy,
            old_app_draining_tasks=draining,
            old_app_at_risk_tasks=at_risk,
            serviceinstance=f"{service}.{instance}",
            bounce_method="down",
            service=service,
            cluster=cluster,
            instance=instance,
            marathon_jobid=full_appid,
            client=client,
            soa_dir=soa_dir,
        )

        paasta_print("Sleeping for 10 seconds to give the tasks time to drain")
        time.sleep(10)

    paasta_print(f"Successfully killed {full_appid}")
def wait_for_create(app_id, client):
    """Block until the given app_id is reported as running in marathon.

    Sleeps WAIT_CREATE_S seconds between consecutive checks.

    :param app_id: The app_id to ensure creation for
    :param client: A MarathonClient object
    """
    while True:
        # Only an explicit False keeps us waiting; anything else ends the wait.
        if marathon_tools.is_app_id_running(app_id, client) is not False:
            return
        log.info("Waiting for %s to be created in marathon..", app_id)
        time.sleep(WAIT_CREATE_S)
def wait_for_delete(app_id, client):
    """Block until the given app_id is no longer reported as running in marathon.

    Sleeps WAIT_DELETE_S seconds between consecutive checks.

    :param app_id: The app_id to check for deletion
    :param client: A MarathonClient object
    """
    while True:
        # Only an explicit True keeps us waiting; anything else ends the wait.
        if marathon_tools.is_app_id_running(app_id, client) is not True:
            return
        log.info("Waiting for %s to be deleted from marathon...", app_id)
        time.sleep(WAIT_DELETE_S)
def status_marathon_job(service, instance, app_id, normal_instance_count, client):
    """Build a one-line, colorized summary of a marathon app's state.

    While a deployment is in progress the launch queue is consulted to tell
    apart a plain deploy, a wait for capacity, and a backoff delay.

    :param service: service name, used for the displayed job id
    :param instance: instance name, used for the displayed job id
    :param app_id: marathon app id to look up
    :param normal_instance_count: number of instances expected to be running
    :param client: marathon client used for the lookups
    :returns: a status string starting with "Marathon: "
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if not marathon_tools.is_app_id_running(app_id, client):
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon: %s - %s (app %s) is %s running in Marathon." % (
            status, name, app_id, red_not)

    app = client.get_app(app_id)
    running_instances = app.tasks_running

    if len(app.deployments) == 0:
        deploy_status = PaastaColors.bold("Running")
    elif app.instances == 0 and app.tasks_running == 0:
        deploy_status = PaastaColors.grey("Stopped")
    else:
        # App is currently deploying so we should check the launch queue for more info
        queue_status = marathon_tools.get_app_queue_status(client, app_id)
        is_overdue, backoff_seconds = queue_status
        if is_overdue:
            waiting = PaastaColors.red("Waiting")
            deploy_status = "%s (new tasks are not launching due to lack of capacity)" % waiting
        elif backoff_seconds:
            delayed = PaastaColors.red("Delayed")
            deploy_status = "%s (next task won't launch for %s seconds due to previous failures)" % (
                delayed, backoff_seconds)
        else:
            deploy_status = PaastaColors.yellow("Deploying")

    # Pick the label and the two colorizers first, then render them.
    if running_instances >= normal_instance_count:
        paint_status, label, paint_count = PaastaColors.green, "Healthy", PaastaColors.green
    elif running_instances == 0:
        paint_status, label, paint_count = PaastaColors.yellow, "Critical", PaastaColors.red
    else:
        paint_status, label, paint_count = PaastaColors.yellow, "Warning", PaastaColors.yellow

    status = paint_status(label)
    instance_count = paint_count("(%d/%d)" % (running_instances, normal_instance_count))
    return "Marathon: %s - up with %s instances. Status: %s" % (
        status, instance_count, deploy_status)
def marathon_job_status(mstatus, client, job_config):
    """Fill ``mstatus`` in place with the marathon state of a job.

    :param mstatus: mutable mapping that receives the status fields
    :param client: marathon client used for the lookups
    :param job_config: job config providing the app dict and instance count
    :returns: None; results are written into ``mstatus``
    """
    try:
        app_dict = job_config.format_marathon_app_dict()
        app_id = app_dict['id']
    except NoDockerImageError:
        mstatus['error_message'] = "Docker image is not in deployments.json."
        return

    if not marathon_tools.is_app_id_running(app_id, client):
        mstatus['deploy_status'] = 'Not Running'
        return

    app = client.get_app(app_id)
    status_and_extra = get_marathon_app_deploy_status(app, app_id, client)
    deploy_status = status_and_extra[0]
    mstatus['deploy_status'] = MarathonDeployStatus.tostring(deploy_status)

    # by comparing running count with expected count, callers can figure
    # out if the instance is in Healthy, Warning or Critical state.
    mstatus['running_instance_count'] = app.tasks_running
    mstatus['expected_instance_count'] = job_config.get_instances()
def status_marathon_job_verbose(service, instance, client):
    """Gather verbose status for every marathon app matching a service instance.

    No assumption is made about the *exact* app id: any app returned by the
    fuzzy ``service.instance`` match is reported.

    :param service: service name
    :param instance: instance name
    :param client: marathon client used for the lookups
    :returns: tuple of (all tasks found, newline-joined status text)
    """
    collected_tasks = []
    report_chunks = []

    # Verbose mode deliberately shows *any* matching app, since it may not be
    # the one we believe should be deployed (e.g. old and new apps mid-bounce).
    for matching_id in marathon_tools.get_matching_appids(service, instance, client):
        if not marathon_tools.is_app_id_running(matching_id, client):
            report_chunks.append("Warning: App %s not running." % matching_id)
            continue

        marathon_app = client.get_app(matching_id)
        app_tasks, app_report = get_verbose_status_of_marathon_app(marathon_app)
        collected_tasks.extend(app_tasks)
        report_chunks.append(app_report)

    return collected_tasks, "\n".join(report_chunks)
def _clean_up_marathon_apps(context):
    """Delete all apps from the context's marathon client and wait for them to die.

    Does nothing when ``context`` has no ``marathon_client`` attribute.
    Otherwise keeps deleting until marathon lists no apps, then blocks until
    no deployment is reported in progress.
    """
    if hasattr(context, 'marathon_client'):
        while True:
            apps = marathon_tools.list_all_marathon_app_ids(context.marathon_client)
            if not apps:
                break
            # print(...) with a single argument prints the same text on
            # Python 2 and Python 3; the old statement form is Python-2-only.
            print("after_scenario: Deleting %d apps to prep for the next scenario. %s" % (len(apps), ",".join(apps)))
            for app in apps:
                if marathon_tools.is_app_id_running(app, context.marathon_client):
                    print("after_scenario: %s does look like it is running. Scaling down and killing it..." % app)
                    # Scale to zero first, pause briefly, then force the delete.
                    context.marathon_client.scale_app(app, instances=0, force=True)
                    time.sleep(1)
                    context.marathon_client.delete_app(app, force=True)
                else:
                    print("after_scenario: %s showed up in the app_list, but doesn't look like it is running?" % app)
            time.sleep(0.5)
        while context.marathon_client.list_deployments():
            print("after_scenario: There are still marathon deployments in progress. sleeping.")
            time.sleep(0.5)
def status_marathon_job(service, instance, app_id, normal_instance_count, client):
    """Build a one-line, colorized summary of a marathon app's state.

    :param service: service name, used for the displayed job id
    :param instance: instance name, used for the displayed job id
    :param app_id: marathon app id to look up
    :param normal_instance_count: number of instances expected to be running
    :param client: marathon client used for the lookups
    :returns: a status string starting with "Marathon: "
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if not marathon_tools.is_app_id_running(app_id, client):
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon: %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)

    app = client.get_app(app_id)
    running_instances = app.tasks_running
    deploy_status = marathon_tools.get_marathon_app_deploy_status_human(app, app_id, client)

    # Pick the label and the two colorizers first, then render them.
    if running_instances >= normal_instance_count:
        paint_status, label, paint_count = PaastaColors.green, "Healthy", PaastaColors.green
    elif running_instances == 0:
        paint_status, label, paint_count = PaastaColors.yellow, "Critical", PaastaColors.red
    else:
        paint_status, label, paint_count = PaastaColors.yellow, "Warning", PaastaColors.yellow

    status = paint_status(label)
    instance_count = paint_count("(%d/%d)" % (running_instances, normal_instance_count))
    return "Marathon: %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
def main():
    """Kill the app named on the command line on every marathon client running it.

    :returns: 0 when the app was found (and killed) on at least one client,
        1 when no client reported it as running
    """
    args = parse_args()
    full_appid = args.appname.lstrip('/')

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    clients = marathon_tools.get_list_of_marathon_clients(system_paasta_config=system_paasta_config)

    found_on_any_client = False
    for client in clients:
        if not marathon_tools.is_app_id_running(app_id=full_appid, client=client):
            continue
        kill_marathon_app(
            full_appid=full_appid,
            cluster=cluster,
            client=client,
            soa_dir=args.soa_dir,
        )
        found_on_any_client = True

    if found_on_any_client:
        return 0

    paasta_print("Couldn't find an app named {}".format(full_appid))
    return 1
def status_marathon_job(service, instance, app_id, normal_instance_count, client):
    """Build a one-line, colorized summary of a marathon app's state.

    While a deployment is in progress the launch queue is consulted to tell
    apart a plain deploy, a wait for capacity, and a backoff delay.

    :param service: service name, used for the displayed job id
    :param instance: instance name, used for the displayed job id
    :param app_id: marathon app id to look up
    :param normal_instance_count: number of instances expected to be running
    :param client: marathon client used for the lookups
    :returns: a status string starting with "Marathon: "
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if not marathon_tools.is_app_id_running(app_id, client):
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon: %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)

    app = client.get_app(app_id)
    running_instances = app.tasks_running

    if len(app.deployments) == 0:
        deploy_status = PaastaColors.bold("Running")
    elif app.instances == 0 and app.tasks_running == 0:
        deploy_status = PaastaColors.grey("Stopped")
    else:
        # App is currently deploying so we should check the launch queue for more info
        queue_status = marathon_tools.get_app_queue_status(client, app_id)
        is_overdue, backoff_seconds = queue_status
        if is_overdue:
            waiting = PaastaColors.red("Waiting")
            deploy_status = "%s (new tasks are not launching due to lack of capacity)" % waiting
        elif backoff_seconds:
            delayed = PaastaColors.red("Delayed")
            deploy_status = "%s (next task won't launch for %s seconds due to previous failures)" % (
                delayed, backoff_seconds)
        else:
            deploy_status = PaastaColors.yellow("Deploying")

    # Pick the label and the two colorizers first, then render them.
    if running_instances >= normal_instance_count:
        paint_status, label, paint_count = PaastaColors.green, "Healthy", PaastaColors.green
    elif running_instances == 0:
        paint_status, label, paint_count = PaastaColors.yellow, "Critical", PaastaColors.red
    else:
        paint_status, label, paint_count = PaastaColors.yellow, "Warning", PaastaColors.yellow

    status = paint_status(label)
    instance_count = paint_count("(%d/%d)" % (running_instances, normal_instance_count))
    return "Marathon: %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
def then_the_which_app_should_be_running(context, which):
    """Assert that the selected app is reported as running.

    :param context: test context object; its ``marathon_client`` is queried
    :param which: selector handed to ``which_id`` to resolve the app id
    :raises AssertionError: if the running check returns anything but True
    """
    target_app_id = which_id(context, which)
    running = marathon_tools.is_app_id_running(target_app_id, context.marathon_client)
    assert running is True
def then_the_which_app_should_be_gone(context, which):
    """Assert that the selected app is reported as not running.

    :param context: test context object; its ``marathon_client`` is queried
    :param which: selector handed to ``which_id`` to resolve the app id
    :raises AssertionError: if the running check returns anything but False
    """
    target_app_id = which_id(context, which)
    running = marathon_tools.is_app_id_running(target_app_id, context.marathon_client)
    assert running is False
def main():
    """Bounce the marathon app named on the command line down until it is gone.

    Exits with status 1 when the app is not reported as running. Otherwise
    repeats a 'down' bounce followed by a 10 second sleep for as long as the
    app is still reported as running.
    """
    args = parse_args()
    full_appid = args.appname.lstrip('/')
    soa_dir = args.soa_dir
    marathon_config = marathon_tools.load_marathon_config()
    client = marathon_tools.get_marathon_client(
        url=marathon_config.get_url(),
        user=marathon_config.get_username(),
        passwd=marathon_config.get_password(),
    )

    if not marathon_tools.is_app_id_running(app_id=full_appid, client=client):
        print("Couldn't find an app named {0}".format(full_appid))
        sys.exit(1)

    # Job ids carry '--' where the original names had '_'; undo that.
    service, instance, _, __ = (s.replace('--', '_') for s in decompose_job_id(full_appid))
    complete_config = marathon_tools.create_complete_config(service, instance, marathon_config)
    cluster = load_system_paasta_config().get_cluster()
    service_instance_config = marathon_tools.load_marathon_service_config(
        service=service,
        instance=instance,
        cluster=cluster,
        soa_dir=soa_dir,
    )
    nerve_ns = service_instance_config.get_nerve_namespace()
    service_namespace_config = marathon_tools.load_service_namespace_config(service=service, namespace=nerve_ns)
    drain_method = drain_lib.get_drain_method(
        service_instance_config.get_drain_method(service_namespace_config),
        service=service,
        instance=instance,
        nerve_ns=nerve_ns,
        drain_method_params=service_instance_config.get_drain_method_params(service_namespace_config),
    )

    bounce_func = bounce_lib.get_bounce_method_func('down')

    while marathon_tools.is_app_id_running(app_id=full_appid, client=client):
        # Re-fetch the app on every pass so task states are current.
        app_to_kill = client.get_app(full_appid)
        old_app_live_tasks, old_app_draining_tasks = get_old_live_draining_tasks([app_to_kill], drain_method)
        # There is no new app: everything is "old" and gets bounced down.
        do_bounce(
            bounce_func=bounce_func,
            drain_method=drain_method,
            config=complete_config,
            new_app_running='',
            happy_new_tasks=[],
            old_app_live_tasks=old_app_live_tasks,
            old_app_draining_tasks=old_app_draining_tasks,
            serviceinstance="{0}.{1}".format(service, instance),
            bounce_method='down',
            service=service,
            cluster=cluster,
            instance=instance,
            marathon_jobid=full_appid,
            client=client,
            soa_dir=soa_dir,
        )

        # Function-call form matches the other prints here and also parses on
        # Python 3; the old statement form was Python-2-only.
        print("Sleeping for 10 seconds to give the tasks time to drain")
        time.sleep(10)

    # Spelling fixed: the message previously read "Sucessfully".
    print("Successfully killed {0}".format(full_appid))
def main():
    """Bounce the marathon app named on the command line down until it is gone.

    Exits with status 1 when the app is not reported as running. Otherwise
    repeats a 'down' bounce followed by a 10 second sleep for as long as the
    app is still reported as running.
    """
    args = parse_args()
    full_appid = args.appname.lstrip('/')
    soa_dir = args.soa_dir
    marathon_config = marathon_tools.load_marathon_config()
    client = marathon_tools.get_marathon_client(
        url=marathon_config.get_url(),
        user=marathon_config.get_username(),
        passwd=marathon_config.get_password(),
    )

    if not marathon_tools.is_app_id_running(app_id=full_appid, client=client):
        print("Couldn't find an app named {0}".format(full_appid))
        sys.exit(1)

    # Job ids carry '--' where the original names had '_'; undo that.
    service, instance, _, __ = (s.replace('--', '_') for s in decompose_job_id(full_appid))
    complete_config = marathon_tools.create_complete_config(
        service, instance, marathon_config)
    cluster = load_system_paasta_config().get_cluster()
    service_instance_config = marathon_tools.load_marathon_service_config(
        service=service,
        instance=instance,
        cluster=cluster,
        soa_dir=soa_dir,
    )
    nerve_ns = service_instance_config.get_nerve_namespace()
    service_namespace_config = marathon_tools.load_service_namespace_config(
        service=service, namespace=nerve_ns)
    drain_method = drain_lib.get_drain_method(
        service_instance_config.get_drain_method(service_namespace_config),
        service=service,
        instance=instance,
        nerve_ns=nerve_ns,
        drain_method_params=service_instance_config.get_drain_method_params(
            service_namespace_config),
    )

    bounce_func = bounce_lib.get_bounce_method_func('down')

    while marathon_tools.is_app_id_running(app_id=full_appid, client=client):
        # Re-fetch the app on every pass so task states are current.
        app_to_kill = client.get_app(full_appid)
        old_app_live_tasks, old_app_draining_tasks = get_old_live_draining_tasks(
            [app_to_kill], drain_method)
        # There is no new app: everything is "old" and gets bounced down.
        do_bounce(
            bounce_func=bounce_func,
            drain_method=drain_method,
            config=complete_config,
            new_app_running='',
            happy_new_tasks=[],
            old_app_live_tasks=old_app_live_tasks,
            old_app_draining_tasks=old_app_draining_tasks,
            serviceinstance="{0}.{1}".format(service, instance),
            bounce_method='down',
            service=service,
            cluster=cluster,
            instance=instance,
            marathon_jobid=full_appid,
            client=client,
            soa_dir=soa_dir,
        )

        # Function-call form matches the other prints here and also parses on
        # Python 3; the old statement form was Python-2-only.
        print("Sleeping for 10 seconds to give the tasks time to drain")
        time.sleep(10)

    # Spelling fixed: the message previously read "Sucessfully".
    print("Successfully killed {0}".format(full_appid))