def given_an_old_app_to_be_destroyed_constraints(context, constraints):
    """Create the "old" Marathon app, with placement constraints, for a bounce scenario.

    Records the app id in ``context.old_ids`` and the app definition in
    ``context.old_app_config``, then creates the app through
    ``bounce_lib.create_marathon_app`` using ``context.marathon_client``.
    ``paasta_tools.bounce_lib.create_app_lock`` is patched out for the
    duration of the create call.

    :param context: scenario context object; mutated as described above
    :param constraints: string containing a Python expression that evaluates
        to the value stored under the ``constraints`` key of the app config
    """
    # NOTE(review): eval() executes arbitrary code. This is only acceptable if
    # `constraints` always comes from trusted test fixtures -- confirm, and
    # consider ast.literal_eval if the values are always plain literals.
    constraints = eval(constraints)
    old_app_name = "bounce.test1.oldapp.confighash"
    context.old_ids = [old_app_name]
    context.old_app_config = {
        'id': old_app_name,
        'cmd': '/bin/sleep 300',
        'instances': 2,
        'container': {
            'type': 'DOCKER',
            'docker': {
                'network': 'BRIDGE',
                'image': 'busybox',
            },
        },
        'backoff_seconds': 1,
        'backoff_factor': 1,
        'constraints': constraints,
    }
    # A plain `with` replaces contextlib.nested, which is deprecated in
    # Python 2.7 and removed in Python 3; the mock itself was never used.
    with mock.patch('paasta_tools.bounce_lib.create_app_lock'):
        bounce_lib.create_marathon_app(old_app_name, context.old_app_config, context.marathon_client)
def given_an_old_app_to_be_destroyed(context):
    """Create the "old" Marathon app for a bounce scenario.

    Records the app id in ``context.old_ids`` and the app definition in
    ``context.old_app_config``, then creates the app through
    ``bounce_lib.create_marathon_app`` using ``context.marathon_client``.
    ``paasta_tools.bounce_lib.create_app_lock`` is patched out for the
    duration of the create call.

    :param context: scenario context object; mutated as described above
    """
    old_app_name = "bounce.test1.oldapp.confighash"
    context.old_ids = [old_app_name]
    context.old_app_config = {
        "id": old_app_name,
        "cmd": "/bin/sleep 300",
        "instances": 2,
        "backoff_seconds": 0.1,
        "backoff_factor": 1,
    }
    # A plain `with` replaces contextlib.nested, which is deprecated in
    # Python 2.7 and removed in Python 3; the mock itself was never used.
    with mock.patch("paasta_tools.bounce_lib.create_app_lock"):
        bounce_lib.create_marathon_app(old_app_name, context.old_app_config, context.marathon_client)
def test_create_marathon_app(self):
    """create_marathon_app submits the app exactly once and then waits for it."""
    app_id = 'fake_creation'
    client = mock.create_autospec(marathon.MarathonClient)
    config = {'id': app_id}

    wait_patcher = mock.patch(
        'paasta_tools.bounce_lib.wait_for_create',
        autospec=True,
    )
    with wait_patcher as wait_for_create_mock:
        bounce_lib.create_marathon_app(app_id, config, client)

    # The mocks keep their call records after the patch is undone.
    assert client.create_app.call_count == 1
    positional_args = client.create_app.call_args[0]
    submitted_app = positional_args[1]
    assert submitted_app.id == app_id
    wait_for_create_mock.assert_called_once_with(config['id'], client)
def test_create_marathon_app(self):
    """create_marathon_app submits the app exactly once and then waits for it.

    ``time.sleep`` is patched for the duration of the call.
    """
    app_id = "fake_creation"
    client = mock.create_autospec(marathon.MarathonClient)
    config = {"id": app_id}

    with mock.patch("paasta_tools.bounce_lib.wait_for_create", autospec=True) as wait_for_create_mock, \
            mock.patch("time.sleep", autospec=True):
        bounce_lib.create_marathon_app(app_id, config, client)

    # The mocks keep their call records after the patches are undone.
    assert client.create_app.call_count == 1
    positional_args = client.create_app.call_args[0]
    submitted_app = positional_args[1]
    assert submitted_app.id == app_id
    wait_for_create_mock.assert_called_once_with(config["id"], client)
def given_an_old_app_to_be_destroyed(context):
    """Create the "old" Marathon app for a bounce scenario.

    Records the app id in ``context.old_ids`` and the app definition in
    ``context.old_app_config``, then creates the app through
    ``bounce_lib.create_marathon_app`` using ``context.marathon_client``.
    ``paasta_tools.bounce_lib.create_app_lock`` is patched out for the
    duration of the create call.

    :param context: scenario context object; mutated as described above
    """
    old_app_name = "bounce.test1.oldapp.confighash"
    context.old_ids = [old_app_name]
    context.old_app_config = {
        'id': old_app_name,
        'cmd': '/bin/sleep 300',
        'instances': 2,
        'backoff_seconds': 1,
        'backoff_factor': 1,
    }
    # A plain `with` replaces contextlib.nested, which is deprecated in
    # Python 2.7 and removed in Python 3; the mock itself was never used.
    with mock.patch('paasta_tools.bounce_lib.create_app_lock'):
        bounce_lib.create_marathon_app(old_app_name, context.old_app_config, context.marathon_client)
def given_an_old_app_to_be_destroyed(context):
    """Create the "old" Marathon app for a bounce scenario.

    Records the app id in ``context.old_ids`` and the app definition in
    ``context.old_app_config``, then creates the app through
    ``bounce_lib.create_marathon_app`` using ``context.marathon_client``.
    ``paasta_tools.bounce_lib.create_app_lock`` is patched out for the
    duration of the create call.

    :param context: scenario context object; mutated as described above
    """
    old_app_name = "bounce.test1.oldapp.confighash"
    context.old_ids = [old_app_name]
    context.old_app_config = {
        'id': old_app_name,
        'cmd': '/bin/sleep 300',
        'instances': 2,
        'backoff_seconds': 0.1,
        'backoff_factor': 1,
    }
    # A plain `with` replaces contextlib.nested, which is deprecated in
    # Python 2.7 and removed in Python 3; the mock itself was never used.
    with mock.patch('paasta_tools.bounce_lib.create_app_lock'):
        bounce_lib.create_marathon_app(old_app_name, context.old_app_config, context.marathon_client)
def test_create_marathon_app(self):
    """create_marathon_app takes the app lock, submits the app once, then waits for it."""
    fake_client = mock.create_autospec(marathon.MarathonClient)
    fake_config = {'id': 'fake_creation'}
    # A multi-manager `with` replaces contextlib.nested, which is deprecated
    # in Python 2.7 and removed in Python 3. Patches are still entered in the
    # same order (lock first, then wait_for_create).
    with mock.patch(
        'paasta_tools.bounce_lib.create_app_lock',
        spec=contextlib.contextmanager,
        autospec=None,
    ) as lock_patch, mock.patch(
        'paasta_tools.bounce_lib.wait_for_create',
        autospec=True,
    ) as wait_patch:
        bounce_lib.create_marathon_app('fake_creation', fake_config, fake_client)
        assert lock_patch.called
        assert fake_client.create_app.call_count == 1
        actual_call_args = fake_client.create_app.call_args
        # Second positional argument of create_app is the submitted app object.
        actual_config = actual_call_args[0][1]
        assert actual_config.id == 'fake_creation'
        wait_patch.assert_called_once_with(fake_config['id'], fake_client)
def given_an_old_app_to_be_destroyed_constraints(context, constraints):
    """Create the "old" Marathon app, with placement constraints, for a bounce scenario.

    The app id is stored in ``context.old_ids`` and the app definition in
    ``context.old_app_config``; the app is then created through
    ``bounce_lib.create_marathon_app`` using ``context.current_client``.

    :param context: scenario context object; mutated as described above
    :param constraints: string containing a Python expression that evaluates
        to the value stored under the ``constraints`` key of the app config
    """
    # NOTE(review): eval() executes arbitrary code -- confirm `constraints`
    # only ever comes from trusted test fixtures. Kept as the first statement
    # so the expression sees the same local names as before.
    parsed_constraints = eval(constraints)

    app_id = "bounce.test1.oldapp.confighash"
    docker_container = {
        "type": "DOCKER",
        "docker": {"network": "BRIDGE", "image": "busybox"},
    }
    app_config = {
        "id": app_id,
        "cmd": "/bin/sleep 300",
        "instances": 2,
        "container": docker_container,
        "backoff_seconds": 1,
        "backoff_factor": 1,
        "constraints": parsed_constraints,
    }

    context.old_ids = [app_id]
    context.old_app_config = app_config
    bounce_lib.create_marathon_app(app_id, context.old_app_config, context.current_client)
def given_an_old_app_to_be_destroyed_constraints(context, constraints):
    """Create the "old" Marathon app, with placement constraints, for a bounce scenario.

    The app id is stored in ``context.old_ids`` and the app definition in
    ``context.old_app_config``; the app is then created through
    ``bounce_lib.create_marathon_app`` using ``context.marathon_client``.

    :param context: scenario context object; mutated as described above
    :param constraints: string containing a Python expression that evaluates
        to the value stored under the ``constraints`` key of the app config
    """
    # NOTE(review): eval() executes arbitrary code -- confirm `constraints`
    # only ever comes from trusted test fixtures. Kept as the first statement
    # so the expression sees the same local names as before.
    parsed_constraints = eval(constraints)

    app_id = "bounce.test1.oldapp.confighash"
    docker_container = {
        'type': 'DOCKER',
        'docker': {
            'network': 'BRIDGE',
            'image': 'busybox',
        },
    }
    app_config = {
        'id': app_id,
        'cmd': '/bin/sleep 300',
        'instances': 2,
        'container': docker_container,
        'backoff_seconds': 1,
        'backoff_factor': 1,
        'constraints': parsed_constraints,
    }

    context.old_ids = [app_id]
    context.old_app_config = app_config
    bounce_lib.create_marathon_app(app_id, context.old_app_config, context.marathon_client)
def do_bounce(
    bounce_func,
    drain_method,
    config,
    new_app_running,
    happy_new_tasks,
    old_app_live_happy_tasks,
    old_app_live_unhappy_tasks,
    old_app_draining_tasks,
    service,
    bounce_method,
    serviceinstance,
    cluster,
    instance,
    marathon_jobid,
    client,
    soa_dir,
):
    """Run one pass of a bounce from the old Marathon apps to ``marathon_jobid``.

    In order: log progress (or send a Sensu keepalive when nothing is in
    flight), ask ``bounce_func`` what to do, create the new app if requested,
    drain the tasks ``bounce_func`` selected, kill every draining task that
    ``drain_method`` reports as safe to kill, and remove old apps that have no
    tasks left.

    :param bounce_func: callable returning a dict with the keys ``create_app``
        and ``tasks_to_drain``
    :param drain_method: object providing ``drain(task)`` and ``is_safe_to_kill(task)``
    :param config: new app config; ``config['instances']`` is read for logging
    :param new_app_running: truthy if the new app already exists
    :param happy_new_tasks: sized collection of healthy tasks of the new app
    :param old_app_live_happy_tasks: dict of old app id -> set of tasks
    :param old_app_live_unhappy_tasks: dict of old app id -> set of tasks
    :param old_app_draining_tasks: dict of old app id -> set of tasks
    :param service: passed to the logger and the Sensu keepalive
    :param bounce_method: bounce method name, used in log lines
    :param serviceinstance: service instance label, used in log lines
    :param cluster: passed to the logger and the Sensu keepalive
    :param instance: passed to the logger and the Sensu keepalive
    :param marathon_jobid: id of the new app, without the leading ``/``
    :param client: Marathon client used for create/kill calls
    :param soa_dir: passed to the Sensu keepalive
    """
    def log_bounce_action(line, level='debug'):
        return _log(service=service, line=line, component='deploy', level=level, cluster=cluster, instance=instance)

    # log if we're not in a steady state.
    if any([(not new_app_running), old_app_live_happy_tasks.keys()]):
        log_bounce_action(
            line=' '.join([
                '%s bounce in progress on %s.' % (bounce_method, serviceinstance),
                'New marathon app %s %s.' % (marathon_jobid, ('exists' if new_app_running else 'not created yet')),
                '%d new tasks to bring up.' % (config['instances'] - len(happy_new_tasks)),
                '%d old tasks receiving traffic and happy.' % len(bounce_lib.flatten_tasks(old_app_live_happy_tasks)),
                '%d old tasks unhappy.' % len(bounce_lib.flatten_tasks(old_app_live_unhappy_tasks)),
                '%d old tasks draining.' % len(bounce_lib.flatten_tasks(old_app_draining_tasks)),
                '%d old apps.' % len(old_app_live_happy_tasks.keys()),
            ]),
            level='event',
        )
    else:
        # In a steady state. Let's let Sensu know everything is fine.
        send_sensu_bounce_keepalive(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
        )

    all_draining_tasks = set()

    actions = bounce_func(
        new_config=config,
        new_app_running=new_app_running,
        happy_new_tasks=happy_new_tasks,
        old_app_live_happy_tasks=old_app_live_happy_tasks,
        old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
    )

    if actions['create_app'] and not new_app_running:
        log_bounce_action(
            line='%s bounce creating new app with app_id %s' % (bounce_method, marathon_jobid),
        )
        bounce_lib.create_marathon_app(marathon_jobid, config, client)

    if len(actions['tasks_to_drain']) > 0:
        # Group by app id only so that one log line is emitted per app.
        tasks_to_drain_by_app_id = defaultdict(set)
        for task in actions['tasks_to_drain']:
            tasks_to_drain_by_app_id[task.app_id].add(task)
        for app_id, tasks in tasks_to_drain_by_app_id.items():
            log_bounce_action(
                line='%s bounce draining %d old tasks with app_id %s' %
                (bounce_method, len(tasks), app_id),
            )

    for task in actions['tasks_to_drain']:
        all_draining_tasks.add(task)
        drain_method.drain(task)

    # Tasks that were already draining before this pass are kill candidates too.
    for tasks in old_app_draining_tasks.values():
        for task in tasks:
            all_draining_tasks.add(task)

    tasks_to_kill = set()
    for task in all_draining_tasks:
        if drain_method.is_safe_to_kill(task):
            tasks_to_kill.add(task)
            log_bounce_action(line='%s bounce killing drained task %s' % (bounce_method, task.id))

    kill_given_tasks(client=client, task_ids=[task.id for task in tasks_to_kill], scale=True)

    # An old app can go once every one of its tasks is being killed in this pass.
    apps_to_kill = []
    for app in old_app_live_happy_tasks.keys():
        if app != '/%s' % marathon_jobid:
            live_happy_tasks = old_app_live_happy_tasks[app]
            live_unhappy_tasks = old_app_live_unhappy_tasks[app]
            draining_tasks = old_app_draining_tasks[app]

            if 0 == len((live_happy_tasks | live_unhappy_tasks | draining_tasks) - tasks_to_kill):
                apps_to_kill.append(app)

    if apps_to_kill:
        log_bounce_action(
            line='%s bounce removing old unused apps with app_ids: %s' % (bounce_method, ', '.join(apps_to_kill)),
        )
        bounce_lib.kill_old_ids(apps_to_kill, client)

    all_old_tasks = set.union(set(), *old_app_live_happy_tasks.values())
    all_old_tasks = set.union(all_old_tasks, *old_app_live_unhappy_tasks.values())
    all_old_tasks = set.union(all_old_tasks, *old_app_draining_tasks.values())

    # log if we appear to be finished
    if all([
        (apps_to_kill or tasks_to_kill),
        # list(...) rather than .keys(): on Python 3 a list never compares
        # equal to a dict view, so this event could never be logged.
        apps_to_kill == list(old_app_live_happy_tasks),
        tasks_to_kill == all_old_tasks,
    ]):
        log_bounce_action(
            line='%s bounce on %s finishing. Now running %s' % (bounce_method, serviceinstance, marathon_jobid),
            level='event',
        )
def do_bounce(
    bounce_func: bounce_lib.BounceMethod,
    drain_method: drain_lib.DrainMethod,
    config: marathon_tools.FormattedMarathonAppDict,
    new_app_running: bool,
    happy_new_tasks: List[Tuple[MarathonTask, MarathonClient]],
    old_app_live_happy_tasks: Dict[Tuple[str, MarathonClient], Set[MarathonTask]],
    old_app_live_unhappy_tasks: Dict[Tuple[str, MarathonClient], Set[MarathonTask]],
    old_app_draining_tasks: Dict[Tuple[str, MarathonClient], Set[MarathonTask]],
    old_app_at_risk_tasks: Dict[Tuple[str, MarathonClient], Set[MarathonTask]],
    service: str,
    bounce_method: str,
    serviceinstance: str,
    cluster: str,
    instance: str,
    marathon_jobid: str,
    clients: marathon_tools.MarathonClients,
    soa_dir: str,
    job_config: marathon_tools.MarathonServiceConfig,
    bounce_margin_factor: float = 1.0,
) -> Optional[float]:
    """Run one pass of a bounce from the old Marathon apps to ``marathon_jobid``.

    In order: log progress, ask ``bounce_func`` what to do, create the new app
    if requested, drain and kill old tasks (grouped by the client that owns
    them), reserve resources on the hosts of killed at-risk tasks, and remove
    old apps that have no tasks left.

    :param bounce_func: callable returning a dict with the keys ``create_app``
        and ``tasks_to_drain``
    :param drain_method: drain method handed to ``drain_tasks_and_find_tasks_to_kill``
    :param config: new app config; ``config['instances']`` is read for logging
    :param new_app_running: whether the new app already exists
    :param happy_new_tasks: healthy tasks of the new app
    :param old_app_live_happy_tasks: (app id, client) -> set of tasks
    :param old_app_live_unhappy_tasks: (app id, client) -> set of tasks
    :param old_app_draining_tasks: (app id, client) -> set of tasks
    :param old_app_at_risk_tasks: (app id, client) -> set of tasks
    :param service: passed to the logger
    :param bounce_method: bounce method name, used in log lines
    :param serviceinstance: service instance label, used in log lines
    :param cluster: passed to the logger
    :param instance: passed to the logger
    :param marathon_jobid: id of the new app, without the leading ``/``
    :param clients: used to look up the client that should run the new app
    :param soa_dir: not read by this function
    :param job_config: passed to ``clients.get_current_client_for_service``
    :param bounce_margin_factor: forwarded to ``bounce_func`` as ``margin_factor``
    :returns: 60 when another pass is needed (old tasks remain, the new app is
        not running, or app creation hit a 409); ``None`` when there is
        nothing left to do
    :raises MarathonHttpError: if app creation fails with a status other than 409
    """
    def log_bounce_action(line: str, level: str = 'debug') -> None:
        return _log(
            service=service,
            line=line,
            component='deploy',
            level=level,
            cluster=cluster,
            instance=instance,
        )

    # log if we're not in a steady state.
    if any([
        (not new_app_running),
        old_app_live_happy_tasks.keys(),
    ]):
        log_bounce_action(
            line=' '.join([
                '%s bounce in progress on %s.' % (bounce_method, serviceinstance),
                'New marathon app %s %s.' % (marathon_jobid, ('exists' if new_app_running else 'not created yet')),
                '%d new tasks to bring up.' % (config['instances'] - len(happy_new_tasks)),
                '%d old tasks receiving traffic and happy.' % len(bounce_lib.flatten_tasks(old_app_live_happy_tasks)),
                '%d old tasks unhappy.' % len(bounce_lib.flatten_tasks(old_app_live_unhappy_tasks)),
                '%d old tasks draining.' % len(bounce_lib.flatten_tasks(old_app_draining_tasks)),
                '%d old tasks at risk.' % len(bounce_lib.flatten_tasks(old_app_at_risk_tasks)),
                '%d old apps.' % len(old_app_live_happy_tasks.keys()),
            ]),
            level='event',
        )
    else:
        log.debug("Nothing to do, bounce is in a steady state")

    new_client = clients.get_current_client_for_service(job_config)

    old_non_draining_tasks = list(
        old_app_tasks_to_task_client_pairs(old_app_live_happy_tasks),
    ) + list(
        old_app_tasks_to_task_client_pairs(old_app_live_unhappy_tasks),
    ) + list(
        old_app_tasks_to_task_client_pairs(old_app_at_risk_tasks),
    )

    actions = bounce_func(
        new_config=config,
        new_app_running=new_app_running,
        happy_new_tasks=happy_new_tasks,
        old_non_draining_tasks=old_non_draining_tasks,
        margin_factor=bounce_margin_factor,
    )

    if actions['create_app'] and not new_app_running:
        log_bounce_action(
            line='%s bounce creating new app with app_id %s' % (bounce_method, marathon_jobid),
        )
        with requests_cache.disabled():
            try:
                bounce_lib.create_marathon_app(
                    app_id=marathon_jobid,
                    config=config,
                    client=new_client,
                )
            except MarathonHttpError as e:
                if e.status_code == 409:
                    log.warning(
                        "Failed to create, app %s already exists. This means another bounce beat us to it."
                        " Skipping the rest of the bounce for this run" % marathon_jobid,
                    )
                    return 60
                raise

    tasks_to_kill = drain_tasks_and_find_tasks_to_kill(
        tasks_to_drain=actions['tasks_to_drain'],
        already_draining_tasks=old_app_tasks_to_task_client_pairs(old_app_draining_tasks),
        drain_method=drain_method,
        log_bounce_action=log_bounce_action,
        bounce_method=bounce_method,
        at_risk_tasks=old_app_tasks_to_task_client_pairs(old_app_at_risk_tasks),
    )

    # tasks_to_kill holds (task, client) pairs. Keep a set of the bare tasks
    # as well, so they can be compared against the Set[MarathonTask] values of
    # the old_app_* dicts below.
    killed_tasks: Set[MarathonTask] = {task for task, _ in tasks_to_kill}

    tasks_to_kill_by_client: Dict[MarathonClient, List[MarathonTask]] = defaultdict(list)
    for task, client in tasks_to_kill:
        tasks_to_kill_by_client[client].append(task)

    for client, tasks in tasks_to_kill_by_client.items():
        kill_given_tasks(client=client, task_ids=[task.id for task in tasks], scale=True)

    for task in bounce_lib.flatten_tasks(old_app_at_risk_tasks):
        # Compare against bare tasks: testing `task in tasks_to_kill` checked a
        # task against (task, client) pairs and therefore never matched.
        if task in killed_tasks:
            hostname = task.host
            try:
                reserve_all_resources([hostname])
            except HTTPError:
                log.warning("Failed to reserve resources on %s" % hostname)

    # An old app can go once every one of its tasks is being killed in this pass.
    apps_to_kill: List[Tuple[str, MarathonClient]] = []
    for app, client in old_app_live_happy_tasks.keys():
        if app != '/%s' % marathon_jobid or client != new_client:
            live_happy_tasks = old_app_live_happy_tasks[(app, client)]
            live_unhappy_tasks = old_app_live_unhappy_tasks[(app, client)]
            draining_tasks = old_app_draining_tasks[(app, client)]
            at_risk_tasks = old_app_at_risk_tasks[(app, client)]

            remaining_tasks = (
                live_happy_tasks | live_unhappy_tasks | draining_tasks | at_risk_tasks
            ) - killed_tasks

            if 0 == len(remaining_tasks):
                apps_to_kill.append((app, client))

    if apps_to_kill:
        log_bounce_action(
            line='%s bounce removing old unused apps with app_ids: %s' % (
                bounce_method,
                ', '.join([app for app, client in apps_to_kill]),
            ),
        )
        with requests_cache.disabled():
            for app_id, client in apps_to_kill:
                bounce_lib.kill_old_ids([app_id], client)

    all_old_tasks: Set[MarathonTask] = set()
    all_old_tasks = set.union(all_old_tasks, *old_app_live_happy_tasks.values())
    all_old_tasks = set.union(all_old_tasks, *old_app_live_unhappy_tasks.values())
    all_old_tasks = set.union(all_old_tasks, *old_app_draining_tasks.values())
    all_old_tasks = set.union(all_old_tasks, *old_app_at_risk_tasks.values())

    if all_old_tasks or (not new_app_running):
        # Still have more work to do, try again in 60 seconds
        return 60
    else:
        # log if we appear to be finished
        if all([
            (apps_to_kill or tasks_to_kill),
            apps_to_kill == list(old_app_live_happy_tasks),
            # Bare tasks on both sides; (task, client) pairs could only equal
            # a set of tasks when both were empty.
            killed_tasks == all_old_tasks,
        ]):
            log_bounce_action(
                line='%s bounce on %s finishing. Now running %s' % (
                    bounce_method,
                    serviceinstance,
                    marathon_jobid,
                ),
                level='event',
            )
        return None
def do_bounce(
    bounce_func,
    drain_method,
    config,
    new_app_running,
    happy_new_tasks,
    old_app_live_happy_tasks,
    old_app_live_unhappy_tasks,
    old_app_draining_tasks,
    service,
    bounce_method,
    serviceinstance,
    cluster,
    instance,
    marathon_jobid,
    client,
    soa_dir,
):
    """Run one pass of a bounce from the old Marathon apps to ``marathon_jobid``.

    In order: log progress (or send a Sensu keepalive when nothing is in
    flight), ask ``bounce_func`` what to do, create the new app if requested,
    drain the tasks ``bounce_func`` selected, kill every draining task that
    ``drain_method`` reports as safe to kill, and remove old apps that have no
    tasks left.

    :param bounce_func: callable returning a dict with the keys ``create_app``
        and ``tasks_to_drain``
    :param drain_method: object providing ``drain(task)`` and ``is_safe_to_kill(task)``
    :param config: new app config; ``config['instances']`` is read for logging
    :param new_app_running: truthy if the new app already exists
    :param happy_new_tasks: sized collection of healthy tasks of the new app
    :param old_app_live_happy_tasks: dict of old app id -> set of tasks
    :param old_app_live_unhappy_tasks: dict of old app id -> set of tasks
    :param old_app_draining_tasks: dict of old app id -> set of tasks
    :param service: passed to the logger and the Sensu keepalive
    :param bounce_method: bounce method name, used in log lines
    :param serviceinstance: service instance label, used in log lines
    :param cluster: passed to the logger and the Sensu keepalive
    :param instance: passed to the logger and the Sensu keepalive
    :param marathon_jobid: id of the new app, without the leading ``/``
    :param client: client used for create/kill calls; must provide
        ``kill_given_tasks(task_ids=..., scale=...)``
    :param soa_dir: passed to the Sensu keepalive
    """
    def log_bounce_action(line, level='debug'):
        return _log(
            service=service,
            line=line,
            component='deploy',
            level=level,
            cluster=cluster,
            instance=instance
        )

    # log if we're not in a steady state.
    if any([
        (not new_app_running),
        old_app_live_happy_tasks.keys()
    ]):
        log_bounce_action(
            line=' '.join([
                '%s bounce in progress on %s.' % (bounce_method, serviceinstance),
                'New marathon app %s %s.' % (marathon_jobid, ('exists' if new_app_running else 'not created yet')),
                '%d new tasks to bring up.' % (config['instances'] - len(happy_new_tasks)),
                '%d old tasks receiving traffic and happy.' % len(bounce_lib.flatten_tasks(old_app_live_happy_tasks)),
                '%d old tasks unhappy.' % len(bounce_lib.flatten_tasks(old_app_live_unhappy_tasks)),
                '%d old tasks draining.' % len(bounce_lib.flatten_tasks(old_app_draining_tasks)),
                '%d old apps.' % len(old_app_live_happy_tasks.keys()),
            ]),
            level='event',
        )
    else:
        # In a steady state. Let's let Sensu know everything is fine.
        send_sensu_bounce_keepalive(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
        )

    all_draining_tasks = set()

    actions = bounce_func(
        new_config=config,
        new_app_running=new_app_running,
        happy_new_tasks=happy_new_tasks,
        old_app_live_happy_tasks=old_app_live_happy_tasks,
        old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
    )

    if actions['create_app'] and not new_app_running:
        log_bounce_action(
            line='%s bounce creating new app with app_id %s' % (bounce_method, marathon_jobid),
        )
        bounce_lib.create_marathon_app(marathon_jobid, config, client)

    if len(actions['tasks_to_drain']) > 0:
        # Group by app id only so that one log line is emitted per app.
        tasks_to_drain_by_app_id = defaultdict(set)
        for task in actions['tasks_to_drain']:
            tasks_to_drain_by_app_id[task.app_id].add(task)
        for app_id, tasks in tasks_to_drain_by_app_id.items():
            log_bounce_action(
                line='%s bounce draining %d old tasks with app_id %s' %
                (bounce_method, len(tasks), app_id),
            )

    for task in actions['tasks_to_drain']:
        all_draining_tasks.add(task)
        drain_method.drain(task)

    # Tasks that were already draining before this pass are kill candidates too.
    for tasks in old_app_draining_tasks.values():
        for task in tasks:
            all_draining_tasks.add(task)

    tasks_to_kill = set()
    for task in all_draining_tasks:
        if drain_method.is_safe_to_kill(task):
            tasks_to_kill.add(task)
            log_bounce_action(line='%s bounce killing drained task %s' % (bounce_method, task.id))

    client.kill_given_tasks(task_ids=[task.id for task in tasks_to_kill], scale=True)

    # An old app can go once every one of its tasks is being killed in this pass.
    apps_to_kill = []
    for app in old_app_live_happy_tasks.keys():
        if app != '/%s' % marathon_jobid:
            live_happy_tasks = old_app_live_happy_tasks[app]
            live_unhappy_tasks = old_app_live_unhappy_tasks[app]
            draining_tasks = old_app_draining_tasks[app]

            if 0 == len((live_happy_tasks | live_unhappy_tasks | draining_tasks) - tasks_to_kill):
                apps_to_kill.append(app)

    if apps_to_kill:
        log_bounce_action(
            line='%s bounce removing old unused apps with app_ids: %s' %
            (
                bounce_method,
                ', '.join(apps_to_kill)
            ),
        )
        bounce_lib.kill_old_ids(apps_to_kill, client)

    all_old_tasks = set.union(set(), *old_app_live_happy_tasks.values())
    all_old_tasks = set.union(all_old_tasks, *old_app_live_unhappy_tasks.values())
    all_old_tasks = set.union(all_old_tasks, *old_app_draining_tasks.values())

    # log if we appear to be finished
    if all([
        (apps_to_kill or tasks_to_kill),
        # list(...) rather than .keys(): on Python 3 a list never compares
        # equal to a dict view, so this event could never be logged.
        apps_to_kill == list(old_app_live_happy_tasks),
        tasks_to_kill == all_old_tasks,
    ]):
        log_bounce_action(
            line='%s bounce on %s finishing. Now running %s' %
            (
                bounce_method,
                serviceinstance,
                marathon_jobid
            ),
            level='event',
        )
def do_bounce(
    bounce_func,
    drain_method,
    config,
    new_app_running,
    happy_new_tasks,
    old_app_live_happy_tasks,
    old_app_live_unhappy_tasks,
    old_app_draining_tasks,
    old_app_at_risk_tasks,
    service,
    bounce_method,
    serviceinstance,
    cluster,
    instance,
    marathon_jobid,
    client,
    soa_dir,
    bounce_margin_factor=1.0,
):
    """Perform a single bounce iteration towards the app ``marathon_jobid``.

    Steps, in order: report progress, consult ``bounce_func`` for the actions
    to take, create the new app when asked to, drain and kill old tasks,
    reserve resources on hosts of killed at-risk tasks, and delete old apps
    whose tasks are all being killed. Returns ``None``; returns early if app
    creation fails with HTTP 409.

    :param bounce_func: callable returning a dict with ``create_app`` and ``tasks_to_drain``
    :param drain_method: handed to ``drain_tasks_and_find_tasks_to_kill``
    :param config: new app config; ``config['instances']`` is read for logging
    :param new_app_running: truthy if the new app already exists
    :param happy_new_tasks: sized collection of healthy new tasks
    :param old_app_live_happy_tasks: dict of old app id -> set of tasks
    :param old_app_live_unhappy_tasks: dict of old app id -> set of tasks
    :param old_app_draining_tasks: dict of old app id -> set of tasks
    :param old_app_at_risk_tasks: dict of old app id -> set of tasks
    :param service: passed to the logger
    :param bounce_method: bounce method name, used in log lines
    :param serviceinstance: service instance label, used in log lines
    :param cluster: passed to the logger
    :param instance: passed to the logger
    :param marathon_jobid: id of the new app, without the leading ``/``
    :param client: Marathon client used for create/kill calls
    :param soa_dir: not read by this function
    :param bounce_margin_factor: forwarded to ``bounce_func`` as ``margin_factor``
    """
    def log_bounce_action(line, level='debug'):
        return _log(
            service=service,
            line=line,
            component='deploy',
            level=level,
            cluster=cluster,
            instance=instance,
        )

    # Steady state means the new app is up and no old apps are left.
    steady_state = new_app_running and not old_app_live_happy_tasks.keys()
    if steady_state:
        log.debug("Nothing to do, bounce is in a steady state")
    else:
        new_app_state = 'exists' if new_app_running else 'not created yet'
        progress_parts = [
            '%s bounce in progress on %s.' % (bounce_method, serviceinstance),
            'New marathon app %s %s.' % (marathon_jobid, new_app_state),
            '%d new tasks to bring up.' % (config['instances'] - len(happy_new_tasks)),
            '%d old tasks receiving traffic and happy.' % len(bounce_lib.flatten_tasks(old_app_live_happy_tasks)),
            '%d old tasks unhappy.' % len(bounce_lib.flatten_tasks(old_app_live_unhappy_tasks)),
            '%d old tasks draining.' % len(bounce_lib.flatten_tasks(old_app_draining_tasks)),
            '%d old tasks at risk.' % len(bounce_lib.flatten_tasks(old_app_at_risk_tasks)),
            '%d old apps.' % len(old_app_live_happy_tasks.keys()),
        ]
        log_bounce_action(line=' '.join(progress_parts), level='event')

    actions = bounce_func(
        new_config=config,
        new_app_running=new_app_running,
        happy_new_tasks=happy_new_tasks,
        old_app_live_happy_tasks=old_app_live_happy_tasks,
        old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
        margin_factor=bounce_margin_factor,
    )

    if actions['create_app'] and not new_app_running:
        creation_message = '%s bounce creating new app with app_id %s' % (bounce_method, marathon_jobid)
        log_bounce_action(line=creation_message)
        with requests_cache.disabled():
            try:
                bounce_lib.create_marathon_app(marathon_jobid, config, client)
            except MarathonHttpError as e:
                if e.status_code != 409:
                    raise
                # 409: the app already exists, so give up on this run.
                log.warning(
                    "Failed to create, app %s already exists. This means another bounce beat us to it."
                    " Skipping the rest of the bounce for this run" % marathon_jobid)
                return

    already_draining = bounce_lib.flatten_tasks(old_app_draining_tasks)
    at_risk = bounce_lib.flatten_tasks(old_app_at_risk_tasks)
    tasks_to_kill = drain_tasks_and_find_tasks_to_kill(
        tasks_to_drain=actions['tasks_to_drain'],
        already_draining_tasks=already_draining,
        drain_method=drain_method,
        log_bounce_action=log_bounce_action,
        bounce_method=bounce_method,
        at_risk_tasks=at_risk,
    )

    ids_to_kill = [task.id for task in tasks_to_kill]
    kill_given_tasks(client=client, task_ids=ids_to_kill, scale=True)

    # Reserve resources on the host of every at-risk task killed in this pass.
    for task in bounce_lib.flatten_tasks(old_app_at_risk_tasks):
        if task not in tasks_to_kill:
            continue
        hostname = task.host
        try:
            reserve_all_resources([hostname])
        except HTTPError:
            log.warning("Failed to reserve resources on %s" % hostname)

    # Collect the old apps whose every task is being killed in this pass.
    apps_to_kill = []
    for app in old_app_live_happy_tasks.keys():
        if app == '/%s' % marathon_jobid:
            continue
        live_happy_tasks = old_app_live_happy_tasks[app]
        live_unhappy_tasks = old_app_live_unhappy_tasks[app]
        draining_tasks = old_app_draining_tasks[app]
        at_risk_tasks = old_app_at_risk_tasks[app]
        app_tasks = live_happy_tasks | live_unhappy_tasks | draining_tasks | at_risk_tasks
        if not (app_tasks - tasks_to_kill):
            apps_to_kill.append(app)

    if apps_to_kill:
        removal_message = '%s bounce removing old unused apps with app_ids: %s' % (
            bounce_method,
            ', '.join(apps_to_kill),
        )
        log_bounce_action(line=removal_message)
        with requests_cache.disabled():
            bounce_lib.kill_old_ids(apps_to_kill, client)

    all_old_tasks = set()
    for tasks_by_app in (
        old_app_live_happy_tasks,
        old_app_live_unhappy_tasks,
        old_app_draining_tasks,
        old_app_at_risk_tasks,
    ):
        all_old_tasks = set.union(all_old_tasks, *tasks_by_app.values())

    # log if we appear to be finished
    did_something = apps_to_kill or tasks_to_kill
    every_old_app_removed = apps_to_kill == list(old_app_live_happy_tasks)
    every_old_task_killed = tasks_to_kill == all_old_tasks
    if did_something and every_old_app_removed and every_old_task_killed:
        log_bounce_action(
            line='%s bounce on %s finishing. Now running %s' % (bounce_method, serviceinstance, marathon_jobid),
            level='event',
        )
def do_bounce(
    bounce_func,
    drain_method,
    config,
    new_app_running,
    happy_new_tasks,
    old_app_live_happy_tasks,
    old_app_live_unhappy_tasks,
    old_app_draining_tasks,
    old_app_at_risk_tasks,
    service,
    bounce_method,
    serviceinstance,
    cluster,
    instance,
    marathon_jobid,
    client,
    soa_dir,
    bounce_margin_factor=1.0,
):
    """Run one pass of a bounce from the old Marathon apps to ``marathon_jobid``.

    In order: log progress, ask ``bounce_func`` what to do, create the new app
    if requested, drain and kill old tasks, reserve resources on the hosts of
    killed at-risk tasks, and remove old apps that have no tasks left.

    :param bounce_func: callable returning a dict with the keys ``create_app``
        and ``tasks_to_drain``
    :param drain_method: handed to ``drain_tasks_and_find_tasks_to_kill``
    :param config: new app config; ``config['instances']`` is read for logging
    :param new_app_running: truthy if the new app already exists
    :param happy_new_tasks: sized collection of healthy tasks of the new app
    :param old_app_live_happy_tasks: dict of old app id -> set of tasks
    :param old_app_live_unhappy_tasks: dict of old app id -> set of tasks
    :param old_app_draining_tasks: dict of old app id -> set of tasks
    :param old_app_at_risk_tasks: dict of old app id -> set of tasks
    :param service: passed to the logger
    :param bounce_method: bounce method name, used in log lines
    :param serviceinstance: service instance label, used in log lines
    :param cluster: passed to the logger
    :param instance: passed to the logger
    :param marathon_jobid: id of the new app, without the leading ``/``
    :param client: Marathon client used for create/kill calls
    :param soa_dir: not read by this function
    :param bounce_margin_factor: forwarded to ``bounce_func`` as ``margin_factor``
    """
    def log_bounce_action(line, level='debug'):
        return _log(
            service=service,
            line=line,
            component='deploy',
            level=level,
            cluster=cluster,
            instance=instance
        )

    # log if we're not in a steady state.
    if any([
        (not new_app_running),
        old_app_live_happy_tasks.keys()
    ]):
        log_bounce_action(
            line=' '.join([
                '%s bounce in progress on %s.' % (bounce_method, serviceinstance),
                'New marathon app %s %s.' % (marathon_jobid, ('exists' if new_app_running else 'not created yet')),
                '%d new tasks to bring up.' % (config['instances'] - len(happy_new_tasks)),
                '%d old tasks receiving traffic and happy.' % len(bounce_lib.flatten_tasks(old_app_live_happy_tasks)),
                '%d old tasks unhappy.' % len(bounce_lib.flatten_tasks(old_app_live_unhappy_tasks)),
                '%d old tasks draining.' % len(bounce_lib.flatten_tasks(old_app_draining_tasks)),
                '%d old tasks at risk.' % len(bounce_lib.flatten_tasks(old_app_at_risk_tasks)),
                '%d old apps.' % len(old_app_live_happy_tasks.keys()),
            ]),
            level='event',
        )
    else:
        log.debug("Nothing to do, bounce is in a steady state")

    actions = bounce_func(
        new_config=config,
        new_app_running=new_app_running,
        happy_new_tasks=happy_new_tasks,
        old_app_live_happy_tasks=old_app_live_happy_tasks,
        old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
        margin_factor=bounce_margin_factor,
    )

    if actions['create_app'] and not new_app_running:
        log_bounce_action(
            line='%s bounce creating new app with app_id %s' % (bounce_method, marathon_jobid),
        )
        with requests_cache.disabled():
            bounce_lib.create_marathon_app(marathon_jobid, config, client)

    tasks_to_kill = drain_tasks_and_find_tasks_to_kill(
        tasks_to_drain=actions['tasks_to_drain'],
        already_draining_tasks=bounce_lib.flatten_tasks(old_app_draining_tasks),
        drain_method=drain_method,
        log_bounce_action=log_bounce_action,
        bounce_method=bounce_method,
        at_risk_tasks=bounce_lib.flatten_tasks(old_app_at_risk_tasks),
    )

    kill_given_tasks(client=client, task_ids=[task.id for task in tasks_to_kill], scale=True)

    # Reserve resources on the host of every at-risk task killed in this pass.
    for task in bounce_lib.flatten_tasks(old_app_at_risk_tasks):
        if task in tasks_to_kill:
            hostname = task.host
            reserve_all_resources([hostname])

    # An old app can go once every one of its tasks is being killed in this pass.
    apps_to_kill = []
    for app in old_app_live_happy_tasks.keys():
        if app != '/%s' % marathon_jobid:
            live_happy_tasks = old_app_live_happy_tasks[app]
            live_unhappy_tasks = old_app_live_unhappy_tasks[app]
            draining_tasks = old_app_draining_tasks[app]
            at_risk_tasks = old_app_at_risk_tasks[app]

            if 0 == len((live_happy_tasks | live_unhappy_tasks | draining_tasks | at_risk_tasks) - tasks_to_kill):
                apps_to_kill.append(app)

    if apps_to_kill:
        log_bounce_action(
            line='%s bounce removing old unused apps with app_ids: %s' %
            (
                bounce_method,
                ', '.join(apps_to_kill)
            ),
        )
        with requests_cache.disabled():
            bounce_lib.kill_old_ids(apps_to_kill, client)

    all_old_tasks = set.union(set(), *old_app_live_happy_tasks.values())
    all_old_tasks = set.union(all_old_tasks, *old_app_live_unhappy_tasks.values())
    all_old_tasks = set.union(all_old_tasks, *old_app_draining_tasks.values())
    all_old_tasks = set.union(all_old_tasks, *old_app_at_risk_tasks.values())

    # log if we appear to be finished
    if all([
        (apps_to_kill or tasks_to_kill),
        # list(...) rather than .keys(): on Python 3 a list never compares
        # equal to a dict view, so this event could never be logged.
        apps_to_kill == list(old_app_live_happy_tasks),
        tasks_to_kill == all_old_tasks,
    ]):
        log_bounce_action(
            line='%s bounce on %s finishing. Now running %s' %
            (
                bounce_method,
                serviceinstance,
                marathon_jobid
            ),
            level='event',
        )
def do_bounce(
    bounce_func,
    drain_method,
    config,
    new_app_running,
    happy_new_tasks,
    old_app_live_tasks,
    old_app_draining_tasks,
    service,
    bounce_method,
    serviceinstance,
    cluster,
    instance,
    marathon_jobid,
    client,
    soa_dir,
):
    """Run one pass of a bounce from the old Marathon apps to ``marathon_jobid``.

    In order: log progress (or send a Sensu keepalive when nothing is in
    flight), ask ``bounce_func`` what to do, create the new app if requested,
    drain the tasks ``bounce_func`` selected, kill every draining task that
    ``drain_method`` reports as safe to kill, and remove old apps that have no
    tasks left.

    :param bounce_func: callable returning a dict with the keys ``create_app``
        and ``tasks_to_drain``
    :param drain_method: object providing ``drain(task)`` and ``is_safe_to_kill(task)``
    :param config: new app config; ``config["instances"]`` is read for logging
    :param new_app_running: truthy if the new app already exists
    :param happy_new_tasks: sized collection of healthy tasks of the new app
    :param old_app_live_tasks: dict of old app id -> set of tasks
    :param old_app_draining_tasks: dict of old app id -> set of tasks
    :param service: passed to the logger and the Sensu keepalive
    :param bounce_method: bounce method name, used in log lines
    :param serviceinstance: service instance label, used in log lines
    :param cluster: passed to the logger and the Sensu keepalive
    :param instance: passed to the logger and the Sensu keepalive
    :param marathon_jobid: id of the new app
    :param client: Marathon client used for create/kill calls
    :param soa_dir: passed to the Sensu keepalive
    """
    def log_bounce_action(line, level="debug"):
        return _log(service=service, line=line, component="deploy", level=level, cluster=cluster, instance=instance)

    # log if we're not in a steady state.
    if any([(not new_app_running), old_app_live_tasks.keys()]):
        log_bounce_action(
            line=" ".join(
                [
                    "%s bounce in progress on %s." % (bounce_method, serviceinstance),
                    "New marathon app %s %s." % (marathon_jobid, ("exists" if new_app_running else "not created yet")),
                    "%d new tasks to bring up." % (config["instances"] - len(happy_new_tasks)),
                    "%d old tasks receiving traffic." % sum(len(tasks) for tasks in old_app_live_tasks.values()),
                    "%d old tasks draining." % sum(len(tasks) for tasks in old_app_draining_tasks.values()),
                    "%d old apps." % len(old_app_live_tasks.keys()),
                ]
            ),
            level="event",
        )
    else:
        # In a steady state. Let's let Sensu know everything is fine.
        send_sensu_bounce_keepalive(service=service, instance=instance, cluster=cluster, soa_dir=soa_dir)

    all_draining_tasks = set()

    actions = bounce_func(
        new_config=config,
        new_app_running=new_app_running,
        happy_new_tasks=happy_new_tasks,
        old_app_live_tasks=old_app_live_tasks,
    )

    if actions["create_app"] and not new_app_running:
        log_bounce_action(line="%s bounce creating new app with app_id %s" % (bounce_method, marathon_jobid))
        bounce_lib.create_marathon_app(marathon_jobid, config, client)

    if len(actions["tasks_to_drain"]) > 0:
        # Group by app id only so that one log line is emitted per app.
        tasks_to_drain_by_app_id = {}
        for task in actions["tasks_to_drain"]:
            tasks_to_drain_by_app_id.setdefault(task.app_id, set()).add(task)
        for app_id, tasks in tasks_to_drain_by_app_id.items():
            log_bounce_action(
                line="%s bounce draining %d old tasks with app_id %s" % (bounce_method, len(tasks), app_id)
            )

    for task in actions["tasks_to_drain"]:
        all_draining_tasks.add(task)
        drain_method.drain(task)

    # Tasks that were already draining before this pass are kill candidates too.
    for tasks in old_app_draining_tasks.values():
        for task in tasks:
            all_draining_tasks.add(task)

    killed_tasks = set()
    for task in all_draining_tasks:
        if drain_method.is_safe_to_kill(task):
            killed_tasks.add(task)
            log_bounce_action(line="%s bounce killing drained task %s" % (bounce_method, task.id))
            marathon_tools.kill_task(client=client, app_id=task.app_id, task_id=task.id, scale=True)

    # An old app can go once every one of its tasks was killed in this pass.
    apps_to_kill = []
    for app in old_app_live_tasks.keys():
        live_tasks = old_app_live_tasks[app]
        draining_tasks = old_app_draining_tasks[app]

        if 0 == len((live_tasks | draining_tasks) - killed_tasks):
            apps_to_kill.append(app)

    if apps_to_kill:
        log_bounce_action(
            line="%s bounce removing old unused apps with app_ids: %s" % (bounce_method, ", ".join(apps_to_kill))
        )
        bounce_lib.kill_old_ids(apps_to_kill, client)

    # Union the per-app sets dict by dict. Concatenating the two .values()
    # results with `+` only works for Python 2 lists and raises TypeError on
    # Python 3 dict views.
    all_old_tasks = set.union(set(), *old_app_live_tasks.values())
    all_old_tasks = set.union(all_old_tasks, *old_app_draining_tasks.values())

    # log if we appear to be finished
    if all(
        [
            (apps_to_kill or killed_tasks),
            # list(...) rather than .keys(): on Python 3 a list never compares
            # equal to a dict view, so this event could never be logged.
            apps_to_kill == list(old_app_live_tasks),
            killed_tasks == all_old_tasks,
        ]
    ):
        log_bounce_action(
            line="%s bounce on %s finishing. Now running %s" % (bounce_method, serviceinstance, marathon_jobid),
            level="event",
        )