Example #1
0
def there_are_num_which_tasks(context, num, which, state, exact):
    """Poll Marathon until the app has the requested number of tasks in ``state``.

    ``num`` is stored on ``context.max_tasks``. The app identified by
    ``which_id(context, which)`` is then fetched up to 120 times, with a call
    to ``time.sleep(0.5)`` after every unsuccessful check.

    :param context: object providing ``marathon_client``, ``service`` and
        ``system_paasta_config``
    :param num: target task count
    :param which: selector handed to ``which_id`` to resolve the app id
    :param state: ``"healthy"`` counts happy tasks, ``"unhealthy"`` counts the
        remaining tasks; any other value never matches
    :param exact: when truthy the count must equal the target, otherwise it
        must be at least the target
    :raises Exception: when no check succeeded within the 120 attempts
    """
    context.max_tasks = num
    app_id = which_id(context, which)

    # 120 attempts * 0.5 seconds between attempts = 60 seconds
    for _attempt in xrange(120):
        app = context.marathon_client.get_app(app_id, embed_tasks=True)
        happy_count = len(get_happy_tasks(app, context.service, "fake_nerve_ns", context.system_paasta_config))

        # Work out how many tasks are currently in the requested state.
        if state == "healthy":
            observed = happy_count
        elif state == "unhealthy":
            observed = len(app.tasks) - happy_count
        else:
            observed = None

        if observed is not None:
            if exact:
                satisfied = observed == context.max_tasks
            else:
                satisfied = observed >= context.max_tasks
            if satisfied:
                return
        time.sleep(0.5)

    raise Exception("timed out waiting for %d %s tasks on %s; there are %d" %
                    (context.max_tasks, state, app_id, len(app.tasks)))
Example #2
0
def when_deploy_service_initiated(context, bounce_method, drain_method):
    """Call ``setup_marathon_job.deploy_service`` once with bounce helpers patched.

    ``bounce_lib.get_happy_tasks`` is replaced by a wrapper that truncates the
    real result to ``context.max_tasks`` entries; the zookeeper lock, app lock,
    ``time.sleep`` and ``load_system_paasta_config`` are replaced by mocks.

    :param context: object providing ``service``, ``instance``, ``new_config``,
        ``marathon_client``, ``cluster`` and ``max_tasks``
    :param bounce_method: forwarded as ``bounce_method``
    :param drain_method: forwarded as ``drain_method_name``
    """
    def limited_happy_tasks(app, _, __, **kwargs):
        # Wrap function call so we can select a subset of tasks or test
        # intermediate steps, like when an app is not completely up
        return get_happy_tasks(app, context.service, "fake_nerve_ns")[:context.max_tasks]

    patchers = (
        mock.patch('paasta_tools.bounce_lib.get_happy_tasks', autospec=True, side_effect=limited_happy_tasks),
        mock.patch('paasta_tools.bounce_lib.bounce_lock_zookeeper', autospec=True),
        mock.patch('paasta_tools.bounce_lib.create_app_lock', autospec=True),
        mock.patch('paasta_tools.bounce_lib.time.sleep', autospec=True),
        mock.patch('paasta_tools.setup_marathon_job.load_system_paasta_config', autospec=True),
    )
    with contextlib.nested(*patchers) as entered:
        # Only the last patch (load_system_paasta_config) needs configuring.
        mock_load_system_paasta_config = entered[-1]
        mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value=context.cluster)

        deploy_kwargs = dict(
            service=context.service,
            instance=context.instance,
            marathon_jobid=context.new_config['id'],
            config=context.new_config,
            client=context.marathon_client,
            bounce_method=bounce_method,
            drain_method_name=drain_method,
            drain_method_params={},
            nerve_ns=context.instance,
            bounce_health_params={},
            soa_dir=None,
        )
        setup_marathon_job.deploy_service(**deploy_kwargs)
Example #3
0
    def test_get_happy_tasks_check_haproxy_multiple_locations(self):
        """With check_haproxy=True, tasks registered on hosts in either region are happy.

        The registered-task lookup is mocked to return one slice of the tasks
        per call, and the slave grouping is mocked to report two regions.
        """
        tasks = [mock.Mock(health_check_results=[mock.Mock(alive=True)]) for _ in xrange(5)]
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        registered_patcher = mock.patch(
            'paasta_tools.bounce_lib.get_registered_marathon_tasks',
            side_effect=[tasks[2:3], tasks[3:]], autospec=True,
        )
        slaves_patcher = mock.patch('paasta_tools.mesos_tools.get_mesos_slaves_grouped_by_attribute', autospec=True)

        with contextlib.nested(registered_patcher, slaves_patcher) as (registered_patch, slaves_patch):
            slaves_patch.return_value = {
                'fake_region': ['fake_host1'],
                'fake_other_region': ['fake_host2'],
            }
            happy = bounce_lib.get_happy_tasks(fake_app, 'service', 'namespace', check_haproxy=True)
            assert happy == tasks[2:]

            # Each host must have been queried for its registered tasks.
            for host in ('fake_host1', 'fake_host2'):
                registered_patch.assert_any_call(
                    host,
                    DEFAULT_SYNAPSE_PORT,
                    'service.namespace',
                    tasks,
                )
Example #4
0
def get_old_happy_unhappy_draining_tasks(other_apps, drain_method, service, nerve_ns, bounce_health_params):
    """Bucket every task of the old apps into happy, unhappy or draining.

    A task is "draining" when ``drain_method.is_draining(task)`` is truthy.
    Otherwise it is "happy" when ``bounce_lib.get_happy_tasks`` returned it,
    and "unhappy" when it did not.

    :returns: a 3-tuple of dicts ``(happy, unhappy, draining)``, each mapping
        ``app.id`` to the set of that app's tasks in the category
    """
    live_happy_by_app = {}
    live_unhappy_by_app = {}
    draining_by_app = {}

    for app in other_apps:
        happy_tasks = bounce_lib.get_happy_tasks(app, service, nerve_ns, **bounce_health_params)

        happy, unhappy, draining = set(), set(), set()
        for task in app.tasks:
            # Draining takes precedence over the happy/unhappy split.
            if drain_method.is_draining(task):
                draining.add(task)
            elif task in happy_tasks:
                happy.add(task)
            else:
                unhappy.add(task)

        live_happy_by_app[app.id] = happy
        live_unhappy_by_app[app.id] = unhappy
        draining_by_app[app.id] = draining

    return live_happy_by_app, live_unhappy_by_app, draining_by_app
Example #5
0
 def test_get_happy_tasks_when_some_unhealthy(self):
     """Only the task whose healthcheck result is alive should be reported happy."""
     failing_results = [mock.Mock(alive=False)]
     passing_results = [mock.Mock(alive=True)]
     # The first two tasks share the failing results; the last one passes.
     tasks = [
         mock.Mock(health_check_results=results)
         for results in (failing_results, failing_results, passing_results)
     ]
     fake_app = mock.Mock(tasks=tasks, health_checks=[])
     happy = bounce_lib.get_happy_tasks(fake_app, 'service', 'namespace')
     assert happy == tasks[-1:]
Example #6
0
    def test_get_happy_tasks_min_task_uptime_when_unhealthy(self):
        """Tasks whose healthcheck is failing are not happy, whatever their age."""
        now = datetime.datetime(2000, 1, 1, 0, 0, 0)
        # Task number k started k minutes before `now`; every healthcheck fails.
        tasks = []
        for minutes_old in xrange(5):
            tasks.append(mock.Mock(
                health_check_results=[mock.Mock(alive=False)],
                started_at=(now - datetime.timedelta(minutes=minutes_old)),
            ))
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        frozen_clock = mock.patch('paasta_tools.bounce_lib.datetime.datetime', utcnow=lambda: now, autospec=True)
        with frozen_clock:
            happy = bounce_lib.get_happy_tasks(fake_app, 'service', 'namespace', min_task_uptime=121)
            assert happy == []
Example #7
0
def when_setup_service_initiated(context):
    """Run ``setup_marathon_job.setup_service`` with bounce helpers patched, retrying on MarathonHttpError.

    ``setup_service`` is called up to 120 times for ``context.service`` /
    ``context.instance``. The function returns after the first call that does
    not raise ``MarathonHttpError`` (asserting that its return code is 0) and
    raises ``Exception`` when every attempt raised ``MarathonHttpError``.

    :param context: object providing ``service``, ``instance``,
        ``marathon_client``, ``new_marathon_service_config``, ``cluster``,
        ``system_paasta_config`` and ``max_tasks``
    """
    # Replacement for mesos.cli.master.CFG while the patches are active.
    config = {
        'master': '%s' % get_service_connection_string('mesosmaster'),
        'scheme': 'http',
        'response_timeout': 5,
    }
    with contextlib.nested(
        mock.patch(
            'paasta_tools.bounce_lib.get_happy_tasks',
            autospec=True,
            # Wrap function call so we can select a subset of tasks or test
            # intermediate steps, like when an app is not completely up
            side_effect=lambda app, _, __, ___, **kwargs: get_happy_tasks(
                app, context.service, "fake_nerve_ns", context.system_paasta_config)[:context.max_tasks],
        ),
        mock.patch('paasta_tools.bounce_lib.bounce_lock_zookeeper', autospec=True),
        mock.patch('paasta_tools.bounce_lib.create_app_lock', autospec=True),
        mock.patch('paasta_tools.bounce_lib.time.sleep', autospec=True),
        mock.patch('paasta_tools.setup_marathon_job.load_system_paasta_config', autospec=True),
        mock.patch('paasta_tools.setup_marathon_job._log', autospec=True),
        mock.patch('paasta_tools.marathon_tools.get_config_hash', autospec=True, return_value='confighash'),
        mock.patch('paasta_tools.marathon_tools.get_code_sha_from_dockerurl', autospec=True, return_value='newapp'),
        mock.patch('paasta_tools.marathon_tools.get_docker_url', autospec=True, return_value='busybox'),
        mock.patch('paasta_tools.paasta_maintenance.load_credentials', autospec=True),
        mock.patch.object(mesos.cli.master, 'CFG', config),
    # The names below are bound positionally, in the same order as the patches above.
    ) as (
        _,
        _,
        _,
        _,
        mock_load_system_paasta_config,
        _,
        _,
        _,
        _,
        mock_load_credentials,
        _,
    ):
        # NOTE(review): load_credentials is already patched at this point, so this call
        # presumably hits the mock rather than the real function — confirm this is intended.
        mock_load_credentials.side_effect = paasta_maintenance.load_credentials(mesos_secrets='/etc/mesos-slave-secret')
        mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value=context.cluster)
        # 120 * 0.5 = 60 seconds
        # NOTE(review): 'paasta_tools.bounce_lib.time.sleep' is patched above; if that is the
        # shared `time` module, the time.sleep below is presumably a no-op mock — confirm.
        for _ in xrange(120):
            try:
                (code, message) = setup_marathon_job.setup_service(
                    service=context.service,
                    instance=context.instance,
                    client=context.marathon_client,
                    service_marathon_config=context.new_marathon_service_config,
                    soa_dir='/nail/etc/services',
                )
                assert code == 0, message
                return
            except MarathonHttpError:
                # Retry after a short pause.
                time.sleep(0.5)
        raise Exception("Unable to acquire app lock for setup_marathon_job.setup_service")
Example #8
0
    def test_get_happy_tasks_min_task_uptime(self):
        """Tasks started more recently than min_task_uptime must not be reported happy."""
        now = datetime.datetime(2000, 1, 1, 0, 0, 0)
        # Task number k started k minutes before `now`.
        start_times = [now - datetime.timedelta(minutes=age) for age in xrange(5)]
        tasks = [mock.Mock(health_check_results=[], started_at=(started)) for started in start_times]
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        # Mocking datetime.datetime.utcnow directly is apparently difficult, so the whole
        # datetime.datetime is mocked instead and handed a utcnow attribute.
        frozen_clock = mock.patch('paasta_tools.bounce_lib.datetime.datetime', utcnow=lambda: now, autospec=True)
        with frozen_clock:
            happy = bounce_lib.get_happy_tasks(fake_app, 'service', 'namespace', min_task_uptime=121)
            assert happy == tasks[3:]
Example #9
0
 def test_get_happy_tasks_when_some_unhealthy(self):
     """Only the task whose healthcheck result is alive should be reported happy."""
     failing_results = [mock.Mock(alive=False)]
     passing_results = [mock.Mock(alive=True)]
     # The first two tasks share the failing results; the last one passes.
     tasks = [
         mock.Mock(health_check_results=results)
         for results in (failing_results, failing_results, passing_results)
     ]
     fake_app = mock.Mock(tasks=tasks, health_checks=[])
     happy = bounce_lib.get_happy_tasks(fake_app, 'service', 'namespace', self.fake_system_paasta_config())
     assert happy == tasks[-1:]
Example #10
0
def when_deploy_service_initiated(context, bounce_method, drain_method):
    """Run ``setup_marathon_job.deploy_service`` with bounce helpers patched, retrying on MarathonHttpError.

    ``deploy_service`` is called up to 120 times. The function returns after
    the first call that does not raise ``MarathonHttpError``.

    :param context: object providing ``service``, ``instance``, ``new_config``,
        ``marathon_client``, ``cluster``, ``system_paasta_config`` and
        ``max_tasks``
    :param bounce_method: forwarded as ``bounce_method``
    :param drain_method: forwarded as ``drain_method_name``
    :raises Exception: when every attempt raised ``MarathonHttpError``
    """
    with contextlib.nested(
            mock.patch(
                'paasta_tools.bounce_lib.get_happy_tasks',
                autospec=True,
                # Wrap function call so we can select a subset of tasks or test
                # intermediate steps, like when an app is not completely up
                side_effect=lambda app, _, __, ___, **kwargs: get_happy_tasks(
                    app, context.service, "fake_nerve_ns",
                    context.system_paasta_config)[:context.max_tasks],
            ),
            mock.patch('paasta_tools.bounce_lib.bounce_lock_zookeeper',
                       autospec=True),
            mock.patch('paasta_tools.bounce_lib.create_app_lock',
                       autospec=True),
            mock.patch('paasta_tools.bounce_lib.time.sleep', autospec=True),
            mock.patch(
                'paasta_tools.setup_marathon_job.load_system_paasta_config',
                autospec=True),
            mock.patch('paasta_tools.setup_marathon_job._log', autospec=True),
    ) as (
            # Bound positionally, in the same order as the patches above.
            _,
            _,
            _,
            _,
            mock_load_system_paasta_config,
            _,
    ):
        mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(
            return_value=context.cluster)
        # 120 * 0.5 = 60 seconds
        # NOTE(review): 'paasta_tools.bounce_lib.time.sleep' is patched above; if that is
        # the shared `time` module, the time.sleep below is presumably a mock — confirm.
        for _ in xrange(120):
            try:
                setup_marathon_job.deploy_service(
                    service=context.service,
                    instance=context.instance,
                    marathon_jobid=context.new_config['id'],
                    config=context.new_config,
                    client=context.marathon_client,
                    bounce_method=bounce_method,
                    drain_method_name=drain_method,
                    drain_method_params={},
                    nerve_ns=context.instance,
                    bounce_health_params={},
                    soa_dir=None,
                )
                return
            except MarathonHttpError:
                time.sleep(0.5)
        # Message typo fixed ("qcuiqre" -> "acquire").
        raise Exception(
            "Unable to acquire app lock for setup_marathon_job.deploy_service")
Example #11
0
def get_tasks_by_state_for_app(
    app: MarathonApp,
    drain_method: drain_lib.DrainMethod,
    service: str,
    nerve_ns: str,
    bounce_health_params: Dict[str, Any],
    system_paasta_config: SystemPaastaConfig,
    log_deploy_error: LogDeployError,
    draining_hosts: Collection[str],
) -> TasksByStateDict:
    """Sort the tasks of ``app`` into happy / unhappy / draining / at_risk sets.

    Each task is classified by a coroutine, in this order:

    * ``drain_method.is_draining(task)`` raised: logged through
      ``log_deploy_error`` and treated as "unhappy"
    * ``is_draining`` returned exactly ``True``: "draining"
    * task not among ``bounce_lib.get_happy_tasks(...)``: "unhappy"
    * task's host in ``draining_hosts``: "at_risk"
    * otherwise: "happy"

    :returns: dict with the four state names as keys and sets of tasks as values
    """
    tasks_by_state: TasksByStateDict = {
        "happy": set(),
        "unhappy": set(),
        "draining": set(),
        "at_risk": set(),
    }

    happy_tasks = bounce_lib.get_happy_tasks(
        app, service, nerve_ns, system_paasta_config, **bounce_health_params
    )

    async def categorize_task(task: MarathonTask) -> None:
        try:
            is_draining = await drain_method.is_draining(task)
        except Exception as e:
            log_deploy_error(
                f"Ignoring {type(e).__name__} exception during is_draining of task "
                f"{task.id} {e.args}. Treating task as 'unhappy'.")
            tasks_by_state["unhappy"].add(task)
            return

        if is_draining is True:
            state = "draining"
        elif task not in happy_tasks:
            state = "unhappy"
        elif task.host in draining_hosts:
            state = "at_risk"
        else:
            state = "happy"
        tasks_by_state[state].add(task)

    # Nothing is scheduled when the app has no tasks.
    if not app.tasks:
        return tasks_by_state

    pending = [asyncio.ensure_future(categorize_task(task)) for task in app.tasks]
    a_sync.block(asyncio.wait, pending)
    return tasks_by_state
Example #12
0
def get_tasks_by_state_for_app(
    app: MarathonApp,
    drain_method: drain_lib.DrainMethod,
    service: str,
    nerve_ns: str,
    bounce_health_params: Dict[str, Any],
    system_paasta_config: SystemPaastaConfig,
    log_deploy_error: LogDeployError,
    draining_hosts: Collection[str],
) -> TasksByStateDict:
    """Sort the tasks of ``app`` into happy / unhappy / draining / at_risk sets.

    Each task is classified by a coroutine, in this order:

    * ``drain_method.is_draining(task)`` raised: logged through
      ``log_deploy_error`` and treated as 'unhappy'
    * ``is_draining`` returned exactly ``True``: 'draining'
    * task not among ``bounce_lib.get_happy_tasks(...)``: 'unhappy'
    * task's host in ``draining_hosts``: 'at_risk'
    * otherwise: 'happy'

    :returns: dict with the four state names as keys and sets of tasks as values
    """
    tasks_by_state: TasksByStateDict = {
        'happy': set(),
        'unhappy': set(),
        'draining': set(),
        'at_risk': set(),
    }

    happy_tasks = bounce_lib.get_happy_tasks(
        app, service, nerve_ns, system_paasta_config, **bounce_health_params
    )

    async def categorize_task(task: MarathonTask) -> None:
        try:
            is_draining = await drain_method.is_draining(task)
        except Exception as e:
            log_deploy_error(
                "Ignoring exception during is_draining of task %s:"
                " %s. Treating task as 'unhappy'." % (task, e),
            )
            tasks_by_state['unhappy'].add(task)
            return

        if is_draining is True:
            state = 'draining'
        elif task not in happy_tasks:
            state = 'unhappy'
        elif task.host in draining_hosts:
            state = 'at_risk'
        else:
            state = 'happy'
        tasks_by_state[state].add(task)

    # Nothing is scheduled when the app has no tasks.
    if not app.tasks:
        return tasks_by_state

    pending = [asyncio.ensure_future(categorize_task(task)) for task in app.tasks]
    a_sync.block(asyncio.wait, pending)
    return tasks_by_state
Example #13
0
 def test_get_happy_tasks_when_some_unhealthy(self):
     """Only the task whose healthcheck result is alive should be reported happy."""
     failing_results = [mock.Mock(alive=False)]
     passing_results = [mock.Mock(alive=True)]
     # The first two tasks share the failing results; the last one passes.
     tasks = [
         mock.Mock(health_check_results=results)
         for results in (failing_results, failing_results, passing_results)
     ]
     fake_app = mock.Mock(tasks=tasks, health_checks=[])
     happy = bounce_lib.get_happy_tasks(
         fake_app, 'service', 'namespace', self.fake_system_paasta_config(),
     )
     assert happy == tasks[-1:]
Example #14
0
    def test_get_happy_tasks_min_task_uptime(self):
        """Tasks started more recently than min_task_uptime must not be reported happy."""
        now = datetime.datetime(2000, 1, 1, 0, 0, 0)
        # Task number k started k minutes before `now`.
        start_times = [now - datetime.timedelta(minutes=age) for age in xrange(5)]
        tasks = [mock.Mock(health_check_results=[], started_at=(started)) for started in start_times]
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        # Mocking datetime.datetime.utcnow directly is apparently difficult, so the whole
        # datetime.datetime is mocked instead and handed a utcnow attribute.
        frozen_clock = mock.patch('paasta_tools.bounce_lib.datetime.datetime', utcnow=lambda: now, autospec=True)
        with frozen_clock:
            happy = bounce_lib.get_happy_tasks(
                fake_app, 'service', 'namespace', self.fake_system_paasta_config(),
                min_task_uptime=121,
            )
            assert happy == tasks[3:]
Example #15
0
    def test_get_happy_tasks_check_haproxy_when_unhealthy(self):
        """Tasks with a failing healthcheck are not happy even when registered in haproxy."""
        tasks = [mock.Mock(health_check_results=[mock.Mock(alive=False)]) for _ in range(5)]
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        # The registration lookup reports the last three tasks as registered.
        registered_patch = mock.patch(
            'paasta_tools.bounce_lib.get_registered_marathon_tasks', return_value=tasks[2:], autospec=True,
        )
        with registered_patch:
            happy = bounce_lib.get_happy_tasks(
                fake_app, 'service', 'namespace', self.fake_system_paasta_config(),
                check_haproxy=True,
            )
            assert happy == []
Example #16
0
    def test_get_happy_tasks_check_haproxy_when_unhealthy(self):
        """Tasks with a failing healthcheck are not happy even when registered in haproxy."""
        tasks = [mock.Mock(health_check_results=[mock.Mock(alive=False)]) for _ in xrange(5)]
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        # The registration lookup reports the last three tasks as registered.
        registered_patch = mock.patch(
            'paasta_tools.bounce_lib.get_registered_marathon_tasks', return_value=tasks[2:], autospec=True,
        )
        slaves_patch = mock.patch(
            'paasta_tools.mesos_tools.get_mesos_slaves_grouped_by_attribute',
            return_value={'fake_region': ['fake_host']}, autospec=True,
        )
        with contextlib.nested(registered_patch, slaves_patch):
            happy = bounce_lib.get_happy_tasks(fake_app, 'service', 'namespace', check_haproxy=True)
            assert happy == []
Example #17
0
    def test_get_happy_tasks_check_haproxy_when_unhealthy(self):
        """Tasks with a failing healthcheck are not happy even when registered in haproxy."""
        tasks = [mock.Mock(health_check_results=[mock.Mock(alive=False)]) for _ in xrange(5)]
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        # The registration lookup reports the last three tasks as registered.
        registered_patch = mock.patch(
            'paasta_tools.bounce_lib.get_registered_marathon_tasks', return_value=tasks[2:], autospec=True,
        )
        slaves_patch = mock.patch(
            'paasta_tools.mesos_tools.get_mesos_slaves_grouped_by_attribute',
            return_value={'fake_region': ['fake_host']}, autospec=True,
        )
        with contextlib.nested(registered_patch, slaves_patch):
            happy = bounce_lib.get_happy_tasks(fake_app, 'service', 'namespace', check_haproxy=True)
            assert happy == []
Example #18
0
 def test_get_happy_tasks_with_multiple_healthchecks_fail(self):
     """A task whose healthcheck results are all failing should not be happy."""
     # Two results, neither of them alive.
     fake_failing_healthcheck_results = [mock.Mock(alive=False), mock.Mock(alive=False)]
     tasks = [mock.Mock(health_check_results=fake_failing_healthcheck_results)]
     fake_app = mock.Mock(tasks=tasks, health_checks=[])
     happy = bounce_lib.get_happy_tasks(
         fake_app, "service", "namespace", self.fake_system_paasta_config()
     )
     assert happy == []
Example #19
0
def when_setup_service_initiated(context):
    """Run ``setup_marathon_job.setup_service`` with bounce helpers patched, retrying on MarathonHttpError.

    On each of up to 120 attempts the current Marathon apps are fetched with
    ``marathon_tools.get_all_marathon_apps`` and passed to ``setup_service``.
    The function returns after the first attempt that does not raise
    ``MarathonHttpError`` (asserting that the return code is 0) and raises
    ``Exception`` when every attempt raised ``MarathonHttpError``.

    :param context: object providing ``service``, ``instance``,
        ``marathon_client``, ``new_marathon_service_config``, ``cluster``,
        ``system_paasta_config`` and ``max_tasks``
    """
    with mock.patch(
        'paasta_tools.bounce_lib.get_happy_tasks',
        autospec=True,
        # Wrap function call so we can select a subset of tasks or test
        # intermediate steps, like when an app is not completely up
        side_effect=lambda app, _, __, ___, **kwargs: get_happy_tasks(
            app, context.service, "fake_nerve_ns", context.system_paasta_config,
        )[:context.max_tasks],
    ), mock.patch(
        'paasta_tools.bounce_lib.bounce_lock_zookeeper', autospec=True,
    ), mock.patch(
        'paasta_tools.bounce_lib.time.sleep', autospec=True,
    ), mock.patch(
        'paasta_tools.setup_marathon_job.load_system_paasta_config', autospec=True,
    ) as mock_load_system_paasta_config, mock.patch(
        'paasta_tools.setup_marathon_job._log', autospec=True,
    ), mock.patch(
        'paasta_tools.marathon_tools.get_config_hash', autospec=True, return_value='confighash',
    ), mock.patch(
        'paasta_tools.marathon_tools.get_code_sha_from_dockerurl', autospec=True, return_value='newapp',
    ), mock.patch(
        'paasta_tools.utils.InstanceConfig.get_docker_url', autospec=True, return_value='busybox',
    ), mock.patch(
        'paasta_tools.mesos_maintenance.get_principal', autospec=True,
    ) as mock_get_principal, mock.patch(
        'paasta_tools.mesos_maintenance.get_secret', autospec=True,
    ) as mock_get_secret:
        # Load credentials from the secrets file and make the patched
        # get_principal / get_secret return them.
        credentials = mesos_maintenance.load_credentials(mesos_secrets='/etc/mesos-slave-secret')
        mock_get_principal.return_value = credentials.principal
        mock_get_secret.return_value = credentials.secret
        mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value=context.cluster)
        # 120 * 0.5 = 60 seconds
        # NOTE(review): 'paasta_tools.bounce_lib.time.sleep' is patched above; if that is the
        # shared `time` module, the time.sleep below is presumably a no-op mock — confirm.
        for _ in range(120):
            try:
                marathon_apps = marathon_tools.get_all_marathon_apps(context.marathon_client, embed_tasks=True)
                (code, message, bounce_again) = setup_marathon_job.setup_service(
                    service=context.service,
                    instance=context.instance,
                    client=context.marathon_client,
                    marathon_apps=marathon_apps,
                    service_marathon_config=context.new_marathon_service_config,
                    soa_dir='/nail/etc/services',
                )
                assert code == 0, message
                return
            except MarathonHttpError:
                # Retry after a short pause.
                time.sleep(0.5)
        raise Exception("Unable to acquire app lock for setup_marathon_job.setup_service")
Example #20
0
    def test_get_happy_tasks_check_haproxy_multiple_locations(self):
        """With check_haproxy=True, tasks registered on hosts in either region are happy.

        The registered-task lookup is mocked to return one slice of the tasks
        per call, and the slave grouping is mocked to report two regions.
        """
        tasks = [mock.Mock(health_check_results=[mock.Mock(alive=True)]) for _ in xrange(5)]
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        registered_patcher = mock.patch(
            'paasta_tools.bounce_lib.get_registered_marathon_tasks',
            side_effect=[tasks[2:3], tasks[3:]],
            autospec=True,
        )
        slaves_patcher = mock.patch(
            'paasta_tools.mesos_tools.get_mesos_slaves_grouped_by_attribute',
            autospec=True,
        )

        with contextlib.nested(registered_patcher, slaves_patcher) as (registered_patch, slaves_patch):
            slaves_patch.return_value = {
                'fake_region': ['fake_host1'],
                'fake_other_region': ['fake_host2'],
            }
            happy = bounce_lib.get_happy_tasks(
                fake_app, 'service', 'namespace', self.fake_system_paasta_config(),
                check_haproxy=True,
            )
            assert happy == tasks[2:]

            # Each host must have been queried for its registered tasks.
            for host in ('fake_host1', 'fake_host2'):
                registered_patch.assert_any_call(
                    host,
                    123456,
                    utils.DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
                    'service.namespace',
                    tasks,
                )
Example #21
0
def when_there_are_num_which_tasks(context, num, which, state):
    """Poll Marathon until the app has at least ``num`` tasks in ``state``.

    ``int(num)`` is stored on ``context.max_tasks``. The app identified by
    ``which_id(context, which)`` is then fetched up to 120 times, with a call
    to ``time.sleep(0.5)`` after every unsuccessful check.

    :param state: ``"healthy"`` counts happy tasks, ``"unhealthy"`` counts the
        remaining tasks; any other value never matches
    :raises Exception: when no check succeeded within the 120 attempts
    """
    context.max_tasks = int(num)
    app_id = which_id(context, which)

    # 120 attempts * 0.5 seconds between attempts = 60 seconds
    for _attempt in xrange(120):
        app = context.marathon_client.get_app(app_id, embed_tasks=True)
        happy_tasks = get_happy_tasks(app, context.service, "fake_nerve_ns", context.system_paasta_config)
        happy_count = len(happy_tasks)

        if state == "healthy" and happy_count >= context.max_tasks:
            return
        if state == "unhealthy" and len(app.tasks) - happy_count >= context.max_tasks:
            return
        time.sleep(0.5)

    raise Exception("timed out waiting for %d %s tasks on %s; there are %s" %
                    (context.max_tasks, state, app_id, app.tasks))
Example #22
0
def when_there_are_num_which_tasks(context, num, which, state):
    """Poll Marathon until the app has at least ``num`` tasks in ``state``.

    ``int(num)`` is stored on ``context.max_tasks``. The app identified by
    ``which_id(context, which)`` is then fetched up to 120 times, with a call
    to ``time.sleep(0.5)`` after every unsuccessful check.

    :param context: object providing ``marathon_client`` and ``service``
    :param num: target task count (converted with ``int``)
    :param which: selector handed to ``which_id`` to resolve the app id
    :param state: ``"healthy"`` counts happy tasks, ``"unhealthy"`` counts the
        remaining tasks; any other value never matches
    :raises Exception: when no check succeeded within the 120 attempts
    """
    context.max_tasks = int(num)
    app_id = which_id(context, which)

    # 120 * 0.5 = 60 seconds
    for _ in xrange(120):
        app = context.marathon_client.get_app(app_id, embed_tasks=True)
        happy_count = len(get_happy_tasks(app, context.service, "fake_nerve_ns"))
        if state == "healthy":
            if happy_count >= context.max_tasks:
                return
        elif state == "unhealthy":
            if len(app.tasks) - happy_count >= context.max_tasks:
                return
        time.sleep(0.5)
    # %d needs a number: formatting the task list itself raised TypeError and
    # hid the timeout, so report the number of tasks instead.
    raise Exception("timed out waiting for %d %s tasks on %s; there are %d" %
                    (context.max_tasks, state, app_id, len(app.tasks)))
Example #23
0
    def test_get_happy_tasks_min_task_uptime_when_unhealthy(self):
        """Tasks whose healthcheck is failing are not happy, whatever their age."""
        now = datetime.datetime(2000, 1, 1, 0, 0, 0)
        # Task number k started k minutes before `now`; every healthcheck fails.
        tasks = []
        for minutes_old in range(5):
            tasks.append(mock.Mock(
                health_check_results=[mock.Mock(alive=False)],
                started_at=(now - datetime.timedelta(minutes=minutes_old)),
            ))
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        frozen_clock = mock.patch('paasta_tools.bounce_lib.datetime.datetime', utcnow=lambda: now, autospec=True)
        with frozen_clock:
            happy = bounce_lib.get_happy_tasks(
                fake_app, 'service', 'namespace', self.fake_system_paasta_config(),
                min_task_uptime=121,
            )
            assert happy == []
Example #24
0
def get_old_happy_unhappy_draining_tasks_for_app(app, drain_method, service, nerve_ns, bounce_health_params):
    """Bucket the tasks of ``app`` into happy, unhappy and draining sets.

    A task is 'draining' when ``drain_method.is_draining(task)`` is truthy.
    Otherwise it is 'happy' when ``bounce_lib.get_happy_tasks`` returned it,
    and 'unhappy' when it did not.

    :returns: dict with keys 'happy', 'unhappy' and 'draining', each a set of tasks
    """
    happy, unhappy, draining = set(), set(), set()

    happy_tasks = bounce_lib.get_happy_tasks(app, service, nerve_ns, **bounce_health_params)
    for task in app.tasks:
        # Draining takes precedence over the happy/unhappy split.
        if drain_method.is_draining(task):
            bucket = draining
        elif task in happy_tasks:
            bucket = happy
        else:
            bucket = unhappy
        bucket.add(task)

    return {
        'happy': happy,
        'unhappy': unhappy,
        'draining': draining,
    }
Example #25
0
def get_old_happy_unhappy_draining_tasks_for_app(app, drain_method, service, nerve_ns, bounce_health_params):
    """Bucket the tasks of ``app`` into happy, unhappy and draining sets.

    A task is 'draining' when ``drain_method.is_draining(task)`` is truthy.
    Otherwise it is 'happy' when ``bounce_lib.get_happy_tasks`` returned it,
    and 'unhappy' when it did not.

    :returns: dict with keys 'happy', 'unhappy' and 'draining', each a set of tasks
    """
    happy, unhappy, draining = set(), set(), set()

    happy_tasks = bounce_lib.get_happy_tasks(app, service, nerve_ns, **bounce_health_params)
    for task in app.tasks:
        # Draining takes precedence over the happy/unhappy split.
        if drain_method.is_draining(task):
            bucket = draining
        elif task in happy_tasks:
            bucket = happy
        else:
            bucket = unhappy
        bucket.add(task)

    return {
        'happy': happy,
        'unhappy': unhappy,
        'draining': draining,
    }
Example #26
0
    def test_get_happy_tasks_check_haproxy(self):
        """With check_haproxy=True, only tasks reported as registered are happy."""
        tasks = [mock.Mock(health_check_results=[mock.Mock(alive=True)]) for _ in xrange(5)]
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        # The registration lookup reports the last three tasks as registered.
        registered_patch = mock.patch(
            'paasta_tools.bounce_lib.get_registered_marathon_tasks', return_value=tasks[2:], autospec=True,
        )
        slaves_by_attribute_patch = mock.patch(
            'paasta_tools.bounce_lib.mesos_tools.get_mesos_slaves_grouped_by_attribute',
            return_value={'fake_region': [{'hostname': 'fakehost'}]}, autospec=True,
        )
        get_slaves_patch = mock.patch('paasta_tools.mesos_tools.get_slaves', return_value=[], autospec=True)

        with contextlib.nested(registered_patch, slaves_by_attribute_patch, get_slaves_patch):
            happy = bounce_lib.get_happy_tasks(
                fake_app, 'service', 'namespace', self.fake_system_paasta_config(),
                check_haproxy=True,
            )
            assert happy == tasks[2:]
Example #27
0
def get_tasks_by_state_for_app(
    app,
    drain_method,
    service,
    nerve_ns,
    bounce_health_params,
    system_paasta_config,
    log_deploy_error,
    draining_hosts,
):
    """Sort the tasks of ``app`` into happy / unhappy / draining / at_risk sets.

    Each task is classified in this order:

    * ``drain_method.is_draining(task)`` raised: logged through
      ``log_deploy_error`` and treated as 'unhappy'
    * ``is_draining`` returned exactly ``True``: 'draining'
    * task not among ``bounce_lib.get_happy_tasks(...)``: 'unhappy'
    * task's host in ``draining_hosts``: 'at_risk'
    * otherwise: 'happy'

    :returns: dict with the four state names as keys and sets of tasks as values
    """
    tasks_by_state = {
        'happy': set(),
        'unhappy': set(),
        'draining': set(),
        'at_risk': set(),
    }

    happy_tasks = bounce_lib.get_happy_tasks(
        app, service, nerve_ns, system_paasta_config, **bounce_health_params
    )

    for task in app.tasks:
        try:
            is_draining = drain_method.is_draining(task)
        except Exception as e:
            log_deploy_error(
                "Ignoring exception during is_draining of task %s:"
                " %s. Treating task as 'unhappy'." % (task, e),
            )
            tasks_by_state['unhappy'].add(task)
            continue

        if is_draining is True:
            state = 'draining'
        elif task not in happy_tasks:
            state = 'unhappy'
        elif task.host in draining_hosts:
            state = 'at_risk'
        else:
            state = 'happy'
        tasks_by_state[state].add(task)

    return tasks_by_state
Example #28
0
    def test_get_happy_tasks_check_haproxy_multiple_locations(self):
        """With check_haproxy=True, tasks registered on hosts in either region are happy.

        The registered-task lookup is mocked to return one slice of the tasks
        per call, and the slave grouping is mocked to report two regions.
        """
        tasks = [mock.Mock(health_check_results=[mock.Mock(alive=True)]) for _ in xrange(5)]
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        registered_patcher = mock.patch(
            'paasta_tools.bounce_lib.get_registered_marathon_tasks',
            side_effect=[tasks[2:3], tasks[3:]], autospec=True,
        )
        slaves_by_attribute_patcher = mock.patch(
            'paasta_tools.mesos_tools.get_mesos_slaves_grouped_by_attribute', autospec=True,
        )
        get_slaves_patcher = mock.patch('paasta_tools.mesos_tools.get_slaves', return_value=[], autospec=True)

        with contextlib.nested(
            registered_patcher, slaves_by_attribute_patcher, get_slaves_patcher,
        ) as (registered_patch, slaves_by_attribute_patch, _):
            slaves_by_attribute_patch.return_value = {
                'fake_region': [{'hostname': 'fake_host1'}],
                'fake_other_region': [{'hostname': 'fake_host2'}]
            }
            happy = bounce_lib.get_happy_tasks(
                fake_app, 'service', 'namespace', self.fake_system_paasta_config(),
                check_haproxy=True,
            )
            assert happy == tasks[2:]

            # Each host must have been queried for its registered tasks.
            for host in ('fake_host1', 'fake_host2'):
                registered_patch.assert_any_call(
                    host,
                    123456,
                    utils.DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
                    'service.namespace',
                    tasks,
                )
Example #29
0
def when_deploy_service_initiated(context, bounce_method, drain_method):
    """Run setup_marathon_job.deploy_service against the test context, retrying on Marathon errors.

    While the call runs, several collaborators are patched:
      * bounce_lib.get_happy_tasks is wrapped so that at most ``context.max_tasks`` happy tasks
        are reported (lets scenarios exercise partially-up apps).
      * the zookeeper bounce lock, create_app_lock, bounce_lib's time.sleep and
        setup_marathon_job._log are replaced by autospecced mocks.
      * load_system_paasta_config is mocked so that get_cluster() returns ``context.cluster``.

    :param context: The test context; reads service, instance, new_config, marathon_client,
                    cluster, max_tasks and system_paasta_config from it.
    :param bounce_method: Bounce method name forwarded to deploy_service.
    :param drain_method: Drain method name forwarded to deploy_service as drain_method_name.
    :raises Exception: if deploy_service keeps raising MarathonHttpError for all 120 attempts.
    """
    with contextlib.nested(
        mock.patch(
            "paasta_tools.bounce_lib.get_happy_tasks",
            autospec=True,
            # Wrap function call so we can select a subset of tasks or test
            # intermediate steps, like when an app is not completely up
            side_effect=lambda app, _, __, ___, **kwargs: get_happy_tasks(
                app, context.service, "fake_nerve_ns", context.system_paasta_config
            )[: context.max_tasks],
        ),
        mock.patch("paasta_tools.bounce_lib.bounce_lock_zookeeper", autospec=True),
        mock.patch("paasta_tools.bounce_lib.create_app_lock", autospec=True),
        mock.patch("paasta_tools.bounce_lib.time.sleep", autospec=True),
        mock.patch("paasta_tools.setup_marathon_job.load_system_paasta_config", autospec=True),
        mock.patch("paasta_tools.setup_marathon_job._log", autospec=True),
    ) as (_, _, _, _, mock_load_system_paasta_config, _):
        mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value=context.cluster)
        # 120 * 0.5 = 60 seconds
        # NOTE(review): the "paasta_tools.bounce_lib.time.sleep" patch above presumably resolves to
        # the shared `time` module, in which case the sleep below is mocked too and the loop does
        # not really wait 60 seconds -- confirm.
        for _ in xrange(120):
            try:
                setup_marathon_job.deploy_service(
                    service=context.service,
                    instance=context.instance,
                    marathon_jobid=context.new_config["id"],
                    config=context.new_config,
                    client=context.marathon_client,
                    bounce_method=bounce_method,
                    drain_method_name=drain_method,
                    drain_method_params={},
                    nerve_ns=context.instance,
                    bounce_health_params={},
                    soa_dir=None,
                )
                return
            except MarathonHttpError:
                # Marathon rejected the request; back off briefly and try again.
                time.sleep(0.5)
        raise Exception("Unable to acquire app lock for setup_marathon_job.deploy_service")
Example #30
0
 def test_get_happy_tasks_when_running_with_healthchecks_defined(self):
     """Tasks without any healthcheck results are not happy when the app defines healthchecks."""
     now = datetime.datetime(2000, 1, 1, 0, 0, 0, tzinfo=pytz.utc)

     # Five tasks started 0..4 minutes before the frozen "now", none with healthcheck results.
     tasks = []
     for minutes_ago in range(5):
         started_at = now - datetime.timedelta(minutes=minutes_ago)
         tasks.append(mock.Mock(health_check_results=[], started_at=started_at))

     healthcheck = mock.Mock(grace_period_seconds=1234, interval_seconds=4321)
     fake_app = mock.Mock(tasks=tasks, health_checks=[healthcheck])

     frozen_datetime = mock.patch(
         "paasta_tools.marathon_tools.datetime.datetime",
         now=lambda x: now,
         autospec=True,
     )
     with frozen_datetime:
         happy_tasks = bounce_lib.get_happy_tasks(
             fake_app,
             "service",
             "namespace",
             self.fake_system_paasta_config(),
         )
         assert (happy_tasks == [])
Example #31
0
def get_tasks_by_state_for_app(app, drain_method, service, nerve_ns, bounce_health_params,
                               system_paasta_config):
    """Sort the tasks of one app into 'happy', 'unhappy', 'draining' and 'at_risk' buckets.

    A task that the drain method reports as draining is 'draining'. Otherwise a task returned by
    bounce_lib.get_happy_tasks is 'at_risk' when its host is among get_draining_hosts(), else
    'happy'. Everything else is 'unhappy'.

    :param app: The app whose ``tasks`` are classified.
    :param drain_method: Object whose ``is_draining(task)`` decides the 'draining' bucket.
    :param service: Service name, forwarded to bounce_lib.get_happy_tasks.
    :param nerve_ns: Nerve namespace, forwarded to bounce_lib.get_happy_tasks.
    :param bounce_health_params: Extra keyword arguments for bounce_lib.get_happy_tasks.
    :param system_paasta_config: Forwarded to bounce_lib.get_happy_tasks.
    :returns: A dict mapping each of the four state names to a set of tasks.
    """
    tasks_by_state = {}
    for state_name in ('happy', 'unhappy', 'draining', 'at_risk'):
        tasks_by_state[state_name] = set()

    happy_tasks = bounce_lib.get_happy_tasks(app, service, nerve_ns, system_paasta_config, **bounce_health_params)
    draining_hosts = get_draining_hosts()

    for task in app.tasks:
        # Draining wins over every other classification.
        if drain_method.is_draining(task):
            bucket = 'draining'
        elif task not in happy_tasks:
            bucket = 'unhappy'
        elif task.host in draining_hosts:
            # Healthy, but running on a host that is being drained.
            bucket = 'at_risk'
        else:
            bucket = 'happy'
        tasks_by_state[bucket].add(task)

    return tasks_by_state
Example #32
0
    def test_get_happy_tasks_check_each_host(self):
        """With check_haproxy=True, a task only counts as happy if haproxy reports it as registered.

        Here every task carries its own ``host``, and the registration lookup is expected to be
        made once per task with just that task.
        """

        tasks = [mock.Mock(health_check_results=[mock.Mock(alive=True)], host='fake_host1') for i in range(5)]
        fake_app = mock.Mock(tasks=tasks, health_checks=[])

        # Successive lookups: nothing registered for the first two tasks, then each later task itself.
        registered_per_call = [[] for _ in tasks[:2]] + [[task] for task in tasks[2:]]

        registered_tasks_patcher = mock.patch(
            'paasta_tools.bounce_lib.get_registered_marathon_tasks',
            side_effect=registered_per_call, autospec=True,
        )
        with registered_tasks_patcher as mock_get_registered_tasks:
            actual = bounce_lib.get_happy_tasks(
                fake_app,
                'service',
                'namespace',
                self.fake_system_paasta_config(),
                check_haproxy=True,
            )
            expected = tasks[2:]
            assert actual == expected

            for task in tasks:
                expected_call_args = (
                    'fake_host1',
                    123456,
                    utils.DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
                    'service.namespace',
                    [task],
                )
                mock_get_registered_tasks.assert_any_call(*expected_call_args)
Example #33
0
 def test_get_happy_tasks_when_all_healthy(self):
     """Every task whose healthcheck results are all passing is reported as happy."""
     passing_tasks = [
         mock.Mock(health_check_results=[mock.Mock(alive=True)])
         for _ in range(5)
     ]
     fake_app = mock.Mock(tasks=passing_tasks, health_checks=[])
     happy_tasks = bounce_lib.get_happy_tasks(
         fake_app, 'service', 'namespace', self.fake_system_paasta_config(),
     )
     assert happy_tasks == passing_tasks
Example #34
0
 def test_get_happy_tasks_when_all_healthy(self):
     """Every task whose healthcheck results are all passing is reported as happy."""
     passing_tasks = [
         mock.Mock(health_check_results=[mock.Mock(alive=True)])
         for _ in xrange(5)
     ]
     fake_app = mock.Mock(tasks=passing_tasks, health_checks=[])
     happy_tasks = bounce_lib.get_happy_tasks(
         fake_app, 'service', 'namespace', self.fake_system_paasta_config(),
     )
     assert happy_tasks == passing_tasks
Example #35
0
def deploy_service(
    service,
    instance,
    marathon_jobid,
    config,
    client,
    bounce_method,
    drain_method_name,
    drain_method_params,
    nerve_ns,
    bounce_health_params,
    soa_dir,
):
    """Deploy the service to marathon, either directly or via a bounce if needed.
    Called by setup_service when it's time to actually deploy.

    :param service: The name of the service to deploy
    :param instance: The instance of the service to deploy
    :param marathon_jobid: Full id of the marathon job
    :param config: The complete configuration dict to send to marathon
    :param client: A MarathonClient object
    :param bounce_method: The bounce method to use, if needed
    :param drain_method_name: The name of the traffic draining method to use.
    :param drain_method_params: Parameters handed to drain_lib.get_drain_method together with the name.
    :param nerve_ns: The nerve namespace to look in.
    :param bounce_health_params: A dictionary of options for bounce_lib.get_happy_tasks.
    :param soa_dir: Passed through unchanged to do_bounce.
    :returns: A tuple of (status, output) to be used with send_sensu_event. Status is 1 when the
              drain or bounce method is not recognized or the bounce lock is already held, and 0
              once the bounce has been carried out.
    :raises ValueError: if more than one existing app has the id given in ``config``.
    """

    def log_deploy_error(errormsg, level='event'):
        # Honour the level requested by the caller; previously 'event' was hard-coded here, so
        # the level='debug' traceback lines below were emitted at the 'event' level.
        return _log(
            service=service,
            line=errormsg,
            component='deploy',
            level=level,
            cluster=cluster,
            instance=instance
        )

    short_id = marathon_tools.format_job_id(service, instance)

    cluster = load_system_paasta_config().get_cluster()
    existing_apps = marathon_tools.get_matching_apps(service, instance, client, embed_failures=True)
    # Split the matching apps into the one we want to be running and everything else.
    new_app_list = [a for a in existing_apps if a.id == '/%s' % config['id']]
    other_apps = [a for a in existing_apps if a.id != '/%s' % config['id']]
    serviceinstance = "%s.%s" % (service, instance)

    if new_app_list:
        new_app = new_app_list[0]
        if len(new_app_list) != 1:
            raise ValueError("Only expected one app per ID; found %d" % len(new_app_list))
        new_app_running = True
        happy_new_tasks = bounce_lib.get_happy_tasks(new_app, service, nerve_ns, **bounce_health_params)
    else:
        new_app_running = False
        happy_new_tasks = []

    try:
        drain_method = drain_lib.get_drain_method(
            drain_method_name,
            service=service,
            instance=instance,
            nerve_ns=nerve_ns,
            drain_method_params=drain_method_params,
        )
    except KeyError:
        errormsg = 'ERROR: drain_method not recognized: %s. Must be one of (%s)' % \
            (drain_method_name, ', '.join(drain_lib.list_drain_methods()))
        log_deploy_error(errormsg)
        return (1, errormsg)

    old_app_live_happy_tasks, old_app_live_unhappy_tasks, old_app_draining_tasks = get_old_happy_unhappy_draining_tasks(
        other_apps,
        drain_method,
        service,
        nerve_ns,
        bounce_health_params
    )

    if new_app_running:
        # Draining tasks of the new app that were picked for scale-down; these must stay draining.
        protected_draining_tasks = set()
        if new_app.instances < config['instances']:
            client.scale_app(app_id=new_app.id, instances=config['instances'], force=True)
        elif new_app.instances > config['instances']:
            # Number of surplus tasks on the new app (never negative).
            num_tasks_to_scale = max(min(len(new_app.tasks), new_app.instances) - config['instances'], 0)
            task_dict = get_old_happy_unhappy_draining_tasks_for_app(
                new_app,
                drain_method,
                service,
                nerve_ns,
                bounce_health_params,
            )
            scaling_app_happy_tasks = list(task_dict['happy'])
            scaling_app_unhappy_tasks = list(task_dict['unhappy'])
            scaling_app_draining_tasks = list(task_dict['draining'])

            # Hand the surplus over to the "old app" buckets (keyed by the new app's id), taking
            # already-draining tasks first, then unhappy ones, and happy ones only as a last resort.
            tasks_to_move_draining = min(len(scaling_app_draining_tasks), num_tasks_to_scale)
            old_app_draining_tasks[new_app.id] = set(scaling_app_draining_tasks[:tasks_to_move_draining])
            protected_draining_tasks.update(scaling_app_draining_tasks[:tasks_to_move_draining])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_draining

            tasks_to_move_unhappy = min(len(scaling_app_unhappy_tasks), num_tasks_to_scale)
            old_app_live_unhappy_tasks[new_app.id] = set(scaling_app_unhappy_tasks[:tasks_to_move_unhappy])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_unhappy

            tasks_to_move_happy = min(len(scaling_app_happy_tasks), num_tasks_to_scale)
            old_app_live_happy_tasks[new_app.id] = set(scaling_app_happy_tasks[:tasks_to_move_happy])
            happy_new_tasks = scaling_app_happy_tasks[tasks_to_move_happy:]
        # If any tasks on the new app happen to be draining (e.g. someone reverts to an older version with
        # `paasta mark-for-deployment`), then we should undrain them.
        for task in new_app.tasks:
            if task not in protected_draining_tasks:
                drain_method.stop_draining(task)

    # Re-drain any already draining tasks on old apps
    for tasks in old_app_draining_tasks.values():
        for task in tasks:
            drain_method.drain(task)

    # log all uncaught exceptions and raise them again
    try:
        try:
            bounce_func = bounce_lib.get_bounce_method_func(bounce_method)
        except KeyError:
            errormsg = 'ERROR: bounce_method not recognized: %s. Must be one of (%s)' % \
                (bounce_method, ', '.join(bounce_lib.list_bounce_methods()))
            log_deploy_error(errormsg)
            return (1, errormsg)

        try:
            with bounce_lib.bounce_lock_zookeeper(short_id):
                do_bounce(
                    bounce_func=bounce_func,
                    drain_method=drain_method,
                    config=config,
                    new_app_running=new_app_running,
                    happy_new_tasks=happy_new_tasks,
                    old_app_live_happy_tasks=old_app_live_happy_tasks,
                    old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
                    old_app_draining_tasks=old_app_draining_tasks,
                    service=service,
                    bounce_method=bounce_method,
                    serviceinstance=serviceinstance,
                    cluster=cluster,
                    instance=instance,
                    marathon_jobid=marathon_jobid,
                    client=client,
                    soa_dir=soa_dir,
                )

        except bounce_lib.LockHeldException:
            log.error("Instance %s already being bounced. Exiting", short_id)
            return (1, "Instance %s is already being bounced." % short_id)
    except Exception:
        # Emit the traceback one line per log entry, at debug level, then propagate.
        loglines = ['Exception raised during deploy of service %s:' % service]
        loglines.extend(traceback.format_exc().rstrip().split("\n"))
        for logline in loglines:
            log_deploy_error(logline, level='debug')
        raise

    return (0, 'Service deployed.')
Example #36
0
 def test_get_happy_tasks_when_running_with_healthchecks_defined(self):
     """Tasks without any healthcheck results are not happy when the app defines healthchecks."""
     tasks_without_results = [mock.Mock(health_check_results=[]) for _ in range(5)]
     fake_app = mock.Mock(
         tasks=tasks_without_results,
         health_checks=["fake_healthcheck_definition"],
     )
     happy_tasks = bounce_lib.get_happy_tasks(
         fake_app, 'service', 'namespace', self.fake_system_paasta_config(),
     )
     assert happy_tasks == []
Example #37
0
 def test_get_happy_tasks_with_multiple_healthchecks_fail(self):
     """A task whose healthcheck results are all failing must not be reported as happy."""
     # Both results are failures (alive=False).
     fake_failing_healthcheck_results = [mock.Mock(alive=False), mock.Mock(alive=False)]
     failing_task = mock.Mock(health_check_results=fake_failing_healthcheck_results)
     tasks = [failing_task]
     fake_app = mock.Mock(tasks=tasks, health_checks=[])
     happy_tasks = bounce_lib.get_happy_tasks(fake_app, 'service', 'namespace')
     assert happy_tasks == []
Example #38
0
 def test_get_happy_tasks_when_running_without_healthchecks_defined(self):
     """Tasks without healthcheck results are all happy when the app defines no healthchecks."""
     tasks_without_results = [mock.Mock(health_check_results=[]) for _ in xrange(5)]
     fake_app = mock.Mock(tasks=tasks_without_results, health_checks=[])
     happy_tasks = bounce_lib.get_happy_tasks(fake_app, 'service', 'namespace')
     assert happy_tasks == tasks_without_results
Example #39
0
def deploy_service(
    service,
    instance,
    marathon_jobid,
    config,
    client,
    marathon_apps,
    bounce_method,
    drain_method_name,
    drain_method_params,
    nerve_ns,
    bounce_health_params,
    soa_dir,
    bounce_margin_factor=1.0,
):
    """Deploy the service to marathon, either directly or via a bounce if needed.
    Called by setup_service when it's time to actually deploy.

    :param service: The name of the service to deploy
    :param instance: The instance of the service to deploy
    :param marathon_jobid: Full id of the marathon job
    :param config: The complete configuration dict to send to marathon
    :param client: A MarathonClient object
    :param marathon_apps: The apps handed to marathon_tools.get_matching_apps to find this
                          service instance's existing apps.
    :param bounce_method: The bounce method to use, if needed
    :param drain_method_name: The name of the traffic draining method to use.
    :param drain_method_params: Parameters handed to drain_lib.get_drain_method together with the name.
    :param nerve_ns: The nerve namespace to look in.
    :param bounce_health_params: A dictionary of options for bounce_lib.get_happy_tasks.
    :param soa_dir: Passed through unchanged to do_bounce.
    :param bounce_margin_factor: the multiplication factor used to calculate the number of instances to be drained
    :returns: A tuple of (status, output) to be used with send_sensu_event. Status is 1 when the
              drain or bounce method is not recognized; it is 0 after a bounce and also when the
              marathon lock could not be obtained (the deploy is skipped until next time).
    :raises ValueError: if more than one existing app has the id given in ``config``.
    """
    def log_deploy_error(errormsg, level='event'):
        # Honour the level requested by the caller; previously 'event' was hard-coded here, so
        # the level='debug' messages below were emitted at the 'event' level.
        return _log(service=service,
                    line=errormsg,
                    component='deploy',
                    level=level,
                    cluster=cluster,
                    instance=instance)

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    existing_apps = marathon_tools.get_matching_apps(service, instance,
                                                     marathon_apps)
    # Split the matching apps into the one we want to be running and everything else.
    new_app_list = [a for a in existing_apps if a.id == '/%s' % config['id']]
    other_apps = [a for a in existing_apps if a.id != '/%s' % config['id']]
    serviceinstance = "%s.%s" % (service, instance)

    if new_app_list:
        new_app = new_app_list[0]
        if len(new_app_list) != 1:
            raise ValueError("Only expected one app per ID; found %d" %
                             len(new_app_list))
        new_app_running = True
        happy_new_tasks = bounce_lib.get_happy_tasks(new_app, service,
                                                     nerve_ns,
                                                     system_paasta_config,
                                                     **bounce_health_params)
    else:
        new_app_running = False
        happy_new_tasks = []

    try:
        drain_method = drain_lib.get_drain_method(
            drain_method_name,
            service=service,
            instance=instance,
            nerve_ns=nerve_ns,
            drain_method_params=drain_method_params,
        )
    except KeyError:
        errormsg = 'ERROR: drain_method not recognized: %s. Must be one of (%s)' % \
            (drain_method_name, ', '.join(drain_lib.list_drain_methods()))
        log_deploy_error(errormsg)
        return (1, errormsg)

    (
        old_app_live_happy_tasks,
        old_app_live_unhappy_tasks,
        old_app_draining_tasks,
        old_app_at_risk_tasks,
    ) = get_tasks_by_state(
        other_apps,
        drain_method,
        service,
        nerve_ns,
        bounce_health_params,
        system_paasta_config,
    )

    if new_app_running:
        num_at_risk_tasks = get_num_at_risk_tasks(new_app)
        # Scale up far enough to cover the configured instances plus any at-risk tasks.
        if new_app.instances < config['instances'] + num_at_risk_tasks:
            log.info("Scaling %s from %d to %d instances." %
                     (new_app.id, new_app.instances,
                      config['instances'] + num_at_risk_tasks))
            client.scale_app(app_id=new_app.id,
                             instances=config['instances'] + num_at_risk_tasks,
                             force=True)
        # If we have more than the specified number of instances running, we will want to drain some of them.
        # We will start by draining any tasks running on at-risk hosts.
        elif new_app.instances > config['instances']:
            # Number of surplus tasks on the new app (never negative).
            num_tasks_to_scale = max(
                min(len(new_app.tasks), new_app.instances) -
                config['instances'], 0)
            task_dict = get_tasks_by_state_for_app(
                new_app,
                drain_method,
                service,
                nerve_ns,
                bounce_health_params,
                system_paasta_config,
            )
            scaling_app_happy_tasks = list(task_dict['happy'])
            scaling_app_unhappy_tasks = list(task_dict['unhappy'])
            scaling_app_draining_tasks = list(task_dict['draining'])
            scaling_app_at_risk_tasks = list(task_dict['at_risk'])

            # Hand the surplus over to the "old app" buckets (keyed by the new app's id) in this
            # order: draining, unhappy, at-risk, and happy tasks only as a last resort.
            tasks_to_move_draining = min(len(scaling_app_draining_tasks),
                                         num_tasks_to_scale)
            old_app_draining_tasks[new_app.id] = set(
                scaling_app_draining_tasks[:tasks_to_move_draining])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_draining

            tasks_to_move_unhappy = min(len(scaling_app_unhappy_tasks),
                                        num_tasks_to_scale)
            old_app_live_unhappy_tasks[new_app.id] = set(
                scaling_app_unhappy_tasks[:tasks_to_move_unhappy])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_unhappy

            tasks_to_move_at_risk = min(len(scaling_app_at_risk_tasks),
                                        num_tasks_to_scale)
            old_app_at_risk_tasks[new_app.id] = set(
                scaling_app_at_risk_tasks[:tasks_to_move_at_risk])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_at_risk

            tasks_to_move_happy = min(len(scaling_app_happy_tasks),
                                      num_tasks_to_scale)
            old_app_live_happy_tasks[new_app.id] = set(
                scaling_app_happy_tasks[:tasks_to_move_happy])
            happy_new_tasks = scaling_app_happy_tasks[tasks_to_move_happy:]

        # TODO: don't take actions in deploy_service.
        # Undrain the new app's tasks, except those just selected for scale-down draining.
        undrain_tasks(
            to_undrain=new_app.tasks,
            leave_draining=old_app_draining_tasks.get(new_app.id, []),
            drain_method=drain_method,
            log_deploy_error=log_deploy_error,
        )

    # log all uncaught exceptions and raise them again
    try:
        try:
            bounce_func = bounce_lib.get_bounce_method_func(bounce_method)
        except KeyError:
            errormsg = 'ERROR: bounce_method not recognized: %s. Must be one of (%s)' % \
                (bounce_method, ', '.join(bounce_lib.list_bounce_methods()))
            log_deploy_error(errormsg)
            return (1, errormsg)

        do_bounce(
            bounce_func=bounce_func,
            drain_method=drain_method,
            config=config,
            new_app_running=new_app_running,
            happy_new_tasks=happy_new_tasks,
            old_app_live_happy_tasks=old_app_live_happy_tasks,
            old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
            old_app_draining_tasks=old_app_draining_tasks,
            old_app_at_risk_tasks=old_app_at_risk_tasks,
            service=service,
            bounce_method=bounce_method,
            serviceinstance=serviceinstance,
            cluster=cluster,
            instance=instance,
            marathon_jobid=marathon_jobid,
            client=client,
            soa_dir=soa_dir,
            bounce_margin_factor=bounce_margin_factor,
        )
    except bounce_lib.LockHeldException:
        # Not treated as a failure: report status 0 and let a later run retry.
        logline = 'Failed to get lock to create marathon app for %s.%s' % (
            service, instance)
        log_deploy_error(logline, level='debug')
        return (0, "Couldn't get marathon lock, skipping until next time")
    except Exception:
        logline = 'Exception raised during deploy of service %s:\n%s' % (
            service, traceback.format_exc())
        log_deploy_error(logline, level='debug')
        raise

    return (0, 'Service deployed.')
Example #40
0
def deploy_service(
    service,
    instance,
    marathon_jobid,
    config,
    client,
    marathon_apps,
    bounce_method,
    drain_method_name,
    drain_method_params,
    nerve_ns,
    bounce_health_params,
    soa_dir,
    bounce_margin_factor=1.0,
):
    """Deploy the service to marathon, either directly or via a bounce if needed.
    Called by setup_service when it's time to actually deploy.

    :param service: The name of the service to deploy
    :param instance: The instance of the service to deploy
    :param marathon_jobid: Full id of the marathon job
    :param config: The complete configuration dict to send to marathon
    :param client: A MarathonClient object
    :param marathon_apps: The apps handed to marathon_tools.get_matching_apps to find this
                          service instance's existing apps.
    :param bounce_method: The bounce method to use, if needed
    :param drain_method_name: The name of the traffic draining method to use.
    :param drain_method_params: Parameters handed to drain_lib.get_drain_method together with the name.
    :param nerve_ns: The nerve namespace to look in.
    :param bounce_health_params: A dictionary of options for bounce_lib.get_happy_tasks.
    :param soa_dir: Passed through unchanged to do_bounce.
    :param bounce_margin_factor: the multiplication factor used to calculate the number of instances to be drained
    :returns: A tuple of (status, output) to be used with send_sensu_event. Status is 1 when the
              drain or bounce method is not recognized or the bounce lock is already held, and 0
              once the bounce has been carried out.
    :raises ValueError: if more than one existing app has the id given in ``config``.
    """

    def log_deploy_error(errormsg, level='event'):
        # Honour the level requested by the caller; previously 'event' was hard-coded here, so
        # the level='debug' traceback message below was emitted at the 'event' level.
        return _log(
            service=service,
            line=errormsg,
            component='deploy',
            level=level,
            cluster=cluster,
            instance=instance
        )

    short_id = marathon_tools.format_job_id(service, instance)

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    existing_apps = marathon_tools.get_matching_apps(service, instance, marathon_apps)
    # Split the matching apps into the one we want to be running and everything else.
    new_app_list = [a for a in existing_apps if a.id == '/%s' % config['id']]
    other_apps = [a for a in existing_apps if a.id != '/%s' % config['id']]
    serviceinstance = "%s.%s" % (service, instance)

    if new_app_list:
        new_app = new_app_list[0]
        if len(new_app_list) != 1:
            raise ValueError("Only expected one app per ID; found %d" % len(new_app_list))
        new_app_running = True
        happy_new_tasks = bounce_lib.get_happy_tasks(new_app, service, nerve_ns, system_paasta_config,
                                                     **bounce_health_params)
    else:
        new_app_running = False
        happy_new_tasks = []

    try:
        drain_method = drain_lib.get_drain_method(
            drain_method_name,
            service=service,
            instance=instance,
            nerve_ns=nerve_ns,
            drain_method_params=drain_method_params,
        )
    except KeyError:
        errormsg = 'ERROR: drain_method not recognized: %s. Must be one of (%s)' % \
            (drain_method_name, ', '.join(drain_lib.list_drain_methods()))
        log_deploy_error(errormsg)
        return (1, errormsg)

    (old_app_live_happy_tasks,
     old_app_live_unhappy_tasks,
     old_app_draining_tasks,
     old_app_at_risk_tasks,
     ) = get_tasks_by_state(
        other_apps,
        drain_method,
        service,
        nerve_ns,
        bounce_health_params,
        system_paasta_config,
    )

    if new_app_running:
        num_at_risk_tasks = get_num_at_risk_tasks(new_app)
        # Scale up far enough to cover the configured instances plus any at-risk tasks.
        if new_app.instances < config['instances'] + num_at_risk_tasks:
            log.info("Scaling %s from %d to %d instances." %
                     (new_app.id, new_app.instances, config['instances'] + num_at_risk_tasks))
            client.scale_app(app_id=new_app.id, instances=config['instances'] + num_at_risk_tasks, force=True)
        # If we have more than the specified number of instances running, we will want to drain some of them.
        # We will start by draining any tasks running on at-risk hosts.
        elif new_app.instances > config['instances']:
            # Number of surplus tasks on the new app (never negative).
            num_tasks_to_scale = max(min(len(new_app.tasks), new_app.instances) - config['instances'], 0)
            task_dict = get_tasks_by_state_for_app(
                new_app,
                drain_method,
                service,
                nerve_ns,
                bounce_health_params,
                system_paasta_config,
            )
            scaling_app_happy_tasks = list(task_dict['happy'])
            scaling_app_unhappy_tasks = list(task_dict['unhappy'])
            scaling_app_draining_tasks = list(task_dict['draining'])
            scaling_app_at_risk_tasks = list(task_dict['at_risk'])

            # Hand the surplus over to the "old app" buckets (keyed by the new app's id) in this
            # order: draining, unhappy, at-risk, and happy tasks only as a last resort.
            tasks_to_move_draining = min(len(scaling_app_draining_tasks), num_tasks_to_scale)
            old_app_draining_tasks[new_app.id] = set(scaling_app_draining_tasks[:tasks_to_move_draining])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_draining

            tasks_to_move_unhappy = min(len(scaling_app_unhappy_tasks), num_tasks_to_scale)
            old_app_live_unhappy_tasks[new_app.id] = set(scaling_app_unhappy_tasks[:tasks_to_move_unhappy])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_unhappy

            tasks_to_move_at_risk = min(len(scaling_app_at_risk_tasks), num_tasks_to_scale)
            old_app_at_risk_tasks[new_app.id] = set(scaling_app_at_risk_tasks[:tasks_to_move_at_risk])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_at_risk

            tasks_to_move_happy = min(len(scaling_app_happy_tasks), num_tasks_to_scale)
            old_app_live_happy_tasks[new_app.id] = set(scaling_app_happy_tasks[:tasks_to_move_happy])
            happy_new_tasks = scaling_app_happy_tasks[tasks_to_move_happy:]

        # TODO: don't take actions in deploy_service.
        # Undrain the new app's tasks, except those just selected for scale-down draining.
        undrain_tasks(
            to_undrain=new_app.tasks,
            leave_draining=old_app_draining_tasks.get(new_app.id, []),
            drain_method=drain_method,
            log_deploy_error=log_deploy_error,
        )

    # log all uncaught exceptions and raise them again
    try:
        try:
            bounce_func = bounce_lib.get_bounce_method_func(bounce_method)
        except KeyError:
            errormsg = 'ERROR: bounce_method not recognized: %s. Must be one of (%s)' % \
                (bounce_method, ', '.join(bounce_lib.list_bounce_methods()))
            log_deploy_error(errormsg)
            return (1, errormsg)

        try:
            with bounce_lib.bounce_lock_zookeeper(short_id):
                do_bounce(
                    bounce_func=bounce_func,
                    drain_method=drain_method,
                    config=config,
                    new_app_running=new_app_running,
                    happy_new_tasks=happy_new_tasks,
                    old_app_live_happy_tasks=old_app_live_happy_tasks,
                    old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
                    old_app_draining_tasks=old_app_draining_tasks,
                    old_app_at_risk_tasks=old_app_at_risk_tasks,
                    service=service,
                    bounce_method=bounce_method,
                    serviceinstance=serviceinstance,
                    cluster=cluster,
                    instance=instance,
                    marathon_jobid=marathon_jobid,
                    client=client,
                    soa_dir=soa_dir,
                    bounce_margin_factor=bounce_margin_factor,
                )

        except bounce_lib.LockHeldException:
            log.error("Instance %s already being bounced. Exiting", short_id)
            return (1, "Instance %s is already being bounced." % short_id)
    except Exception:
        logline = 'Exception raised during deploy of service %s:\n%s' % (service, traceback.format_exc())
        log_deploy_error(logline, level='debug')
        raise

    return (0, 'Service deployed.')
Example #41
0
 def test_get_happy_tasks_when_running_with_healthchecks_defined(self):
     """Tasks without any healthcheck results are not happy when the app defines healthchecks."""
     tasks_without_results = [mock.Mock(health_check_results=[]) for _ in xrange(5)]
     fake_app = mock.Mock(
         tasks=tasks_without_results,
         health_checks=["fake_healthcheck_definition"],
     )
     happy_tasks = bounce_lib.get_happy_tasks(
         fake_app, 'service', 'namespace', self.fake_system_paasta_config(),
     )
     assert happy_tasks == []
def deploy_service(
    service,
    instance,
    marathon_jobid,
    config,
    client,
    bounce_method,
    drain_method_name,
    drain_method_params,
    nerve_ns,
    bounce_health_params,
    soa_dir,
):
    """Deploy the service to marathon, either directly or via a bounce if needed.
    Called by setup_service when it's time to actually deploy.

    :param service: The name of the service to deploy
    :param instance: The instance of the service to deploy
    :param marathon_jobid: Full id of the marathon job
    :param config: The complete configuration dict to send to marathon
    :param client: A MarathonClient object
    :param bounce_method: The bounce method to use, if needed
    :param drain_method_name: The name of the traffic draining method to use.
    :param drain_method_params: Parameters handed to drain_lib.get_drain_method.
    :param nerve_ns: The nerve namespace to look in.
    :param bounce_health_params: A dictionary of options for bounce_lib.get_happy_tasks.
    :param soa_dir: Passed through unchanged to do_bounce.
    :returns: A tuple of (status, output) to be used with send_sensu_event.
              Status is 1 when the drain or bounce method is not recognized or
              the bounce lock is already held, 0 otherwise.
    :raises ValueError: if more than one existing app matches the id in ``config``.
    """

    def log_deploy_error(errormsg, level="event"):
        # Forward the caller's level: the traceback lines below ask for "debug"
        # and must not be emitted as user-facing events.
        # ``cluster`` is resolved when this closure is called, i.e. after the
        # assignment further down has run.
        return _log(
            service=service, line=errormsg, component="deploy", level=level, cluster=cluster, instance=instance
        )

    short_id = marathon_tools.format_job_id(service, instance)

    cluster = load_system_paasta_config().get_cluster()
    existing_apps = marathon_tools.get_matching_apps(service, instance, client, embed_failures=True)
    # Split the matching apps into the one we want to be running and everything else.
    new_app_list = [a for a in existing_apps if a.id == "/%s" % config["id"]]
    other_apps = [a for a in existing_apps if a.id != "/%s" % config["id"]]
    serviceinstance = "%s.%s" % (service, instance)

    if new_app_list:
        new_app = new_app_list[0]
        if len(new_app_list) != 1:
            raise ValueError("Only expected one app per ID; found %d" % len(new_app_list))
        new_app_running = True
        happy_new_tasks = bounce_lib.get_happy_tasks(new_app, service, nerve_ns, **bounce_health_params)
    else:
        new_app_running = False
        happy_new_tasks = []

    try:
        drain_method = drain_lib.get_drain_method(
            drain_method_name,
            service=service,
            instance=instance,
            nerve_ns=nerve_ns,
            drain_method_params=drain_method_params,
        )
    except KeyError:
        errormsg = "ERROR: drain_method not recognized: %s. Must be one of (%s)" % (
            drain_method_name,
            ", ".join(drain_lib.list_drain_methods()),
        )
        log_deploy_error(errormsg)
        return (1, errormsg)

    old_app_live_tasks, old_app_draining_tasks = get_old_live_draining_tasks(other_apps, drain_method)

    # Re-drain any already draining tasks on old apps
    for tasks in old_app_draining_tasks.values():
        for task in tasks:
            drain_method.drain(task)

    # If any tasks on the new app happen to be draining (e.g. someone reverts to an older version with
    # `paasta mark-for-deployment`), then we should undrain them.
    if new_app_running:
        for task in new_app.tasks:
            drain_method.stop_draining(task)

    # log all uncaught exceptions and raise them again
    try:
        try:
            bounce_func = bounce_lib.get_bounce_method_func(bounce_method)
        except KeyError:
            errormsg = "ERROR: bounce_method not recognized: %s. Must be one of (%s)" % (
                bounce_method,
                ", ".join(bounce_lib.list_bounce_methods()),
            )
            log_deploy_error(errormsg)
            return (1, errormsg)

        try:
            # The bounce itself runs while holding the per-instance zookeeper lock.
            with bounce_lib.bounce_lock_zookeeper(short_id):
                do_bounce(
                    bounce_func=bounce_func,
                    drain_method=drain_method,
                    config=config,
                    new_app_running=new_app_running,
                    happy_new_tasks=happy_new_tasks,
                    old_app_live_tasks=old_app_live_tasks,
                    old_app_draining_tasks=old_app_draining_tasks,
                    service=service,
                    bounce_method=bounce_method,
                    serviceinstance=serviceinstance,
                    cluster=cluster,
                    instance=instance,
                    marathon_jobid=marathon_jobid,
                    client=client,
                    soa_dir=soa_dir,
                )

        except bounce_lib.LockHeldException:
            log.error("Instance %s already being bounced. Exiting", short_id)
            return (1, "Instance %s is already being bounced." % short_id)
    except Exception:
        # Emit the traceback one line per log entry, at debug level, then re-raise.
        loglines = ["Exception raised during deploy of service %s:" % service]
        loglines.extend(traceback.format_exc().rstrip().split("\n"))
        for logline in loglines:
            log_deploy_error(logline, level="debug")
        raise

    return (0, "Service deployed.")
Example #43
0
def deploy_service(
    service: str,
    instance: str,
    marathon_jobid: str,
    config: marathon_tools.FormattedMarathonAppDict,
    clients: marathon_tools.MarathonClients,
    marathon_apps_with_clients: Collection[Tuple[MarathonApp, MarathonClient]],
    bounce_method: str,
    drain_method_name: str,
    drain_method_params: Dict[str, Any],
    nerve_ns: str,
    bounce_health_params: Dict[str, Any],
    soa_dir: str,
    job_config: marathon_tools.MarathonServiceConfig,
    bounce_margin_factor: float = 1.0,
) -> Tuple[int, str, Optional[float]]:
    """Deploy the service to marathon, either directly or via a bounce if needed.
    Called by setup_service when it's time to actually deploy.

    :param service: The name of the service to deploy
    :param instance: The instance of the service to deploy
    :param marathon_jobid: Full id of the marathon job
    :param config: The complete configuration dict to send to marathon
    :param clients: A MarathonClients object
    :param marathon_apps_with_clients: (app, client) pairs that are filtered down to the
                                       ones matching this service and instance
    :param bounce_method: The bounce method to use, if needed
    :param drain_method_name: The name of the traffic draining method to use.
    :param drain_method_params: Parameters handed to drain_lib.get_drain_method.
    :param nerve_ns: The nerve namespace to look in.
    :param bounce_health_params: A dictionary of options for bounce_lib.get_happy_tasks.
    :param soa_dir: Passed through unchanged to do_bounce.
    :param job_config: Used to pick the current client for the service; also passed to do_bounce.
    :param bounce_margin_factor: the multiplication factor used to calculate the number of instances to be drained
    :returns: A tuple of (status, output, bounce_in_seconds) to be used with send_sensu_event.
              bounce_in_seconds is None when the drain or bounce method is not recognized or
              the marathon lock could not be taken, and 60 when fetching draining hosts timed
              out, when there are at-risk tasks, or when the new app has more instances than
              configured; otherwise it is whatever do_bounce returned.
    :raises ValueError: if more than one (app, client) pair matches the id in ``config``
                        on the current client.
    """
    def log_deploy_error(errormsg: str, level: str = 'event') -> None:
        # Forward the caller's level: lock failures and tracebacks below ask for
        # 'debug' and must not be emitted as user-facing events.
        # ``cluster`` is resolved when this closure is called, i.e. after the
        # assignment further down has run.
        return _log(
            service=service,
            line=errormsg,
            component='deploy',
            level=level,
            cluster=cluster,
            instance=instance,
        )

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    existing_apps_with_clients = marathon_tools.get_matching_apps_with_clients(
        service=service,
        instance=instance,
        marathon_apps_with_clients=marathon_apps_with_clients,
    )

    new_client = clients.get_current_client_for_service(job_config)

    new_apps_with_clients_list: List[Tuple[MarathonApp, MarathonClient]] = []
    other_apps_with_clients: List[Tuple[MarathonApp, MarathonClient]] = []

    # An app only counts as "new" when both its id and its client match; the
    # same id on a different client is treated like any other old app.
    for a, c in existing_apps_with_clients:
        if a.id == '/%s' % config['id'] and c == new_client:
            new_apps_with_clients_list.append((a, c))
        else:
            other_apps_with_clients.append((a, c))

    serviceinstance = "%s.%s" % (service, instance)

    if new_apps_with_clients_list:
        new_app, new_client = new_apps_with_clients_list[0]
        if len(new_apps_with_clients_list) != 1:
            raise ValueError(
                "Only expected one app per ID per shard; found %d" %
                len(new_apps_with_clients_list))
        new_app_running = True
        happy_new_tasks = bounce_lib.get_happy_tasks(
            new_app,
            service,
            nerve_ns,
            system_paasta_config,
            **bounce_health_params,
        )
    else:
        new_app_running = False
        happy_new_tasks = []

    try:
        drain_method = drain_lib.get_drain_method(
            drain_method_name,
            service=service,
            instance=instance,
            nerve_ns=nerve_ns,
            drain_method_params=drain_method_params,
        )
    except KeyError:
        errormsg = 'ERROR: drain_method not recognized: %s. Must be one of (%s)' % \
            (drain_method_name, ', '.join(drain_lib.list_drain_methods()))
        log_deploy_error(errormsg)
        return (1, errormsg, None)

    try:
        draining_hosts = get_draining_hosts()
    except ReadTimeout as e:
        # Report the failure and ask to be retried in 60 seconds.
        errormsg = "ReadTimeout encountered trying to get draining hosts: %s" % e
        return (1, errormsg, 60)

    (
        old_app_live_happy_tasks,
        old_app_live_unhappy_tasks,
        old_app_draining_tasks,
        old_app_at_risk_tasks,
    ) = get_tasks_by_state(
        other_apps_with_clients=other_apps_with_clients,
        drain_method=drain_method,
        service=service,
        nerve_ns=nerve_ns,
        bounce_health_params=bounce_health_params,
        system_paasta_config=system_paasta_config,
        log_deploy_error=log_deploy_error,
        draining_hosts=draining_hosts,
    )

    # The first thing we need to do is take up the "slack" of old apps, to stop
    # them from launching new things that we are going to have to end up draining
    # and killing anyway.
    for a, c in other_apps_with_clients:
        marathon_tools.take_up_slack(app=a, client=c)

    num_at_risk_tasks = 0
    if new_app_running:
        num_at_risk_tasks = get_num_at_risk_tasks(
            new_app, draining_hosts=draining_hosts)
        if new_app.instances < config['instances'] + num_at_risk_tasks:
            log.info("Scaling %s up from %d to %d instances." %
                     (new_app.id, new_app.instances,
                      config['instances'] + num_at_risk_tasks))
            new_client.scale_app(app_id=new_app.id,
                                 instances=config['instances'] +
                                 num_at_risk_tasks,
                                 force=True)
        # If we have more than the specified number of instances running, we will want to drain some of them.
        # We will start by draining any tasks running on at-risk hosts.
        elif new_app.instances > config['instances']:
            num_tasks_to_scale = max(
                min(len(new_app.tasks), new_app.instances) -
                config['instances'], 0)
            task_dict = get_tasks_by_state_for_app(
                app=new_app,
                drain_method=drain_method,
                service=service,
                nerve_ns=nerve_ns,
                bounce_health_params=bounce_health_params,
                system_paasta_config=system_paasta_config,
                log_deploy_error=log_deploy_error,
                draining_hosts=draining_hosts,
            )
            scaling_app_happy_tasks = list(task_dict['happy'])
            scaling_app_unhappy_tasks = list(task_dict['unhappy'])
            scaling_app_draining_tasks = list(task_dict['draining'])
            scaling_app_at_risk_tasks = list(task_dict['at_risk'])

            # Hand surplus tasks of the new app over to the "old app" buckets,
            # in the order draining, unhappy, at-risk, happy, until the
            # surplus is used up.
            tasks_to_move_draining = min(len(scaling_app_draining_tasks),
                                         num_tasks_to_scale)
            old_app_draining_tasks[(new_app.id, new_client)] = set(
                scaling_app_draining_tasks[:tasks_to_move_draining])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_draining

            tasks_to_move_unhappy = min(len(scaling_app_unhappy_tasks),
                                        num_tasks_to_scale)
            old_app_live_unhappy_tasks[(new_app.id, new_client)] = set(
                scaling_app_unhappy_tasks[:tasks_to_move_unhappy], )
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_unhappy

            tasks_to_move_at_risk = min(len(scaling_app_at_risk_tasks),
                                        num_tasks_to_scale)
            old_app_at_risk_tasks[(new_app.id, new_client)] = set(
                scaling_app_at_risk_tasks[:tasks_to_move_at_risk])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_at_risk

            tasks_to_move_happy = min(len(scaling_app_happy_tasks),
                                      num_tasks_to_scale)
            old_app_live_happy_tasks[(new_app.id, new_client)] = set(
                scaling_app_happy_tasks[:tasks_to_move_happy])
            # Whatever happy tasks were not handed over stay with the new app.
            happy_new_tasks = scaling_app_happy_tasks[tasks_to_move_happy:]

            # Slack represents the extra instances that are configured in
            # marathon but don't have a launched task yet. When scaling down we want to
            # reduce this slack so marathon doesn't get a chance to launch a new task in
            # that space that we will then have to drain and kill again.
            marathon_tools.take_up_slack(client=new_client, app=new_app)

        # TODO: don't take actions in deploy_service.
        undrain_tasks(
            to_undrain=new_app.tasks,
            leave_draining=old_app_draining_tasks.get((new_app.id, new_client),
                                                      []),
            drain_method=drain_method,
            log_deploy_error=log_deploy_error,
        )

    # log all uncaught exceptions and raise them again
    try:
        try:
            bounce_func = bounce_lib.get_bounce_method_func(bounce_method)
        except KeyError:
            errormsg = 'ERROR: bounce_method not recognized: %s. Must be one of (%s)' % \
                (bounce_method, ', '.join(bounce_lib.list_bounce_methods()))
            log_deploy_error(errormsg)
            return (1, errormsg, None)

        bounce_again_in_seconds = do_bounce(
            bounce_func=bounce_func,
            drain_method=drain_method,
            config=config,
            new_app_running=new_app_running,
            happy_new_tasks=happy_new_tasks,
            old_app_live_happy_tasks=old_app_live_happy_tasks,
            old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
            old_app_draining_tasks=old_app_draining_tasks,
            old_app_at_risk_tasks=old_app_at_risk_tasks,
            service=service,
            bounce_method=bounce_method,
            serviceinstance=serviceinstance,
            cluster=cluster,
            instance=instance,
            marathon_jobid=marathon_jobid,
            clients=clients,
            soa_dir=soa_dir,
            job_config=job_config,
            bounce_margin_factor=bounce_margin_factor,
        )
    except bounce_lib.LockHeldException:
        # Not an error for the caller: status 0, the deploy is simply skipped this round.
        logline = 'Failed to get lock to create marathon app for %s.%s' % (
            service, instance)
        log_deploy_error(logline, level='debug')
        return (0, "Couldn't get marathon lock, skipping until next time",
                None)
    except Exception:
        logline = 'Exception raised during deploy of service %s:\n%s' % (
            service, traceback.format_exc())
        log_deploy_error(logline, level='debug')
        raise
    # Ask for another pass in 60 seconds while at-risk tasks exist or the new
    # app still has more instances than configured.
    if num_at_risk_tasks:
        bounce_again_in_seconds = 60
    elif new_app_running:
        if new_app.instances > config['instances']:
            bounce_again_in_seconds = 60
    return (0, 'Service deployed.', bounce_again_in_seconds)
Example #44
0
 def test_get_happy_tasks_when_running_without_healthchecks_defined(self):
     """With no healthchecks defined on the app, tasks lacking health check results all count as happy."""
     # Five tasks, none of which carries any health check result.
     unchecked_tasks = [mock.Mock(health_check_results=[]) for _ in xrange(5)]
     app_without_healthchecks = mock.Mock(tasks=unchecked_tasks, health_checks=[])
     happy_tasks = bounce_lib.get_happy_tasks(app_without_healthchecks, 'service', 'namespace')
     assert happy_tasks == unchecked_tasks
Example #45
0
 def test_get_happy_tasks_with_multiple_healthchecks_fail(self):
     """A task whose health check results are all failing must not be reported as happy."""
     # Both results report alive=False, so the single task has no passing check.
     failing_healthcheck_results = [mock.Mock(alive=False), mock.Mock(alive=False)]
     failing_task = mock.Mock(health_check_results=failing_healthcheck_results)
     app_under_test = mock.Mock(tasks=[failing_task], health_checks=[])
     happy_tasks = bounce_lib.get_happy_tasks(
         app_under_test,
         'service',
         'namespace',
         self.fake_system_paasta_config(),
     )
     assert happy_tasks == []
Example #46
0
def deploy_service(
    service,
    instance,
    marathon_jobid,
    config,
    client,
    bounce_method,
    drain_method_name,
    drain_method_params,
    nerve_ns,
    bounce_health_params,
    soa_dir,
):
    """Deploy the service to marathon, either directly or via a bounce if needed.
    Called by setup_service when it's time to actually deploy.

    :param service: The name of the service to deploy
    :param instance: The instance of the service to deploy
    :param marathon_jobid: Full id of the marathon job
    :param config: The complete configuration dict to send to marathon
    :param client: A MarathonClient object
    :param bounce_method: The bounce method to use, if needed
    :param drain_method_name: The name of the traffic draining method to use.
    :param drain_method_params: Parameters handed to drain_lib.get_drain_method.
    :param nerve_ns: The nerve namespace to look in.
    :param bounce_health_params: A dictionary of options for bounce_lib.get_happy_tasks.
    :param soa_dir: Passed through unchanged to do_bounce.
    :returns: A tuple of (status, output) to be used with send_sensu_event.
              Status is 1 when the drain or bounce method is not recognized or
              the bounce lock is already held, 0 otherwise.
    :raises ValueError: if more than one existing app matches the id in ``config``.
    """
    def log_deploy_error(errormsg, level='event'):
        # Forward the caller's level: the traceback lines below ask for 'debug'
        # and must not be emitted as user-facing events.
        # ``cluster`` is resolved when this closure is called, i.e. after the
        # assignment further down has run.
        return _log(service=service,
                    line=errormsg,
                    component='deploy',
                    level=level,
                    cluster=cluster,
                    instance=instance)

    short_id = marathon_tools.format_job_id(service, instance)

    cluster = load_system_paasta_config().get_cluster()
    existing_apps = marathon_tools.get_matching_apps(service,
                                                     instance,
                                                     client,
                                                     embed_failures=True)
    # Split the matching apps into the one we want to be running and everything else.
    new_app_list = [a for a in existing_apps if a.id == '/%s' % config['id']]
    other_apps = [a for a in existing_apps if a.id != '/%s' % config['id']]
    serviceinstance = "%s.%s" % (service, instance)

    if new_app_list:
        new_app = new_app_list[0]
        if len(new_app_list) != 1:
            raise ValueError("Only expected one app per ID; found %d" %
                             len(new_app_list))
        new_app_running = True
        happy_new_tasks = bounce_lib.get_happy_tasks(new_app, service,
                                                     nerve_ns,
                                                     **bounce_health_params)
    else:
        new_app_running = False
        happy_new_tasks = []

    try:
        drain_method = drain_lib.get_drain_method(
            drain_method_name,
            service=service,
            instance=instance,
            nerve_ns=nerve_ns,
            drain_method_params=drain_method_params,
        )
    except KeyError:
        errormsg = 'ERROR: drain_method not recognized: %s. Must be one of (%s)' % \
            (drain_method_name, ', '.join(drain_lib.list_drain_methods()))
        log_deploy_error(errormsg)
        return (1, errormsg)

    old_app_live_happy_tasks, old_app_live_unhappy_tasks, old_app_draining_tasks = get_old_happy_unhappy_draining_tasks(
        other_apps, drain_method, service, nerve_ns, bounce_health_params)

    if new_app_running:
        # Draining tasks of the new app that are being handed over for removal;
        # these must stay draining and are skipped by the undrain loop below.
        protected_draining_tasks = set()
        if new_app.instances < config['instances']:
            client.scale_app(app_id=new_app.id,
                             instances=config['instances'],
                             force=True)
        elif new_app.instances > config['instances']:
            num_tasks_to_scale = max(
                min(len(new_app.tasks), new_app.instances) -
                config['instances'], 0)
            task_dict = get_old_happy_unhappy_draining_tasks_for_app(
                new_app,
                drain_method,
                service,
                nerve_ns,
                bounce_health_params,
            )
            scaling_app_happy_tasks = list(task_dict['happy'])
            scaling_app_unhappy_tasks = list(task_dict['unhappy'])
            scaling_app_draining_tasks = list(task_dict['draining'])

            # Hand surplus tasks of the new app over to the "old app" buckets,
            # in the order draining, unhappy, happy, until the surplus is used up.
            tasks_to_move_draining = min(len(scaling_app_draining_tasks),
                                         num_tasks_to_scale)
            old_app_draining_tasks[new_app.id] = set(
                scaling_app_draining_tasks[:tasks_to_move_draining])
            protected_draining_tasks.update(
                scaling_app_draining_tasks[:tasks_to_move_draining])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_draining

            tasks_to_move_unhappy = min(len(scaling_app_unhappy_tasks),
                                        num_tasks_to_scale)
            old_app_live_unhappy_tasks[new_app.id] = set(
                scaling_app_unhappy_tasks[:tasks_to_move_unhappy])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_unhappy

            tasks_to_move_happy = min(len(scaling_app_happy_tasks),
                                      num_tasks_to_scale)
            old_app_live_happy_tasks[new_app.id] = set(
                scaling_app_happy_tasks[:tasks_to_move_happy])
            # Whatever happy tasks were not handed over stay with the new app.
            happy_new_tasks = scaling_app_happy_tasks[tasks_to_move_happy:]
        # If any tasks on the new app happen to be draining (e.g. someone reverts to an older version with
        # `paasta mark-for-deployment`), then we should undrain them.
        for task in new_app.tasks:
            if task not in protected_draining_tasks:
                drain_method.stop_draining(task)

    # Re-drain any already draining tasks on old apps
    for tasks in old_app_draining_tasks.values():
        for task in tasks:
            drain_method.drain(task)

    # log all uncaught exceptions and raise them again
    try:
        try:
            bounce_func = bounce_lib.get_bounce_method_func(bounce_method)
        except KeyError:
            errormsg = 'ERROR: bounce_method not recognized: %s. Must be one of (%s)' % \
                (bounce_method, ', '.join(bounce_lib.list_bounce_methods()))
            log_deploy_error(errormsg)
            return (1, errormsg)

        try:
            # The bounce itself runs while holding the per-instance zookeeper lock.
            with bounce_lib.bounce_lock_zookeeper(short_id):
                do_bounce(
                    bounce_func=bounce_func,
                    drain_method=drain_method,
                    config=config,
                    new_app_running=new_app_running,
                    happy_new_tasks=happy_new_tasks,
                    old_app_live_happy_tasks=old_app_live_happy_tasks,
                    old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
                    old_app_draining_tasks=old_app_draining_tasks,
                    service=service,
                    bounce_method=bounce_method,
                    serviceinstance=serviceinstance,
                    cluster=cluster,
                    instance=instance,
                    marathon_jobid=marathon_jobid,
                    client=client,
                    soa_dir=soa_dir,
                )

        except bounce_lib.LockHeldException:
            log.error("Instance %s already being bounced. Exiting", short_id)
            return (1, "Instance %s is already being bounced." % short_id)
    except Exception:
        # Emit the traceback one line per log entry, at debug level, then re-raise.
        loglines = ['Exception raised during deploy of service %s:' % service]
        loglines.extend(traceback.format_exc().rstrip().split("\n"))
        for logline in loglines:
            log_deploy_error(logline, level='debug')
        raise

    return (0, 'Service deployed.')
Example #47
0
 def test_get_happy_tasks_with_multiple_healthchecks_success(self):
     """A single passing health check result is enough for a task to be reported as happy."""
     # One passing and one failing result on the same task.
     mixed_healthcheck_results = [mock.Mock(alive=True), mock.Mock(alive=False)]
     partially_passing_task = mock.Mock(health_check_results=mixed_healthcheck_results)
     expected_happy_tasks = [partially_passing_task]
     app_under_test = mock.Mock(tasks=expected_happy_tasks, health_checks=[])
     happy_tasks = bounce_lib.get_happy_tasks(app_under_test, 'service', 'namespace')
     assert happy_tasks == expected_happy_tasks