    def test_deploy_service_already_bouncing(self):
        fake_bounce = "areallygoodbouncestrategy"
        fake_drain_method = "noop"
        fake_name = "how_many_strings"
        fake_instance = "will_i_need_to_think_of"
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance, "gityourmom", "configyourdad")
        fake_config = {"id": fake_id, "instances": 2}

        old_app_id = "%s2" % fake_id
        old_task = mock.Mock(id="old_task_id", app_id=old_app_id)
        old_app = mock.Mock(id=old_app_id, tasks=[old_task])

        fake_client = mock.MagicMock(
            list_apps=mock.Mock(return_value=[old_app]), kill_task=mock.Mock(spec=lambda app_id, id, scale=False: None)
        )

        fake_bounce_func = mock.create_autospec(
            bounce_lib.brutal_bounce, return_value={"create_app": True, "tasks_to_drain": [old_task]}
        )

        fake_short_id = marathon_tools.format_job_id(fake_name, fake_instance)

        with contextlib.nested(
            mock.patch("paasta_tools.bounce_lib.get_bounce_method_func", return_value=fake_bounce_func, autospec=True),
            mock.patch(
                "paasta_tools.bounce_lib.bounce_lock_zookeeper", side_effect=bounce_lib.LockHeldException, autospec=True
            ),
            mock.patch(
                "paasta_tools.bounce_lib.get_happy_tasks", autospec=True, side_effect=lambda x, _, __, **kwargs: x
            ),
            mock.patch("paasta_tools.setup_marathon_job._log", autospec=True),
            mock.patch("paasta_tools.setup_marathon_job.load_system_paasta_config", autospec=True),
        ) as (_, _, _, _, mock_load_system_paasta_config):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value="fake_cluster")
            result = setup_marathon_job.deploy_service(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=fake_id,
                config=fake_config,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method,
                drain_method_params={},
                nerve_ns=fake_instance,
                bounce_health_params={},
                soa_dir="fake_soa_dir",
            )
            assert result == (1, "Instance %s is already being bounced." % fake_short_id)
Example #2
    def test_deploy_service_unknown_drain_method(self):
        fake_bounce = "exists"
        fake_drain_method = "doesntexist"
        fake_name = "whoa"
        fake_instance = "the_earth_is_tiny"
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_apps = [mock.Mock(id=fake_id, tasks=[]), mock.Mock(id=("%s2" % fake_id), tasks=[])]
        fake_client = mock.MagicMock(list_apps=mock.Mock(return_value=fake_apps))
        fake_config = {"id": fake_id, "instances": 2}

        errormsg = "ERROR: drain_method not recognized: doesntexist. Must be one of (exists1, exists2)"
        expected = (1, errormsg)

        with contextlib.nested(
            mock.patch("paasta_tools.setup_marathon_job._log", autospec=True),
            mock.patch("paasta_tools.setup_marathon_job.load_system_paasta_config", autospec=True),
            mock.patch("paasta_tools.drain_lib._drain_methods", new={"exists1": mock.Mock(), "exists2": mock.Mock()}),
        ) as (mock_log, mock_load_system_paasta_config, mock_drain_methods):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value="fake_cluster")
            actual = setup_marathon_job.deploy_service(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=fake_id,
                config=fake_config,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method,
                drain_method_params={},
                nerve_ns=fake_instance,
                bounce_health_params={},
                soa_dir="fake_soa_dir",
            )
            assert mock_log.call_count == 1
        assert expected == actual
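
A note on these tests: contextlib.nested was removed in Python 3, so on modern interpreters the same stack of patches is usually built with contextlib.ExitStack from the standard library. A minimal sketch of the equivalent setup for the test above (using unittest.mock; the test body itself is unchanged):

from contextlib import ExitStack
from unittest import mock

def test_deploy_service_unknown_drain_method_py3(self):
    # Same patches as the contextlib.nested version above, expressed with
    # ExitStack, which is the Python 3 replacement for contextlib.nested.
    with ExitStack() as stack:
        mock_log = stack.enter_context(mock.patch("paasta_tools.setup_marathon_job._log", autospec=True))
        mock_load_system_paasta_config = stack.enter_context(
            mock.patch("paasta_tools.setup_marathon_job.load_system_paasta_config", autospec=True)
        )
        stack.enter_context(
            mock.patch("paasta_tools.drain_lib._drain_methods", new={"exists1": mock.Mock(), "exists2": mock.Mock()})
        )
        mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value="fake_cluster")
        # ... the rest of the test body is identical to the version above ...
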
Example #3
def test_status_mesos_tasks_verbose():
    with contextlib.nested(
        mock.patch("paasta_tools.mesos_tools.get_running_tasks_from_active_frameworks", autospec=True),
        mock.patch("paasta_tools.mesos_tools.get_non_running_tasks_from_active_frameworks", autospec=True),
        mock.patch("paasta_tools.mesos_tools.format_running_mesos_task_row", autospec=True),
        mock.patch("paasta_tools.mesos_tools.format_non_running_mesos_task_row", autospec=True),
    ) as (
        get_running_mesos_tasks_patch,
        get_non_running_mesos_tasks_patch,
        format_running_mesos_task_row_patch,
        format_non_running_mesos_task_row_patch,
    ):
        get_running_mesos_tasks_patch.return_value = ["doing a lap"]
        get_non_running_mesos_tasks_patch.return_value = ["eating a burrito"]
        format_running_mesos_task_row_patch.return_value = ["id", "host", "mem", "cpu", "time"]
        format_non_running_mesos_task_row_patch.return_value = ["id", "host", "time", "state"]
        job_id = format_job_id("fake_service", "fake_instance")

        def get_short_task_id(_):
            return "short_task_id"

        actual = mesos_tools.status_mesos_tasks_verbose(job_id, get_short_task_id)
        assert "Running Tasks" in actual
        assert "Non-Running Tasks" in actual
        format_running_mesos_task_row_patch.assert_called_once_with("doing a lap", get_short_task_id)
        format_non_running_mesos_task_row_patch.assert_called_once_with("eating a burrito", get_short_task_id)
Example #4
def autoscale_services(soa_dir=DEFAULT_SOA_DIR):
    try:
        with create_autoscaling_lock():
            cluster = load_system_paasta_config().get_cluster()
            configs = get_configs_of_services_to_scale(cluster=cluster, soa_dir=soa_dir)
            if configs:
                marathon_config = load_marathon_config()
                marathon_client = get_marathon_client(
                    url=marathon_config.get_url(),
                    user=marathon_config.get_username(),
                    passwd=marathon_config.get_password())
                all_marathon_tasks = marathon_client.list_tasks()
                all_mesos_tasks = get_running_tasks_from_active_frameworks('')  # empty string matches all app ids
                with ZookeeperPool():
                    for config in configs:
                        try:
                            job_id = format_job_id(config.service, config.instance)
                            # Get a dict of healthy tasks; we assume tasks with no healthcheck defined
                            # are healthy, and tasks with no healthcheck results but a defined
                            # healthcheck to be unhealthy.
                            log.info("Inspecting %s for autoscaling" % job_id)
                            marathon_tasks = {task.id: task for task in all_marathon_tasks
                                              if job_id == get_short_job_id(task.id) and
                                              (is_task_healthy(task) or not
                                               marathon_client.get_app(task.app_id).health_checks)}
                            if not marathon_tasks:
                                raise MetricsProviderNoDataError("Couldn't find any healthy marathon tasks")
                            mesos_tasks = [task for task in all_mesos_tasks if task['id'] in marathon_tasks]
                            autoscale_marathon_instance(config, list(marathon_tasks.values()), mesos_tasks)
                        except Exception as e:
                            write_to_log(config=config, line='Caught Exception %s' % e)
    except LockHeldException:
        log.warning("Skipping autoscaling run for services because the lock is held")
Example #5
    def test_get_old_live_draining_tasks_empty(self):
        fake_name = 'whoa'
        fake_instance = 'the_earth_is_tiny'
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)

        fake_apps = [
            mock.Mock(id=fake_id, tasks=[]),
            mock.Mock(id=('%s2' % fake_id), tasks=[])
        ]

        expected_live_tasks = {
            fake_apps[0].id: set(),
            fake_apps[1].id: set(),
        }
        expected_draining_tasks = {
            fake_apps[0].id: set(),
            fake_apps[1].id: set(),
        }

        fake_drain_method = mock.Mock(is_draining=lambda _: True)

        actual = setup_marathon_job.get_old_live_draining_tasks(fake_apps, fake_drain_method)
        actual_live_tasks, actual_draining_tasks = actual
        assert actual_live_tasks == expected_live_tasks
        assert actual_draining_tasks == expected_draining_tasks
Example #6
def status_mesos_tasks(service, instance, normal_instance_count):
    job_id = marathon_tools.format_job_id(service, instance)
    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    filter_string = "%s%s" % (job_id, marathon_tools.MESOS_TASK_SPACER)
    try:
        count = len(
            select_tasks_by_id(
                get_cached_list_of_running_tasks_from_frameworks(),
                filter_string))
        if count >= normal_instance_count:
            status = PaastaColors.green("Healthy")
            count = PaastaColors.green("(%d/%d)" %
                                       (count, normal_instance_count))
        elif count == 0:
            status = PaastaColors.red("Critical")
            count = PaastaColors.red("(%d/%d)" %
                                     (count, normal_instance_count))
        else:
            status = PaastaColors.yellow("Warning")
            count = PaastaColors.yellow("(%d/%d)" %
                                        (count, normal_instance_count))
        running_string = PaastaColors.bold('TASK_RUNNING')
        return "Mesos:      %s - %s tasks in the %s state." % (status, count,
                                                               running_string)
    except ReadTimeout:
        return "Error: talking to Mesos timed out. It may be overloaded."
Example #7
    def test_deploy_service_logs_exceptions(self):
        fake_bounce = "WHEEEEEEEEEEEEEEEE"
        fake_drain_method = "noop"
        fake_name = "whoa"
        fake_instance = "the_earth_is_tiny"
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_apps = [mock.Mock(id=fake_id, tasks=[]), mock.Mock(id=("%s2" % fake_id), tasks=[])]
        fake_client = mock.MagicMock(list_apps=mock.Mock(return_value=fake_apps))
        fake_config = {"id": fake_id, "instances": 2}

        with contextlib.nested(
            mock.patch("paasta_tools.setup_marathon_job._log", autospec=True),
            mock.patch("paasta_tools.setup_marathon_job.bounce_lib.get_bounce_method_func", side_effect=IOError("foo")),
            mock.patch("paasta_tools.setup_marathon_job.load_system_paasta_config", autospec=True),
        ) as (mock_log, mock_bounce, mock_load_system_paasta_config):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value="fake_cluster")
            with raises(IOError):
                setup_marathon_job.deploy_service(
                    service=fake_name,
                    instance=fake_instance,
                    marathon_jobid=fake_id,
                    config=fake_config,
                    client=fake_client,
                    bounce_method=fake_bounce,
                    drain_method_name=fake_drain_method,
                    drain_method_params={},
                    nerve_ns=fake_instance,
                    bounce_health_params={},
                    soa_dir="fake_soa_dir",
                )
            assert fake_name in mock_log.mock_calls[0][2]["line"]
            assert "Traceback" in mock_log.mock_calls[1][2]["line"]
Example #8
    def test_get_old_live_draining_tasks_not_empty(self):
        fake_name = 'whoa'
        fake_instance = 'the_earth_is_tiny'
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)

        def fake_task(state):
            return mock.Mock(_drain_state=state)

        fake_apps = [
            mock.Mock(id=fake_id, tasks=[fake_task('up'), fake_task('down')]),
            mock.Mock(id=('%s2' % fake_id), tasks=[fake_task('up'), fake_task('down')])
        ]
        expected_live_tasks = {
            fake_apps[0].id: set([fake_apps[0].tasks[0]]),
            fake_apps[1].id: set([fake_apps[1].tasks[0]]),
        }
        expected_draining_tasks = {
            fake_apps[0].id: set([fake_apps[0].tasks[1]]),
            fake_apps[1].id: set([fake_apps[1].tasks[1]]),
        }

        fake_drain_method = mock.Mock(is_draining=lambda t: t._drain_state == 'down')

        actual = setup_marathon_job.get_old_live_draining_tasks(fake_apps, fake_drain_method)
        actual_live_tasks, actual_draining_tasks = actual
        assert actual_live_tasks == expected_live_tasks
        assert actual_draining_tasks == expected_draining_tasks
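
Taken together, the empty and non-empty tests pin down the contract of get_old_live_draining_tasks: for each app it partitions the tasks into live and draining sets, keyed by app id, according to drain_method.is_draining. A minimal sketch consistent with these tests (not the actual paasta_tools implementation):

def get_old_live_draining_tasks(apps, drain_method):
    # Sketch reconstructed from the tests above: split each app's tasks
    # into live vs. draining sets, keyed by the app id.
    live_tasks = {}
    draining_tasks = {}
    for app in apps:
        draining = {task for task in app.tasks if drain_method.is_draining(task)}
        live_tasks[app.id] = set(app.tasks) - draining
        draining_tasks[app.id] = draining
    return live_tasks, draining_tasks
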
Example #9
def http_metrics_provider(marathon_service_config, marathon_tasks, mesos_tasks, endpoint='status', *args, **kwargs):
    """
    Gets the average utilization of a service across all of its tasks, where the utilization of
    a task is read from a HTTP endpoint on the host.

    The HTTP endpoint must return JSON with a 'utilization' key with a value from 0 to 1.

    :param marathon_service_config: the MarathonServiceConfig to get data from
    :param marathon_tasks: Marathon tasks to get data from
    :param mesos_tasks: Mesos tasks to get data from

    :returns: the service's average utilization, from 0 to 1
    """

    job_id = format_job_id(marathon_service_config.service, marathon_service_config.instance)
    endpoint = endpoint.lstrip('/')

    def get_short_job_id(task_id):
        return MESOS_TASK_SPACER.join(task_id.split(MESOS_TASK_SPACER, 2)[:2])

    tasks = [task for task in marathon_tasks if job_id == get_short_job_id(task.id) and task.health_check_results]
    utilization = []
    for task in tasks:
        try:
            utilization.append(float(requests.get('http://%s:%s/%s' % (
                task.host, task.ports[0], endpoint)).json()['utilization']))
        except Exception:
            pass
    if not utilization:
        raise IngesterNoDataError('Couldn\'t get any data from http endpoint %s for %s.%s' % (
            endpoint, marathon_service_config.service, marathon_service_config.instance))
    return sum(utilization) / len(utilization)
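
The contract this provider imposes on a service is small: every task must serve JSON like {"utilization": 0.25} at the configured endpoint, and the provider averages those values across tasks. A sketch of the arithmetic with the HTTP call stubbed out (the task names and values are invented):

def fake_endpoint_response(task_name):
    # Stand-in for requests.get('http://host:port/status').json().
    return {'utilization': {'web1': 0.25, 'web2': 0.75}[task_name]}

samples = [float(fake_endpoint_response(name)['utilization']) for name in ('web1', 'web2')]
assert sum(samples) / len(samples) == 0.5  # the average the provider would return
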
Example #10
def autoscale_services(soa_dir=DEFAULT_SOA_DIR):
    try:
        with create_autoscaling_lock():
            cluster = load_system_paasta_config().get_cluster()
            configs = get_configs_of_services_to_scale(cluster=cluster, soa_dir=soa_dir)
            if configs:
                marathon_config = load_marathon_config()
                marathon_client = get_marathon_client(
                    url=marathon_config.get_url(),
                    user=marathon_config.get_username(),
                    passwd=marathon_config.get_password())
                all_marathon_tasks = marathon_client.list_tasks()
                all_mesos_tasks = get_running_tasks_from_active_frameworks('')  # empty string matches all app ids
                with ZookeeperPool():
                    for config in configs:
                        try:
                            job_id = format_job_id(config.service, config.instance)
                            # Get a dict of healthy tasks; we assume tasks with no healthcheck defined
                            # are healthy, and tasks with no healthcheck results but a defined
                            # healthcheck to be unhealthy.
                            log.info("Inspecting %s for autoscaling" % job_id)
                            marathon_tasks = {task.id: task for task in all_marathon_tasks
                                              if job_id == get_short_job_id(task.id) and
                                              (is_task_healthy(task) or not
                                               marathon_client.get_app(task.app_id).health_checks)}
                            if not marathon_tasks:
                                raise MetricsProviderNoDataError("Couldn't find any healthy marathon tasks")
                            mesos_tasks = [task for task in all_mesos_tasks if task['id'] in marathon_tasks]
                            autoscale_marathon_instance(config, list(marathon_tasks.values()), mesos_tasks)
                        except Exception as e:
                            write_to_log(config=config, line='Caught Exception %s' % e)
    except LockHeldException:
        log.warning("Skipping autoscaling run for services because the lock is held")
Example #11
def test_status_mesos_tasks_verbose():
    with contextlib.nested(
        mock.patch('paasta_tools.mesos_tools.get_running_tasks_from_active_frameworks', autospec=True,),
        mock.patch('paasta_tools.mesos_tools.get_non_running_tasks_from_active_frameworks', autospec=True,),
        mock.patch('paasta_tools.mesos_tools.format_running_mesos_task_row', autospec=True,),
        mock.patch('paasta_tools.mesos_tools.format_non_running_mesos_task_row', autospec=True,),
    ) as (
        get_running_mesos_tasks_patch,
        get_non_running_mesos_tasks_patch,
        format_running_mesos_task_row_patch,
        format_non_running_mesos_task_row_patch,
    ):
        get_running_mesos_tasks_patch.return_value = ['doing a lap']
        get_non_running_mesos_tasks_patch.return_value = ['eating a burrito']
        format_running_mesos_task_row_patch.return_value = ['id', 'host', 'mem', 'cpu', 'disk', 'time']
        format_non_running_mesos_task_row_patch.return_value = ['id', 'host', 'time', 'state']
        job_id = format_job_id('fake_service', 'fake_instance')

        def get_short_task_id(_):
            return 'short_task_id'

        actual = mesos_tools.status_mesos_tasks_verbose(job_id, get_short_task_id)
        assert 'Running Tasks' in actual
        assert 'Non-Running Tasks' in actual
        format_running_mesos_task_row_patch.assert_called_once_with('doing a lap', get_short_task_id)
        format_non_running_mesos_task_row_patch.assert_called_once_with('eating a burrito', get_short_task_id)
Example #12
    def test_deploy_service_logs_exceptions(self):
        fake_bounce = 'WHEEEEEEEEEEEEEEEE'
        fake_drain_method = 'noop'
        fake_name = 'whoa'
        fake_instance = 'the_earth_is_tiny'
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_apps = [mock.Mock(id=fake_id, tasks=[]), mock.Mock(id=('%s2' % fake_id), tasks=[])]
        fake_client = mock.MagicMock(
            list_apps=mock.Mock(return_value=fake_apps))
        fake_config = {'id': fake_id, 'instances': 2}

        with contextlib.nested(
            mock.patch('setup_marathon_job._log', autospec=True),
            mock.patch('setup_marathon_job.bounce_lib.get_bounce_method_func', side_effect=IOError('foo')),
            mock.patch('setup_marathon_job.load_system_paasta_config', autospec=True),
        ) as (mock_log, mock_bounce, mock_load_system_paasta_config):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value='fake_cluster')
            with raises(IOError):
                setup_marathon_job.deploy_service(
                    service=fake_name,
                    instance=fake_instance,
                    marathon_jobid=fake_id,
                    config=fake_config,
                    client=fake_client,
                    bounce_method=fake_bounce,
                    drain_method_name=fake_drain_method,
                    drain_method_params={},
                    nerve_ns=fake_instance,
                    bounce_health_params={},
                    soa_dir='fake_soa_dir',
                )
            assert fake_name in mock_log.mock_calls[0][2]["line"]
            assert 'Traceback' in mock_log.mock_calls[1][2]["line"]
Example #13
def autoscale_services(soa_dir=DEFAULT_SOA_DIR):
    try:
        with create_autoscaling_lock():
            cluster = load_system_paasta_config().get_cluster()
            services = get_services_for_cluster(
                cluster=cluster,
                instance_type='marathon',
                soa_dir=soa_dir,
            )
            configs = []
            for service, instance in services:
                service_config = load_marathon_service_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                )
                if service_config.get_max_instances() and service_config.get_desired_state() == 'start':
                    configs.append(service_config)

            if configs:
                marathon_config = load_marathon_config()
                all_marathon_tasks = get_marathon_client(
                    url=marathon_config.get_url(),
                    user=marathon_config.get_username(),
                    passwd=marathon_config.get_password(),
                ).list_tasks()
                all_mesos_tasks = get_running_tasks_from_active_frameworks(
                    '')  # empty string matches all app ids
                with ZookeeperPool():
                    for config in configs:
                        if config.get_autoscaling_params()['decision_policy'] != 'bespoke':
                            try:
                                job_id = format_job_id(config.service,
                                                       config.instance)
                                marathon_tasks = {
                                    task.id: task
                                    for task in all_marathon_tasks
                                    if job_id == get_short_job_id(task.id)
                                    and task.health_check_results
                                }
                                if not marathon_tasks:
                                    raise MetricsProviderNoDataError(
                                        "Couldn't find any healthy marathon tasks"
                                    )
                                mesos_tasks = [
                                    task for task in all_mesos_tasks
                                    if task['id'] in marathon_tasks
                                ]
                                autoscale_marathon_instance(
                                    config, list(marathon_tasks.values()),
                                    mesos_tasks)
                            except Exception as e:
                                write_to_log(config=config,
                                             line='Caught Exception %s' % e)
    except LockHeldException:
        pass
Example #14
    def test_get_old_live_draining_tasks_not_empty(self):
        fake_name = "whoa"
        fake_instance = "the_earth_is_tiny"
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)

        def fake_task(state):
            return mock.Mock(_drain_state=state)

        fake_apps = [
            mock.Mock(id=fake_id, tasks=[fake_task("up"), fake_task("down")]),
            mock.Mock(id=("%s2" % fake_id), tasks=[fake_task("up"), fake_task("down")]),
        ]
        expected_live_tasks = {
            fake_apps[0].id: set([fake_apps[0].tasks[0]]),
            fake_apps[1].id: set([fake_apps[1].tasks[0]]),
        }
        expected_draining_tasks = {
            fake_apps[0].id: set([fake_apps[0].tasks[1]]),
            fake_apps[1].id: set([fake_apps[1].tasks[1]]),
        }

        fake_drain_method = mock.Mock(is_draining=lambda t: t._drain_state == "down")

        actual = setup_marathon_job.get_old_live_draining_tasks(fake_apps, fake_drain_method)
        actual_live_tasks, actual_draining_tasks = actual
        assert actual_live_tasks == expected_live_tasks
        assert actual_draining_tasks == expected_draining_tasks
Example #15
def filter_autoscaling_tasks(marathon_apps, all_mesos_tasks, config):
    job_id_prefix = "%s%s" % (format_job_id(
        service=config.service, instance=config.instance), MESOS_TASK_SPACER)

    # Get a dict of healthy tasks; we assume tasks with no healthcheck defined
    # are healthy. We assume tasks with no healthcheck results but a defined
    # healthcheck to be unhealthy (unless they are "old", in which case we
    # assume that marathon has screwed up and stopped healthchecking, but that
    # they are healthy).
    log.info("Inspecting %s for autoscaling" % job_id_prefix)
    marathon_tasks = {}
    for app in marathon_apps:
        for task in app.tasks:
            if task.id.startswith(job_id_prefix) and (
                    is_task_healthy(task) or not app.health_checks
                    or is_old_task_missing_healthchecks(task, app)):
                marathon_tasks[task.id] = task

    if not marathon_tasks:
        raise MetricsProviderNoDataError(
            "Couldn't find any healthy marathon tasks")
    mesos_tasks = [
        task for task in all_mesos_tasks if task['id'] in marathon_tasks
    ]
    return (marathon_tasks, mesos_tasks)
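
The job_id_prefix filter above is plain prefix matching on task ids. A small illustration with stand-in objects (types.SimpleNamespace substitutes for real Marathon app and task objects; the ids are invented and assume the spacer is '.'):

from types import SimpleNamespace

app = SimpleNamespace(
    health_checks=[],
    tasks=[
        SimpleNamespace(id='service.main.task1'),
        SimpleNamespace(id='service.other.task2'),
    ],
)
job_id_prefix = 'service.main.'
matched = [task for task in app.tasks if task.id.startswith(job_id_prefix)]
assert [task.id for task in matched] == ['service.main.task1']
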
Example #16
    def test_get_old_live_draining_tasks_empty(self):
        fake_name = 'whoa'
        fake_instance = 'the_earth_is_tiny'
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)

        fake_apps = [
            mock.Mock(id=fake_id, tasks=[]),
            mock.Mock(id=('%s2' % fake_id), tasks=[])
        ]

        expected_live_tasks = {
            fake_apps[0].id: set(),
            fake_apps[1].id: set(),
        }
        expected_draining_tasks = {
            fake_apps[0].id: set(),
            fake_apps[1].id: set(),
        }

        fake_drain_method = mock.Mock(is_draining=lambda _: True)

        actual = setup_marathon_job.get_old_live_draining_tasks(fake_apps, fake_drain_method)
        actual_live_tasks, actual_draining_tasks = actual
        assert actual_live_tasks == expected_live_tasks
        assert actual_draining_tasks == expected_draining_tasks
Example #17
    def test_deploy_service_logs_exceptions(self):
        fake_bounce = 'WHEEEEEEEEEEEEEEEE'
        fake_drain_method = 'noop'
        fake_name = 'whoa'
        fake_instance = 'the_earth_is_tiny'
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_apps = [mock.Mock(id=fake_id, tasks=[]), mock.Mock(id=('%s2' % fake_id), tasks=[])]
        fake_client = mock.MagicMock(
            list_apps=mock.Mock(return_value=fake_apps))
        fake_config = {'id': fake_id, 'instances': 2}

        with contextlib.nested(
            mock.patch('paasta_tools.setup_marathon_job._log', autospec=True),
            mock.patch('paasta_tools.setup_marathon_job.bounce_lib.get_bounce_method_func', side_effect=IOError('foo')),
            mock.patch('paasta_tools.setup_marathon_job.load_system_paasta_config', autospec=True),
        ) as (mock_log, mock_bounce, mock_load_system_paasta_config):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value='fake_cluster')
            with raises(IOError):
                setup_marathon_job.deploy_service(
                    service=fake_name,
                    instance=fake_instance,
                    marathon_jobid=fake_id,
                    config=fake_config,
                    client=fake_client,
                    bounce_method=fake_bounce,
                    drain_method_name=fake_drain_method,
                    drain_method_params={},
                    nerve_ns=fake_instance,
                    bounce_health_params={},
                    soa_dir='fake_soa_dir',
                )
            assert fake_name in mock_log.mock_calls[0][2]["line"]
            assert 'Traceback' in mock_log.mock_calls[1][2]["line"]
Example #18
    def test_setup_service_srv_already_exists(self):
        fake_name = "if_trees_could_talk"
        fake_instance = "would_they_scream"
        fake_client = mock.MagicMock(get_app=mock.Mock(return_value=True))
        full_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_complete = {"seven": "full", "eight": "frightened", "nine": "eaten", "id": full_id}
        with contextlib.nested(
            mock.patch("paasta_tools.marathon_tools.create_complete_config", return_value=fake_complete, autospec=True),
            mock.patch(
                "paasta_tools.marathon_tools.load_marathon_config",
                return_value=self.fake_marathon_config,
                autospec=True,
            ),
            mock.patch("paasta_tools.setup_marathon_job.deploy_service", autospec=True),
        ) as (create_config_patch, get_config_patch, deploy_service_patch):
            setup_marathon_job.setup_service(
                service=fake_name,
                instance=fake_instance,
                client=fake_client,
                marathon_config=self.fake_marathon_config,
                service_marathon_config=self.fake_marathon_service_config,
                soa_dir=None,
            )
            create_config_patch.assert_called_once_with(fake_name, fake_instance, self.fake_marathon_config)
            assert deploy_service_patch.call_count == 1
Example #19
def deploy_marathon_service(service, instance, client, soa_dir,
                            marathon_config, marathon_apps):
    """deploy the service instance given and proccess return code
    if there was an error we send a sensu alert.

    :param service: The service name to setup
    :param instance: The instance of the service to setup
    :param client: A MarathonClient object
    :param soa_dir: Path to yelpsoa configs
    :param marathon_config: The service instance's configuration dict
    :param marathon_apps: A list of all marathon app objects
    :returns: A tuple of (status, bounce_in_seconds) to be used by paasta-deployd
        bounce_in_seconds instructs how long until the deployd should try another bounce
        None means that it is in a steady state and doesn't need to bounce again
    """
    short_id = marathon_tools.format_job_id(service, instance)
    try:
        with bounce_lib.bounce_lock_zookeeper(short_id):
            try:
                service_instance_config = marathon_tools.load_marathon_service_config_no_cache(
                    service,
                    instance,
                    load_system_paasta_config().get_cluster(),
                    soa_dir=soa_dir,
                )
            except NoDeploymentsAvailable:
                log.debug(
                    "No deployments found for %s.%s in cluster %s. Skipping." %
                    (service, instance,
                     load_system_paasta_config().get_cluster()))
                return 0, None
            except NoConfigurationForServiceError:
                error_msg = "Could not read marathon configuration file for %s.%s in cluster %s" % \
                            (service, instance, load_system_paasta_config().get_cluster())
                log.error(error_msg)
                return 1, None

            try:
                status, output, bounce_again_in_seconds = setup_service(
                    service,
                    instance,
                    client,
                    service_instance_config,
                    marathon_apps,
                    soa_dir,
                )
                sensu_status = pysensu_yelp.Status.CRITICAL if status else pysensu_yelp.Status.OK
                send_event(service, instance, soa_dir, sensu_status, output)
                return 0, bounce_again_in_seconds
            except (KeyError, TypeError, AttributeError, InvalidInstanceConfig,
                    NoSlavesAvailableError):
                error_str = traceback.format_exc()
                log.error(error_str)
                send_event(service, instance, soa_dir,
                           pysensu_yelp.Status.CRITICAL, error_str)
                return 1, None
    except bounce_lib.LockHeldException:
        log.error("Instance %s already being bounced. Exiting", short_id)
        return 0, None
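
The (status, bounce_in_seconds) tuple is built for a polling deploy loop. A hedged sketch of how a deployd-style caller might consume it, given the arguments from the surrounding context; requeue_service_instance is a hypothetical scheduling helper, not a real paasta_tools function:

status, bounce_in_seconds = deploy_marathon_service(
    service, instance, client, soa_dir, marathon_config, marathon_apps)
if bounce_in_seconds is not None:
    # Not yet in a steady state: schedule another bounce attempt later.
    requeue_service_instance(service, instance, delay=bounce_in_seconds)  # hypothetical helper
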
Example #20
def write_to_log(config, line, level='event'):
    _log(
        service=config.service,
        line="%s: %s" % (format_job_id(config.service, config.instance), line),
        component='deploy',
        level=level,
        cluster=config.cluster,
        instance=config.instance,
    )
Example #21
def write_to_log(config, line, level='event'):
    _log(
        service=config.service,
        line="%s: %s" % (format_job_id(config.service, config.instance), line),
        component='deploy',
        level=level,
        cluster=config.cluster,
        instance=config.instance,
    )
Example #22
async def marathon_mesos_status(service: str, instance: str,
                                verbose: int) -> MutableMapping[str, Any]:
    mesos_status: MutableMapping[str, Any] = {}

    job_id = marathon_tools.format_job_id(service, instance)
    job_id_filter_string = f"{job_id}{marathon_tools.MESOS_TASK_SPACER}"

    try:
        running_and_active_tasks = select_tasks_by_id(
            await get_cached_list_of_running_tasks_from_frameworks(),
            job_id=job_id_filter_string,
        )
    except (ReadTimeout, asyncio.TimeoutError):
        return {
            "error_message":
            "Talking to Mesos timed out. It may be overloaded."
        }

    mesos_status["running_task_count"] = len(running_and_active_tasks)

    if verbose > 0:
        num_tail_lines = calculate_tail_lines(verbose)
        running_task_dict_futures = []

        for task in running_and_active_tasks:
            running_task_dict_futures.append(
                asyncio.ensure_future(
                    get_mesos_running_task_dict(task, num_tail_lines)))

        non_running_tasks = select_tasks_by_id(
            await get_cached_list_of_not_running_tasks_from_frameworks(),
            job_id=job_id_filter_string,
        )
        non_running_tasks.sort(
            key=lambda task: get_first_status_timestamp(task) or 0)
        non_running_tasks = list(reversed(non_running_tasks[-10:]))
        non_running_task_dict_futures = []
        for task in non_running_tasks:
            non_running_task_dict_futures.append(
                asyncio.ensure_future(
                    get_mesos_non_running_task_dict(task, num_tail_lines)))

        all_task_dict_futures = (running_task_dict_futures +
                                 non_running_task_dict_futures)
        if len(all_task_dict_futures):
            await asyncio.wait(all_task_dict_futures)

        mesos_status["running_tasks"] = [
            task_future.result() for task_future in running_task_dict_futures
        ]
        mesos_status["non_running_tasks"] = [
            task_future.result()
            for task_future in non_running_task_dict_futures
        ]

    return mesos_status
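
Since marathon_mesos_status is a coroutine, a synchronous caller has to drive it through an event loop. A minimal usage sketch, assuming Python 3.7+ (the service and instance names are placeholders):

import asyncio

status = asyncio.run(marathon_mesos_status('fake_service', 'main', verbose=1))
print(status.get('error_message') or status['running_task_count'])
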
Example #23
def marathon_log_line_passes_filter(line, levels, service, components, clusters):
    """Given a (JSON-formatted) log line where the message is a Marathon log line,
    return True if the line should be displayed given the provided service; return False
    otherwise."""
    try:
        parsed_line = json.loads(line)
    except ValueError:
        log.debug('Trouble parsing line as json. Skipping. Line: %r' % line)
        return False
    return format_job_id(service, '') in parsed_line.get('message', '')
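
A quick check of the filter's behavior on a well-formed and a malformed line, assuming format_job_id(service, '') yields a prefix like 'my_service.'; the service name is a placeholder, and the unused levels/components/clusters arguments are passed as None:

import json

good_line = json.dumps({'message': 'Deployed my_service.main to cluster'})
bad_line = 'not json at all'

assert marathon_log_line_passes_filter(good_line, None, 'my_service', None, None)
assert not marathon_log_line_passes_filter(bad_line, None, 'my_service', None, None)
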
Example #24
def cleanup_apps(soa_dir, kill_threshold=0.5, force=False):
    """Clean up old or invalid jobs/apps from marathon. Retrieves
    both a list of apps currently in marathon and a list of valid
    app ids in order to determine what to kill.

    :param soa_dir: The SOA config directory to read from
    :param kill_threshold: The decimal fraction of apps we think is
        sane to kill when this job runs.
    :param force: Force the cleanup if we are above the kill_threshold"""
    log.info("Loading marathon configuration")
    marathon_config = marathon_tools.load_marathon_config()
    log.info("Connecting to marathon")
    client = marathon_tools.get_marathon_client(
        marathon_config.get_url(),
        marathon_config.get_username(),
        marathon_config.get_password(),
    )

    valid_services = get_services_for_cluster(instance_type='marathon',
                                              soa_dir=soa_dir)
    running_app_ids = marathon_tools.list_all_marathon_app_ids(client)

    running_apps = []
    for app_id in running_app_ids:
        try:
            app_id = marathon_tools.deformat_job_id(app_id)
        except InvalidJobNameError:
            log.warn(
                "%s doesn't conform to paasta naming conventions? Skipping." %
                app_id)
            continue
        running_apps.append(app_id)
    apps_to_kill = [(service, instance, git_sha, config_sha)
                    for service, instance, git_sha, config_sha in running_apps
                    if (service, instance) not in valid_services]

    log.debug("Running apps: %s" % running_apps)
    log.debug("Valid apps: %s" % valid_services)
    log.debug("Terminating: %s" % apps_to_kill)
    if running_apps:
        above_kill_threshold = float(len(apps_to_kill)) / float(
            len(running_apps)) > float(kill_threshold)
        if above_kill_threshold and not force:
            log.critical(
                "Paasta was about to kill more than %s of the running services; this "
                "is probably a BAD mistake! Run again with --force if you "
                "really need to destroy everything" % kill_threshold)
            raise DontKillEverythingError
    for running_app in apps_to_kill:
        app_id = marathon_tools.format_job_id(*running_app)
        delete_app(
            app_id=app_id,
            client=client,
            soa_dir=soa_dir,
        )
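
The kill_threshold guard is a simple ratio check: with 10 running apps and the default threshold of 0.5, a run that would delete 6 of them trips DontKillEverythingError unless --force is passed. The arithmetic, spelled out:

running_apps_count, apps_to_kill_count, kill_threshold = 10, 6, 0.5
above_kill_threshold = float(apps_to_kill_count) / float(running_apps_count) > float(kill_threshold)
assert above_kill_threshold  # 0.6 > 0.5, so cleanup refuses without --force
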
Example #25
File: logs.py Project: ese/paasta
def marathon_log_line_passes_filter(line, levels, service, components,
                                    clusters):
    """Given a (JSON-formatted) log line where the message is a Marathon log line,
    return True if the line should be displayed given the provided service; return False
    otherwise."""
    try:
        parsed_line = json.loads(line)
    except ValueError:
        log.debug('Trouble parsing line as json. Skipping. Line: %r' % line)
        return False
    return format_job_id(service, '') in parsed_line.get('message', '')
Example #26
def check_healthy_marathon_tasks_for_service_instance(instance_config,
                                                      expected_count,
                                                      all_tasks):
    app_id = format_job_id(instance_config.service, instance_config.instance)
    num_healthy_tasks = filter_healthy_marathon_instances_for_short_app_id(
        all_tasks=all_tasks, app_id=app_id)
    log.info("Checking %s in marathon as it is not in smartstack" % app_id)
    monitoring_tools.send_replication_event_if_under_replication(
        instance_config=instance_config,
        expected_count=expected_count,
        num_available=num_healthy_tasks,
    )
Example #27
def check_healthy_marathon_tasks_for_service_instance(client, service, instance, cluster, soa_dir, expected_count):
    app_id = format_job_id(service, instance)
    log.info("Checking %s in marathon as it is not in smartstack" % app_id)
    num_healthy_tasks = get_healthy_marathon_instances_for_short_app_id(client, app_id)
    send_event_if_under_replication(
        service=service,
        instance=instance,
        cluster=cluster,
        expected_count=expected_count,
        num_available=num_healthy_tasks,
        soa_dir=soa_dir,
    )
Example #28
def check_healthy_marathon_tasks_for_service_instance(client, service, instance, cluster,
                                                      soa_dir, expected_count):
    app_id = format_job_id(service, instance)
    log.info("Checking %s in marathon as it is not in smartstack" % app_id)
    num_healthy_tasks = get_healthy_marathon_instances_for_short_app_id(client, app_id)
    send_event_if_under_replication(
        service=service,
        instance=instance,
        cluster=cluster,
        expected_count=expected_count,
        num_available=num_healthy_tasks,
        soa_dir=soa_dir,
    )
Example #29
File: logs.py Project: fnzv/paasta
def marathon_log_line_passes_filter(line, levels, service, components, clusters, start_time=None, end_time=None):
    """Given a (JSON-formatted) log line where the message is a Marathon log line,
    return True if the line should be displayed given the provided service; return False
    otherwise."""
    try:
        parsed_line = json.loads(line)
    except ValueError:
        log.debug('Trouble parsing line as json. Skipping. Line: %r' % line)
        return False

    timestamp = isodate.parse_datetime(parsed_line.get('timestamp'))
    if not check_timestamp_in_range(timestamp, start_time, end_time):
        return False
    return format_job_id(service, '') in parsed_line.get('message', '')
Example #30
def status_mesos_tasks(service, instance, normal_instance_count):
    job_id = marathon_tools.format_job_id(service, instance)
    running_and_active_tasks = get_running_tasks_from_active_frameworks(job_id)
    count = len(running_and_active_tasks)
    if count >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        count = PaastaColors.green("(%d/%d)" % (count, normal_instance_count))
    elif count == 0:
        status = PaastaColors.red("Critical")
        count = PaastaColors.red("(%d/%d)" % (count, normal_instance_count))
    else:
        status = PaastaColors.yellow("Warning")
        count = PaastaColors.yellow("(%d/%d)" % (count, normal_instance_count))
    running_string = PaastaColors.bold('TASK_RUNNING')
    return "Mesos:      %s - %s tasks in the %s state." % (status, count, running_string)
Example #31
def autoscale_services(soa_dir=DEFAULT_SOA_DIR):
    try:
        with create_autoscaling_lock():
            cluster = load_system_paasta_config().get_cluster()
            services = get_services_for_cluster(
                cluster=cluster,
                instance_type='marathon',
                soa_dir=soa_dir,
            )
            configs = []
            for service, instance in services:
                service_config = load_marathon_service_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                )
                if service_config.get_max_instances() and service_config.get_desired_state() == 'start' \
                        and service_config.get_autoscaling_params()['decision_policy'] != 'bespoke':
                    configs.append(service_config)

            if configs:
                marathon_config = load_marathon_config()
                marathon_client = get_marathon_client(
                    url=marathon_config.get_url(),
                    user=marathon_config.get_username(),
                    passwd=marathon_config.get_password())
                all_marathon_tasks = marathon_client.list_tasks()
                all_mesos_tasks = get_running_tasks_from_active_frameworks('')  # empty string matches all app ids
                with ZookeeperPool():
                    for config in configs:
                        try:
                            job_id = format_job_id(config.service, config.instance)
                            # Get a dict of healthy tasks; we assume tasks with no healthcheck defined
                            # are healthy, and tasks with no healthcheck results but a defined
                            # healthcheck to be unhealthy.
                            marathon_tasks = {task.id: task for task in all_marathon_tasks
                                              if job_id == get_short_job_id(task.id) and
                                              (is_task_healthy(task) or not
                                               marathon_client.get_app(task.app_id).health_checks)}
                            if not marathon_tasks:
                                raise MetricsProviderNoDataError("Couldn't find any healthy marathon tasks")
                            mesos_tasks = [task for task in all_mesos_tasks if task['id'] in marathon_tasks]
                            autoscale_marathon_instance(config, list(marathon_tasks.values()), mesos_tasks)
                        except Exception as e:
                            write_to_log(config=config, line='Caught Exception %s' % e)
    except LockHeldException:
        pass
Example #32
def status_mesos_tasks(service, instance, normal_instance_count):
    job_id = marathon_tools.format_job_id(service, instance)
    running_and_active_tasks = get_running_tasks_from_active_frameworks(job_id)
    count = len(running_and_active_tasks)
    if count >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        count = PaastaColors.green("(%d/%d)" % (count, normal_instance_count))
    elif count == 0:
        status = PaastaColors.red("Critical")
        count = PaastaColors.red("(%d/%d)" % (count, normal_instance_count))
    else:
        status = PaastaColors.yellow("Warning")
        count = PaastaColors.yellow("(%d/%d)" % (count, normal_instance_count))
    running_string = PaastaColors.bold('TASK_RUNNING')
    return "Mesos:      %s - %s tasks in the %s state." % (status, count,
                                                           running_string)
Example #33
def cleanup_apps(soa_dir, kill_threshold=0.5, force=False):
    """Clean up old or invalid jobs/apps from marathon. Retrieves
    both a list of apps currently in marathon and a list of valid
    app ids in order to determine what to kill.

    :param soa_dir: The SOA config directory to read from
    :param kill_threshold: The decimal fraction of apps we think is
        sane to kill when this job runs.
    :param force: Force the cleanup if we are above the kill_threshold"""
    log.info("Loading marathon configuration")
    marathon_config = marathon_tools.load_marathon_config()
    log.info("Connecting to marathon")
    client = marathon_tools.get_marathon_client(marathon_config.get_url(), marathon_config.get_username(),
                                                marathon_config.get_password())

    valid_services = get_services_for_cluster(instance_type='marathon', soa_dir=soa_dir)
    running_app_ids = marathon_tools.list_all_marathon_app_ids(client)

    running_apps = []
    for app_id in running_app_ids:
        try:
            app_id = marathon_tools.deformat_job_id(app_id)
        except InvalidJobNameError:
            log.warn("%s doesn't conform to paasta naming conventions? Skipping." % app_id)
            continue
        running_apps.append(app_id)
    apps_to_kill = [(service, instance, git_sha, config_sha)
                    for service, instance, git_sha, config_sha in running_apps
                    if (service, instance) not in valid_services]

    log.debug("Running apps: %s" % running_apps)
    log.debug("Valid apps: %s" % valid_services)
    log.debug("Terminating: %s" % apps_to_kill)
    if running_apps:
        above_kill_threshold = float(len(apps_to_kill)) / float(len(running_apps)) > float(kill_threshold)
        if above_kill_threshold and not force:
            log.critical("Paasta was about to kill more than %s of the running services, this "
                         "is probably a BAD mistake!, run again with --force if you "
                         "really need to destroy everything" % kill_threshold)
            raise DontKillEverythingError
    for running_app in apps_to_kill:
        app_id = marathon_tools.format_job_id(*running_app)
        delete_app(
            app_id=app_id,
            client=client,
            soa_dir=soa_dir,
        )
Example #34
def test_status_mesos_tasks_verbose(test_case):
    tail_lines, expected_format_tail_call_count = test_case
    filter_string = format_job_id('fake_service', 'fake_instance')

    with asynctest.patch(
        'paasta_tools.mesos_tools.get_cached_list_of_running_tasks_from_frameworks', autospec=True,
        return_value=[{'id': filter_string}],
    ), asynctest.patch(
        'paasta_tools.mesos_tools.get_cached_list_of_not_running_tasks_from_frameworks', autospec=True,
    ) as get_cached_list_of_not_running_tasks_from_frameworks_patch, asynctest.patch(
        'paasta_tools.mesos_tools.format_running_mesos_task_row', autospec=True,
    ) as format_running_mesos_task_row_patch, asynctest.patch(
        'paasta_tools.mesos_tools.format_non_running_mesos_task_row', autospec=True,
    ) as format_non_running_mesos_task_row_patch, asynctest.patch(
        'paasta_tools.mesos_tools.format_stdstreams_tail_for_task', autospec=True,
    ) as format_stdstreams_tail_for_task_patch:

        template_task_return = {
            'id': filter_string,
            'statuses': [{'timestamp': '##########'}],
            'state': 'NOT_RUNNING',
        }
        non_running_mesos_tasks = []
        for _ in range(15):  # exercise the code that sorts/truncates the list of non-running tasks
            task_return = template_task_return.copy()
            task_return['statuses'] = [{'timestamp': str(1457109986 + random.randrange(-60 * 60 * 24, 60 * 60 * 24))}]
            non_running_mesos_tasks.append(task_return)
        get_cached_list_of_not_running_tasks_from_frameworks_patch.return_value = non_running_mesos_tasks

        format_running_mesos_task_row_patch.return_value = ['id', 'host', 'mem', 'cpu', 'time']
        format_non_running_mesos_task_row_patch.return_value = ['id', 'host', 'time', 'state']
        format_stdstreams_tail_for_task_patch.return_value = ['tail']

        actual = mesos_tools.status_mesos_tasks_verbose(
            filter_string=filter_string,
            get_short_task_id=mock.sentinel.get_short_task_id,
            tail_lines=tail_lines,
        )
        assert 'Running Tasks' in actual
        assert 'Non-Running Tasks' in actual
        format_running_mesos_task_row_patch.assert_called_once_with(
            {'id': filter_string},
            mock.sentinel.get_short_task_id,
        )
        assert format_non_running_mesos_task_row_patch.call_count == 10  # maximum number of tasks we display
        assert format_stdstreams_tail_for_task_patch.call_count == expected_format_tail_call_count
Example #35
def test_status_mesos_tasks_verbose(test_case):
    tail_lines, expected_format_tail_call_count = test_case
    with contextlib.nested(
        mock.patch('paasta_tools.mesos_tools.get_running_tasks_from_active_frameworks', autospec=True,),
        mock.patch('paasta_tools.mesos_tools.get_non_running_tasks_from_active_frameworks', autospec=True,),
        mock.patch('paasta_tools.mesos_tools.format_running_mesos_task_row', autospec=True,),
        mock.patch('paasta_tools.mesos_tools.format_non_running_mesos_task_row', autospec=True,),
        mock.patch('paasta_tools.mesos_tools.format_stdstreams_tail_for_task', autospec=True,),
    ) as (
        get_running_mesos_tasks_patch,
        get_non_running_mesos_tasks_patch,
        format_running_mesos_task_row_patch,
        format_non_running_mesos_task_row_patch,
        format_stdstreams_tail_for_task_patch,
    ):
        get_running_mesos_tasks_patch.return_value = ['doing a lap']

        template_task_return = {
            'statuses': [{'timestamp': '##########'}],
            'state': 'NOT_RUNNING',
        }
        non_running_mesos_tasks = []
        for _ in xrange(15):  # exercise the code that sorts/truncates the list of non-running tasks
            task_return = template_task_return.copy()
            task_return['statuses'] = [{'timestamp': str(1457109986 + random.randrange(-60 * 60 * 24, 60 * 60 * 24))}]
            non_running_mesos_tasks.append(task_return)
        get_non_running_mesos_tasks_patch.return_value = non_running_mesos_tasks

        format_running_mesos_task_row_patch.return_value = ['id', 'host', 'mem', 'cpu', 'time']
        format_non_running_mesos_task_row_patch.return_value = ['id', 'host', 'time', 'state']
        format_stdstreams_tail_for_task_patch.return_value = ['tail']
        job_id = format_job_id('fake_service', 'fake_instance')

        def get_short_task_id(_):
            return 'short_task_id'

        actual = mesos_tools.status_mesos_tasks_verbose(
            job_id=job_id,
            get_short_task_id=get_short_task_id,
            tail_lines=tail_lines,
        )
        assert 'Running Tasks' in actual
        assert 'Non-Running Tasks' in actual
        format_running_mesos_task_row_patch.assert_called_once_with('doing a lap', get_short_task_id)
        assert format_non_running_mesos_task_row_patch.call_count == 10  # maximum number of tasks we display
        assert format_stdstreams_tail_for_task_patch.call_count == expected_format_tail_call_count
Example #36
    def test_setup_service_srv_already_exists(self):
        fake_name = 'if_trees_could_talk'
        fake_instance = 'would_they_scream'
        fake_client = mock.MagicMock(get_app=mock.Mock(return_value=True))
        full_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_complete = {
            'seven': 'full',
            'eight': 'frightened',
            'nine': 'eaten',
            'id': full_id,
        }
        with contextlib.nested(
            mock.patch(
                'paasta_tools.marathon_tools.create_complete_config',
                return_value=fake_complete,
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.marathon_tools.load_marathon_config',
                return_value=self.fake_marathon_config,
                autospec=True,
            ),
            mock.patch(
                'setup_marathon_job.deploy_service',
                autospec=True,
            ),
        ) as (
            create_config_patch,
            get_config_patch,
            deploy_service_patch,
        ):
            setup_marathon_job.setup_service(
                service=fake_name,
                instance=fake_instance,
                client=fake_client,
                marathon_config=self.fake_marathon_config,
                service_marathon_config=self.fake_marathon_service_config,
                soa_dir=None,
            )
            create_config_patch.assert_called_once_with(
                fake_name,
                fake_instance,
                self.fake_marathon_config,
            )
            assert deploy_service_patch.call_count == 1
Example #37
    def test_setup_service_srv_already_exists(self):
        fake_name = 'if_trees_could_talk'
        fake_instance = 'would_they_scream'
        fake_client = mock.MagicMock(get_app=mock.Mock(return_value=True))
        full_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_complete = {
            'seven': 'full',
            'eight': 'frightened',
            'nine': 'eaten',
            'id': full_id,
        }
        with contextlib.nested(
            mock.patch(
                'paasta_tools.marathon_tools.create_complete_config',
                return_value=fake_complete,
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.marathon_tools.load_marathon_config',
                return_value=self.fake_marathon_config,
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.setup_marathon_job.deploy_service',
                autospec=True,
            ),
        ) as (
            create_config_patch,
            get_config_patch,
            deploy_service_patch,
        ):
            setup_marathon_job.setup_service(
                service=fake_name,
                instance=fake_instance,
                client=fake_client,
                marathon_config=self.fake_marathon_config,
                service_marathon_config=self.fake_marathon_service_config,
                soa_dir=None,
            )
            create_config_patch.assert_called_once_with(
                fake_name,
                fake_instance,
                self.fake_marathon_config,
            )
            assert deploy_service_patch.call_count == 1
Example #38
def test_status_mesos_tasks_verbose():
    with contextlib.nested(
            mock.patch(
                'paasta_tools.mesos_tools.get_running_tasks_from_active_frameworks',
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.mesos_tools.get_non_running_tasks_from_active_frameworks',
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.mesos_tools.format_running_mesos_task_row',
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.mesos_tools.format_non_running_mesos_task_row',
                autospec=True,
            ),
    ) as (
            get_running_mesos_tasks_patch,
            get_non_running_mesos_tasks_patch,
            format_running_mesos_task_row_patch,
            format_non_running_mesos_task_row_patch,
    ):
        get_running_mesos_tasks_patch.return_value = ['doing a lap']
        get_non_running_mesos_tasks_patch.return_value = ['eating a burrito']
        format_running_mesos_task_row_patch.return_value = [
            'id', 'host', 'mem', 'cpu', 'time'
        ]
        format_non_running_mesos_task_row_patch.return_value = [
            'id', 'host', 'time', 'state'
        ]
        job_id = format_job_id('fake_service', 'fake_instance')

        def get_short_task_id(_):
            return 'short_task_id'

        actual = mesos_tools.status_mesos_tasks_verbose(
            job_id, get_short_task_id)
        assert 'Running Tasks' in actual
        assert 'Non-Running Tasks' in actual
        format_running_mesos_task_row_patch.assert_called_once_with(
            'doing a lap', get_short_task_id)
        format_non_running_mesos_task_row_patch.assert_called_once_with(
            'eating a burrito', get_short_task_id)
Example #39
def status_mesos_tasks(service, instance, normal_instance_count):
    job_id = marathon_tools.format_job_id(service, instance)
    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    filter_string = "%s%s" % (job_id, marathon_tools.MESOS_TASK_SPACER)
    running_and_active_tasks = get_running_tasks_from_active_frameworks(filter_string)
    count = len(running_and_active_tasks)
    if count >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        count = PaastaColors.green("(%d/%d)" % (count, normal_instance_count))
    elif count == 0:
        status = PaastaColors.red("Critical")
        count = PaastaColors.red("(%d/%d)" % (count, normal_instance_count))
    else:
        status = PaastaColors.yellow("Warning")
        count = PaastaColors.yellow("(%d/%d)" % (count, normal_instance_count))
    running_string = PaastaColors.bold('TASK_RUNNING')
    return "Mesos:      %s - %s tasks in the %s state." % (status, count, running_string)
Example #41
def status_mesos_tasks(
    service: str,
    instance: str,
    normal_instance_count: int,
    verbose: int,
) -> str:
    job_id = marathon_tools.format_job_id(service, instance)
    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    filter_string = f"{job_id}{marathon_tools.MESOS_TASK_SPACER}"

    try:
        count = len(
            select_tasks_by_id(
                a_sync.block(get_cached_list_of_running_tasks_from_frameworks),
                filter_string))
        if count >= normal_instance_count:
            status = PaastaColors.green("Healthy")
            count_str = PaastaColors.green("(%d/%d)" %
                                           (count, normal_instance_count))
        elif count == 0:
            status = PaastaColors.red("Critical")
            count_str = PaastaColors.red("(%d/%d)" %
                                         (count, normal_instance_count))
        else:
            status = PaastaColors.yellow("Warning")
            count_str = PaastaColors.yellow("(%d/%d)" %
                                            (count, normal_instance_count))
        running_string = PaastaColors.bold('TASK_RUNNING')
        output = f"Mesos:      {status} - {count_str} tasks in the {running_string} state."
    except ReadTimeout:
        return "Error: talking to Mesos timed out. It may be overloaded."

    if verbose > 0:
        tail_lines = calculate_tail_lines(verbose_level=verbose)
        output += '\n' + status_mesos_tasks_verbose(
            filter_string=filter_string,
            get_short_task_id=get_short_task_id,
            tail_lines=tail_lines,
        )

    return output
Example #42
File: logs.py Project: fnzv/paasta
def marathon_log_line_passes_filter(line,
                                    levels,
                                    service,
                                    components,
                                    clusters,
                                    start_time=None,
                                    end_time=None):
    """Given a (JSON-formatted) log line where the message is a Marathon log line,
    return True if the line should be displayed given the provided service; return False
    otherwise."""
    try:
        parsed_line = json.loads(line)
    except ValueError:
        log.debug('Trouble parsing line as json. Skipping. Line: %r' % line)
        return False

    timestamp = isodate.parse_datetime(parsed_line.get('timestamp'))
    if not check_timestamp_in_range(timestamp, start_time, end_time):
        return False
    return format_job_id(service, '') in parsed_line.get('message', '')
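A hedged usage sketch of the filter above (the log line is hypothetical; 'timestamp' and 'message' are the two fields the function reads, and the message must contain the formatted job id):

import json

fake_line = json.dumps({
    'timestamp': '2016-03-03T12:00:00Z',
    'message': 'Deploying app my_service.main ...',
})
# Passes when the timestamp is in range and the message mentions the job id.
marathon_log_line_passes_filter(
    fake_line, levels=None, service='my_service',
    components=None, clusters=None,
)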
Example #43
def deploy_marathon_service(service, instance, client, soa_dir,
                            marathon_config, marathon_apps):
    short_id = marathon_tools.format_job_id(service, instance)
    try:
        with bounce_lib.bounce_lock_zookeeper(short_id):
            try:
                service_instance_config = marathon_tools.load_marathon_service_config(
                    service,
                    instance,
                    load_system_paasta_config().get_cluster(),
                    soa_dir=soa_dir,
                )
            except NoDeploymentsAvailable:
                log.debug(
                    "No deployments found for %s.%s in cluster %s. Skipping." %
                    (service, instance,
                     load_system_paasta_config().get_cluster()))
                return 0
            except NoConfigurationForServiceError:
                error_msg = "Could not read marathon configuration file for %s.%s in cluster %s" % \
                            (service, instance, load_system_paasta_config().get_cluster())
                log.error(error_msg)
                return 1

            try:
                status, output = setup_service(service, instance, client,
                                               service_instance_config,
                                               marathon_apps, soa_dir)
                sensu_status = pysensu_yelp.Status.CRITICAL if status else pysensu_yelp.Status.OK
                send_event(service, instance, soa_dir, sensu_status, output)
                return 0
            except (KeyError, TypeError, AttributeError,
                    InvalidInstanceConfig):
                error_str = traceback.format_exc()
                log.error(error_str)
                send_event(service, instance, soa_dir,
                           pysensu_yelp.Status.CRITICAL, error_str)
                return 1
    except bounce_lib.LockHeldException:
        log.error("Instance %s already being bounced. Exiting", short_id)
        return 0
Example #44
def test_status_mesos_tasks_verbose(test_case):
    tail_stdstreams, expected_format_tail_call_count = test_case
    with contextlib.nested(
        mock.patch("paasta_tools.mesos_tools.get_running_tasks_from_active_frameworks", autospec=True),
        mock.patch("paasta_tools.mesos_tools.get_non_running_tasks_from_active_frameworks", autospec=True),
        mock.patch("paasta_tools.mesos_tools.format_running_mesos_task_row", autospec=True),
        mock.patch("paasta_tools.mesos_tools.format_non_running_mesos_task_row", autospec=True),
        mock.patch("paasta_tools.mesos_tools.format_stdstreams_tail_for_task", autospec=True),
    ) as (
        get_running_mesos_tasks_patch,
        get_non_running_mesos_tasks_patch,
        format_running_mesos_task_row_patch,
        format_non_running_mesos_task_row_patch,
        format_stdstreams_tail_for_task_patch,
    ):
        get_running_mesos_tasks_patch.return_value = ["doing a lap"]

        template_task_return = {"statuses": [{"timestamp": "##########"}], "state": "NOT_RUNNING"}
        non_running_mesos_tasks = []
        for _ in xrange(15):  # exercise the code that sorts/truncates the list of non-running tasks
            task_return = template_task_return.copy()
            task_return["statuses"][0]["timestamp"] = str(1457109986 + random.randrange(-60 * 60 * 24, 60 * 60 * 24))
            non_running_mesos_tasks.append(task_return)
        get_non_running_mesos_tasks_patch.return_value = non_running_mesos_tasks

        format_running_mesos_task_row_patch.return_value = ["id", "host", "mem", "cpu", "time"]
        format_non_running_mesos_task_row_patch.return_value = ["id", "host", "time", "state"]
        format_stdstreams_tail_for_task_patch.return_value = ["tail"]
        job_id = format_job_id("fake_service", "fake_instance")

        def get_short_task_id(_):
            return "short_task_id"

        actual = mesos_tools.status_mesos_tasks_verbose(job_id, get_short_task_id, tail_stdstreams)
        assert "Running Tasks" in actual
        assert "Non-Running Tasks" in actual
        format_running_mesos_task_row_patch.assert_called_once_with("doing a lap", get_short_task_id)
        assert format_non_running_mesos_task_row_patch.call_count == 10  # maximum number of tasks we display
        assert format_stdstreams_tail_for_task_patch.call_count == expected_format_tail_call_count
Example #45
    def test_deploy_service_unknown_drain_method(self):
        fake_bounce = 'exists'
        fake_drain_method = 'doesntexist'
        fake_name = 'whoa'
        fake_instance = 'the_earth_is_tiny'
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_apps = [mock.Mock(id=fake_id, tasks=[]), mock.Mock(id=('%s2' % fake_id), tasks=[])]
        fake_client = mock.MagicMock(
            list_apps=mock.Mock(return_value=fake_apps))
        fake_config = {'id': fake_id, 'instances': 2}

        errormsg = 'ERROR: drain_method not recognized: doesntexist. Must be one of (exists1, exists2)'
        expected = (1, errormsg)

        with contextlib.nested(
            mock.patch('setup_marathon_job._log', autospec=True),
            mock.patch('setup_marathon_job.load_system_paasta_config', autospec=True),
            mock.patch(
                'paasta_tools.drain_lib._drain_methods',
                new={'exists1': mock.Mock(), 'exists2': mock.Mock()},
            )
        ) as (mock_log, mock_load_system_paasta_config, mock_drain_methods):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value='fake_cluster')
            actual = setup_marathon_job.deploy_service(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=fake_id,
                config=fake_config,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method,
                drain_method_params={},
                nerve_ns=fake_instance,
                bounce_health_params={},
                soa_dir='fake_soa_dir',
            )
            assert mock_log.call_count == 1
        assert expected == actual
Example #46
    def test_deploy_service_unknown_drain_method(self):
        fake_bounce = 'exists'
        fake_drain_method = 'doesntexist'
        fake_name = 'whoa'
        fake_instance = 'the_earth_is_tiny'
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_apps = [mock.Mock(id=fake_id, tasks=[]), mock.Mock(id=('%s2' % fake_id), tasks=[])]
        fake_client = mock.MagicMock(
            list_apps=mock.Mock(return_value=fake_apps))
        fake_config = {'id': fake_id, 'instances': 2}

        errormsg = 'ERROR: drain_method not recognized: doesntexist. Must be one of (exists1, exists2)'
        expected = (1, errormsg)

        with contextlib.nested(
            mock.patch('paasta_tools.setup_marathon_job._log', autospec=True),
            mock.patch('paasta_tools.setup_marathon_job.load_system_paasta_config', autospec=True),
            mock.patch(
                'paasta_tools.drain_lib._drain_methods',
                new={'exists1': mock.Mock(), 'exists2': mock.Mock()},
            )
        ) as (mock_log, mock_load_system_paasta_config, mock_drain_methods):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value='fake_cluster')
            actual = setup_marathon_job.deploy_service(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=fake_id,
                config=fake_config,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method,
                drain_method_params={},
                nerve_ns=fake_instance,
                bounce_health_params={},
                soa_dir='fake_soa_dir',
            )
            assert mock_log.call_count == 1
        assert expected == actual
    def test_deploy_service_unknown_bounce(self):
        fake_bounce = "WHEEEEEEEEEEEEEEEE"
        fake_drain_method = "noop"
        fake_name = "whoa"
        fake_instance = "the_earth_is_tiny"
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_apps = [mock.Mock(id=fake_id, tasks=[]), mock.Mock(id=("%s2" % fake_id), tasks=[])]
        fake_client = mock.MagicMock(list_apps=mock.Mock(return_value=fake_apps))
        fake_config = {"id": fake_id, "instances": 2}

        errormsg = "ERROR: bounce_method not recognized: %s. Must be one of (%s)" % (
            fake_bounce,
            ", ".join(list_bounce_methods()),
        )
        expected = (1, errormsg)

        with contextlib.nested(
            mock.patch("paasta_tools.setup_marathon_job._log", autospec=True),
            mock.patch("paasta_tools.setup_marathon_job.load_system_paasta_config", autospec=True),
        ) as (mock_log, mock_load_system_paasta_config):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value="fake_cluster")
            actual = setup_marathon_job.deploy_service(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=fake_id,
                config=fake_config,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method,
                drain_method_params={},
                nerve_ns=fake_instance,
                bounce_health_params={},
                soa_dir="fake_soa_dir",
            )
            assert mock_log.call_count == 1
        assert expected == actual
        fake_client.list_apps.assert_called_once_with(embed_failures=True)
        assert fake_client.create_app.call_count == 0
Example #48
    def test_deploy_service_unknown_bounce(self):
        fake_bounce = 'WHEEEEEEEEEEEEEEEE'
        fake_drain_method = 'noop'
        fake_name = 'whoa'
        fake_instance = 'the_earth_is_tiny'
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_apps = [mock.Mock(id=fake_id, tasks=[]), mock.Mock(id=('%s2' % fake_id), tasks=[])]
        fake_client = mock.MagicMock(
            list_apps=mock.Mock(return_value=fake_apps))
        fake_config = {'id': fake_id, 'instances': 2}

        errormsg = 'ERROR: bounce_method not recognized: %s. Must be one of (%s)' % \
            (fake_bounce, ', '.join(list_bounce_methods()))
        expected = (1, errormsg)

        with contextlib.nested(
            mock.patch('paasta_tools.setup_marathon_job._log', autospec=True),
            mock.patch('paasta_tools.setup_marathon_job.load_system_paasta_config', autospec=True),
        ) as (mock_log, mock_load_system_paasta_config):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value='fake_cluster')
            actual = setup_marathon_job.deploy_service(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=fake_id,
                config=fake_config,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method,
                drain_method_params={},
                nerve_ns=fake_instance,
                bounce_health_params={},
                soa_dir='fake_soa_dir',
            )
            assert mock_log.call_count == 1
        assert expected == actual
        fake_client.list_apps.assert_called_once_with(embed_failures=True)
        assert fake_client.create_app.call_count == 0
Example #49
    def test_deploy_service_unknown_bounce(self):
        fake_bounce = 'WHEEEEEEEEEEEEEEEE'
        fake_drain_method = 'noop'
        fake_name = 'whoa'
        fake_instance = 'the_earth_is_tiny'
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance)
        fake_apps = [mock.Mock(id=fake_id, tasks=[]), mock.Mock(id=('%s2' % fake_id), tasks=[])]
        fake_client = mock.MagicMock(
            list_apps=mock.Mock(return_value=fake_apps))
        fake_config = {'id': fake_id, 'instances': 2}

        errormsg = 'ERROR: bounce_method not recognized: %s. Must be one of (%s)' % \
            (fake_bounce, ', '.join(list_bounce_methods()))
        expected = (1, errormsg)

        with contextlib.nested(
            mock.patch('setup_marathon_job._log', autospec=True),
            mock.patch('setup_marathon_job.load_system_paasta_config', autospec=True),
        ) as (mock_log, mock_load_system_paasta_config):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value='fake_cluster')
            actual = setup_marathon_job.deploy_service(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=fake_id,
                config=fake_config,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method,
                drain_method_params={},
                nerve_ns=fake_instance,
                bounce_health_params={},
                soa_dir='fake_soa_dir',
            )
            assert mock_log.call_count == 1
        assert expected == actual
        fake_client.list_apps.assert_called_once_with(embed_failures=True)
        assert fake_client.create_app.call_count == 0
Example #50
def deploy_service(
    service,
    instance,
    marathon_jobid,
    config,
    client,
    bounce_method,
    drain_method_name,
    drain_method_params,
    nerve_ns,
    bounce_health_params,
    soa_dir,
):
    """Deploy the service to marathon, either directly or via a bounce if needed.
    Called by setup_service when it's time to actually deploy.

    :param service: The name of the service to deploy
    :param instance: The instance of the service to deploy
    :param marathon_jobid: Full id of the marathon job
    :param config: The complete configuration dict to send to marathon
    :param client: A MarathonClient object
    :param bounce_method: The bounce method to use, if needed
    :param drain_method_name: The name of the traffic draining method to use.
    :param drain_method_params: A dictionary of parameters passed to the drain method.
    :param nerve_ns: The nerve namespace to look in.
    :param bounce_health_params: A dictionary of options for bounce_lib.get_happy_tasks.
    :param soa_dir: The SOA configuration directory to read from.
    :returns: A tuple of (status, output) to be used with send_sensu_event"""
    def log_deploy_error(errormsg, level='event'):
        # Forward the caller-supplied level; tracebacks are logged at 'debug'.
        return _log(service=service,
                    line=errormsg,
                    component='deploy',
                    level=level,
                    cluster=cluster,
                    instance=instance)

    short_id = marathon_tools.format_job_id(service, instance)

    cluster = load_system_paasta_config().get_cluster()
    existing_apps = marathon_tools.get_matching_apps(service,
                                                     instance,
                                                     client,
                                                     embed_failures=True)
    new_app_list = [a for a in existing_apps if a.id == '/%s' % config['id']]
    other_apps = [a for a in existing_apps if a.id != '/%s' % config['id']]
    serviceinstance = "%s.%s" % (service, instance)

    if new_app_list:
        new_app = new_app_list[0]
        if len(new_app_list) != 1:
            raise ValueError("Only expected one app per ID; found %d" %
                             len(new_app_list))
        new_app_running = True
        happy_new_tasks = bounce_lib.get_happy_tasks(new_app, service,
                                                     nerve_ns,
                                                     **bounce_health_params)
    else:
        new_app_running = False
        happy_new_tasks = []

    try:
        drain_method = drain_lib.get_drain_method(
            drain_method_name,
            service=service,
            instance=instance,
            nerve_ns=nerve_ns,
            drain_method_params=drain_method_params,
        )
    except KeyError:
        errormsg = 'ERROR: drain_method not recognized: %s. Must be one of (%s)' % \
            (drain_method_name, ', '.join(drain_lib.list_drain_methods()))
        log_deploy_error(errormsg)
        return (1, errormsg)

    old_app_live_happy_tasks, old_app_live_unhappy_tasks, old_app_draining_tasks = get_old_happy_unhappy_draining_tasks(
        other_apps, drain_method, service, nerve_ns, bounce_health_params)

    if new_app_running:
        protected_draining_tasks = set()
        if new_app.instances < config['instances']:
            client.scale_app(app_id=new_app.id,
                             instances=config['instances'],
                             force=True)
        elif new_app.instances > config['instances']:
            num_tasks_to_scale = max(
                min(len(new_app.tasks), new_app.instances) -
                config['instances'], 0)
            task_dict = get_old_happy_unhappy_draining_tasks_for_app(
                new_app,
                drain_method,
                service,
                nerve_ns,
                bounce_health_params,
            )
            scaling_app_happy_tasks = list(task_dict['happy'])
            scaling_app_unhappy_tasks = list(task_dict['unhappy'])
            scaling_app_draining_tasks = list(task_dict['draining'])

            tasks_to_move_draining = min(len(scaling_app_draining_tasks),
                                         num_tasks_to_scale)
            old_app_draining_tasks[new_app.id] = set(
                scaling_app_draining_tasks[:tasks_to_move_draining])
            protected_draining_tasks.update(
                scaling_app_draining_tasks[:tasks_to_move_draining])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_draining

            tasks_to_move_unhappy = min(len(scaling_app_unhappy_tasks),
                                        num_tasks_to_scale)
            old_app_live_unhappy_tasks[new_app.id] = set(
                scaling_app_unhappy_tasks[:tasks_to_move_unhappy])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_unhappy

            tasks_to_move_happy = min(len(scaling_app_happy_tasks),
                                      num_tasks_to_scale)
            old_app_live_happy_tasks[new_app.id] = set(
                scaling_app_happy_tasks[:tasks_to_move_happy])
            happy_new_tasks = scaling_app_happy_tasks[tasks_to_move_happy:]
        # If any tasks on the new app happen to be draining (e.g. someone reverts to an older version with
        # `paasta mark-for-deployment`), then we should undrain them.
        for task in new_app.tasks:
            if task not in protected_draining_tasks:
                drain_method.stop_draining(task)

    # Re-drain any already draining tasks on old apps
    for tasks in old_app_draining_tasks.values():
        for task in tasks:
            drain_method.drain(task)

    # log all uncaught exceptions and raise them again
    try:
        try:
            bounce_func = bounce_lib.get_bounce_method_func(bounce_method)
        except KeyError:
            errormsg = 'ERROR: bounce_method not recognized: %s. Must be one of (%s)' % \
                (bounce_method, ', '.join(bounce_lib.list_bounce_methods()))
            log_deploy_error(errormsg)
            return (1, errormsg)

        try:
            with bounce_lib.bounce_lock_zookeeper(short_id):
                do_bounce(
                    bounce_func=bounce_func,
                    drain_method=drain_method,
                    config=config,
                    new_app_running=new_app_running,
                    happy_new_tasks=happy_new_tasks,
                    old_app_live_happy_tasks=old_app_live_happy_tasks,
                    old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
                    old_app_draining_tasks=old_app_draining_tasks,
                    service=service,
                    bounce_method=bounce_method,
                    serviceinstance=serviceinstance,
                    cluster=cluster,
                    instance=instance,
                    marathon_jobid=marathon_jobid,
                    client=client,
                    soa_dir=soa_dir,
                )

        except bounce_lib.LockHeldException:
            log.error("Instance %s already being bounced. Exiting", short_id)
            return (1, "Instance %s is already being bounced." % short_id)
    except Exception:
        loglines = ['Exception raised during deploy of service %s:' % service]
        loglines.extend(traceback.format_exc().rstrip().split("\n"))
        for logline in loglines:
            log_deploy_error(logline, level='debug')
        raise

    return (0, 'Service deployed.')
Example #51
def deploy_service(
    service,
    instance,
    marathon_jobid,
    config,
    client,
    marathon_apps,
    bounce_method,
    drain_method_name,
    drain_method_params,
    nerve_ns,
    bounce_health_params,
    soa_dir,
    bounce_margin_factor=1.0,
):
    """Deploy the service to marathon, either directly or via a bounce if needed.
    Called by setup_service when it's time to actually deploy.

    :param service: The name of the service to deploy
    :param instance: The instance of the service to deploy
    :param marathon_jobid: Full id of the marathon job
    :param config: The complete configuration dict to send to marathon
    :param client: A MarathonClient object
    :param marathon_apps: A list of all marathon applications, used to find apps matching this service and instance
    :param bounce_method: The bounce method to use, if needed
    :param drain_method_name: The name of the traffic draining method to use.
    :param drain_method_params: A dictionary of parameters passed to the drain method.
    :param nerve_ns: The nerve namespace to look in.
    :param bounce_health_params: A dictionary of options for bounce_lib.get_happy_tasks.
    :param soa_dir: The SOA configuration directory to read from.
    :param bounce_margin_factor: The multiplication factor used to calculate the number of instances to be drained.
    :returns: A tuple of (status, output) to be used with send_sensu_event"""

    def log_deploy_error(errormsg, level='event'):
        # Forward the caller-supplied level; tracebacks are logged at 'debug'.
        return _log(
            service=service,
            line=errormsg,
            component='deploy',
            level=level,
            cluster=cluster,
            instance=instance
        )

    short_id = marathon_tools.format_job_id(service, instance)

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    existing_apps = marathon_tools.get_matching_apps(service, instance, marathon_apps)
    new_app_list = [a for a in existing_apps if a.id == '/%s' % config['id']]
    other_apps = [a for a in existing_apps if a.id != '/%s' % config['id']]
    serviceinstance = "%s.%s" % (service, instance)

    if new_app_list:
        new_app = new_app_list[0]
        if len(new_app_list) != 1:
            raise ValueError("Only expected one app per ID; found %d" % len(new_app_list))
        new_app_running = True
        happy_new_tasks = bounce_lib.get_happy_tasks(new_app, service, nerve_ns, system_paasta_config,
                                                     **bounce_health_params)
    else:
        new_app_running = False
        happy_new_tasks = []

    try:
        drain_method = drain_lib.get_drain_method(
            drain_method_name,
            service=service,
            instance=instance,
            nerve_ns=nerve_ns,
            drain_method_params=drain_method_params,
        )
    except KeyError:
        errormsg = 'ERROR: drain_method not recognized: %s. Must be one of (%s)' % \
            (drain_method_name, ', '.join(drain_lib.list_drain_methods()))
        log_deploy_error(errormsg)
        return (1, errormsg)

    (old_app_live_happy_tasks,
     old_app_live_unhappy_tasks,
     old_app_draining_tasks,
     old_app_at_risk_tasks,
     ) = get_tasks_by_state(
        other_apps,
        drain_method,
        service,
        nerve_ns,
        bounce_health_params,
        system_paasta_config,
    )

    if new_app_running:
        num_at_risk_tasks = get_num_at_risk_tasks(new_app)
        if new_app.instances < config['instances'] + num_at_risk_tasks:
            log.info("Scaling %s from %d to %d instances." %
                     (new_app.id, new_app.instances, config['instances'] + num_at_risk_tasks))
            client.scale_app(app_id=new_app.id, instances=config['instances'] + num_at_risk_tasks, force=True)
        # If we have more than the specified number of instances running, we will want to drain some of them.
        # We will start by draining any tasks running on at-risk hosts.
        elif new_app.instances > config['instances']:
            num_tasks_to_scale = max(min(len(new_app.tasks), new_app.instances) - config['instances'], 0)
            task_dict = get_tasks_by_state_for_app(
                new_app,
                drain_method,
                service,
                nerve_ns,
                bounce_health_params,
                system_paasta_config,
            )
            scaling_app_happy_tasks = list(task_dict['happy'])
            scaling_app_unhappy_tasks = list(task_dict['unhappy'])
            scaling_app_draining_tasks = list(task_dict['draining'])
            scaling_app_at_risk_tasks = list(task_dict['at_risk'])

            tasks_to_move_draining = min(len(scaling_app_draining_tasks), num_tasks_to_scale)
            old_app_draining_tasks[new_app.id] = set(scaling_app_draining_tasks[:tasks_to_move_draining])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_draining

            tasks_to_move_unhappy = min(len(scaling_app_unhappy_tasks), num_tasks_to_scale)
            old_app_live_unhappy_tasks[new_app.id] = set(scaling_app_unhappy_tasks[:tasks_to_move_unhappy])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_unhappy

            tasks_to_move_at_risk = min(len(scaling_app_at_risk_tasks), num_tasks_to_scale)
            old_app_at_risk_tasks[new_app.id] = set(scaling_app_at_risk_tasks[:tasks_to_move_at_risk])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_at_risk

            tasks_to_move_happy = min(len(scaling_app_happy_tasks), num_tasks_to_scale)
            old_app_live_happy_tasks[new_app.id] = set(scaling_app_happy_tasks[:tasks_to_move_happy])
            happy_new_tasks = scaling_app_happy_tasks[tasks_to_move_happy:]

        # TODO: don't take actions in deploy_service.
        undrain_tasks(
            to_undrain=new_app.tasks,
            leave_draining=old_app_draining_tasks.get(new_app.id, []),
            drain_method=drain_method,
            log_deploy_error=log_deploy_error,
        )

    # log all uncaught exceptions and raise them again
    try:
        try:
            bounce_func = bounce_lib.get_bounce_method_func(bounce_method)
        except KeyError:
            errormsg = 'ERROR: bounce_method not recognized: %s. Must be one of (%s)' % \
                (bounce_method, ', '.join(bounce_lib.list_bounce_methods()))
            log_deploy_error(errormsg)
            return (1, errormsg)

        try:
            with bounce_lib.bounce_lock_zookeeper(short_id):
                do_bounce(
                    bounce_func=bounce_func,
                    drain_method=drain_method,
                    config=config,
                    new_app_running=new_app_running,
                    happy_new_tasks=happy_new_tasks,
                    old_app_live_happy_tasks=old_app_live_happy_tasks,
                    old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
                    old_app_draining_tasks=old_app_draining_tasks,
                    old_app_at_risk_tasks=old_app_at_risk_tasks,
                    service=service,
                    bounce_method=bounce_method,
                    serviceinstance=serviceinstance,
                    cluster=cluster,
                    instance=instance,
                    marathon_jobid=marathon_jobid,
                    client=client,
                    soa_dir=soa_dir,
                    bounce_margin_factor=bounce_margin_factor,
                )

        except bounce_lib.LockHeldException:
            log.error("Instance %s already being bounced. Exiting", short_id)
            return (1, "Instance %s is already being bounced." % short_id)
    except Exception:
        logline = 'Exception raised during deploy of service %s:\n%s' % (service, traceback.format_exc())
        log_deploy_error(logline, level='debug')
        raise

    return (0, 'Service deployed.')
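The scale-down branch above reaps tasks in a fixed priority order: draining first, then unhappy, then at-risk, and only then happy tasks. A minimal standalone sketch of that pattern (the function name and task lists are hypothetical, not part of paasta_tools):

def pick_tasks_to_remove(task_buckets, num_to_remove):
    # Take tasks from each bucket in priority order until the quota is met.
    # task_buckets lists the highest-priority victims first,
    # e.g. [draining, unhappy, at_risk, happy].
    removed = []
    for bucket in task_buckets:
        take = min(len(bucket), num_to_remove - len(removed))
        removed.extend(bucket[:take])
        if len(removed) == num_to_remove:
            break
    return removed

# With 2 draining and 1 unhappy task, removing 4 also takes one at-risk task:
assert pick_tasks_to_remove(
    [['d1', 'd2'], ['u1'], ['a1', 'a2'], ['h1', 'h2', 'h3']], 4,
) == ['d1', 'd2', 'u1', 'a1']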
Example #52
    def test_setup_service_srv_does_not_exist(self):
        fake_name = 'if_talk_was_cheap'
        fake_instance = 'psychatrists_would_be_broke'
        fake_response = mock.Mock(
            json=mock.Mock(return_value={'message': 'test'}))
        fake_client = mock.MagicMock(get_app=mock.Mock(
            side_effect=marathon.exceptions.NotFoundError(fake_response)))
        full_id = marathon_tools.format_job_id(fake_name, fake_instance, 'oogabooga', 'bananafanafofooga')
        fake_complete = {
            'do': 'you', 'even': 'dota', 'id': full_id,
            'docker_image': 'fake_docker_registry/fake_docker_image',
        }
        fake_bounce = 'trampoline'
        fake_drain_method = 'noop'
        fake_drain_method_params = {}
        with contextlib.nested(
            mock.patch(
                'paasta_tools.marathon_tools.create_complete_config',
                return_value=fake_complete,
                autospec=True,
            ),
            mock.patch(
                'setup_marathon_job.deploy_service',
                return_value=(111, 'Never'),
                autospec=True,
            ),
            mock.patch.object(
                self.fake_marathon_service_config,
                'get_bounce_method',
                return_value=fake_bounce,
                autospec=True,
            ),
            mock.patch.object(
                self.fake_marathon_service_config,
                'get_drain_method',
                return_value=fake_drain_method,
                autospec=True,
            ),
            mock.patch.object(
                self.fake_marathon_service_config,
                'get_drain_method_params',
                return_value=fake_drain_method_params,
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.marathon_tools.load_marathon_service_config',
                return_value=self.fake_marathon_service_config,
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.marathon_tools.load_service_namespace_config',
                return_value=self.fake_service_namespace_config,
                autospec=True,
            ),
        ) as (
            create_config_patch,
            deploy_service_patch,
            get_bounce_patch,
            get_drain_method_patch,
            get_drain_method_params_patch,
            read_service_conf_patch,
            read_namespace_conf_patch,
        ):
            status, output = setup_marathon_job.setup_service(
                service=fake_name,
                instance=fake_instance,
                client=fake_client,
                marathon_config=self.fake_marathon_config,
                service_marathon_config=self.fake_marathon_service_config,
                soa_dir=None,
            )
            assert status == 111
            assert output == 'Never'

            create_config_patch.assert_called_once_with(
                fake_name,
                fake_instance,
                self.fake_marathon_config
            )
            get_bounce_patch.assert_called_once_with()
            get_drain_method_patch.assert_called_once_with(read_namespace_conf_patch.return_value)
            deploy_service_patch.assert_called_once_with(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=full_id,
                config=fake_complete,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method,
                drain_method_params=fake_drain_method_params,
                nerve_ns=self.fake_marathon_service_config.get_nerve_namespace(),
                bounce_health_params=self.fake_marathon_service_config.get_bounce_health_params(
                    read_namespace_conf_patch.return_value),
                soa_dir=None,
            )
Example #53
    def test_deploy_service_already_bouncing(self):
        fake_bounce = 'areallygoodbouncestrategy'
        fake_drain_method = 'noop'
        fake_name = 'how_many_strings'
        fake_instance = 'will_i_need_to_think_of'
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance, 'gityourmom', 'configyourdad')
        fake_config = {'id': fake_id, 'instances': 2}

        old_app_id = ('%s2' % fake_id)
        old_task = mock.Mock(id="old_task_id", app_id=old_app_id)
        old_app = mock.Mock(id=old_app_id, tasks=[old_task])

        fake_client = mock.MagicMock(
            list_apps=mock.Mock(return_value=[old_app]),
            kill_task=mock.Mock(spec=lambda app_id, id, scale=False: None),
        )

        fake_bounce_func = mock.create_autospec(
            bounce_lib.brutal_bounce,
            return_value={
                "create_app": True,
                "tasks_to_drain": [old_task],
            }
        )

        fake_short_id = marathon_tools.format_job_id(fake_name, fake_instance)

        with contextlib.nested(
            mock.patch(
                'paasta_tools.bounce_lib.get_bounce_method_func',
                return_value=fake_bounce_func,
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.bounce_lib.bounce_lock_zookeeper',
                side_effect=bounce_lib.LockHeldException,
                autospec=True
            ),
            mock.patch(
                'paasta_tools.bounce_lib.get_happy_tasks',
                autospec=True,
                side_effect=lambda x, _, __, **kwargs: x,
            ),
            mock.patch('paasta_tools.setup_marathon_job._log', autospec=True),
            mock.patch('paasta_tools.setup_marathon_job.load_system_paasta_config', autospec=True),
        ) as (_, _, _, _, mock_load_system_paasta_config):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value='fake_cluster')
            result = setup_marathon_job.deploy_service(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=fake_id,
                config=fake_config,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method,
                drain_method_params={},
                nerve_ns=fake_instance,
                bounce_health_params={},
                soa_dir='fake_soa_dir',
            )
            assert result == (1, "Instance %s is already being bounced." % fake_short_id)
Example #54
    def test_deploy_service_known_bounce(self):
        fake_bounce = 'areallygoodbouncestrategy'
        fake_drain_method_name = 'noop'
        fake_name = 'how_many_strings'
        fake_instance = 'will_i_need_to_think_of'
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance, 'git11111111', 'config11111111')
        fake_config = {'id': fake_id, 'instances': 2}

        old_app_id = marathon_tools.format_job_id(fake_name, fake_instance, 'git22222222', 'config22222222')
        old_task_to_drain = mock.Mock(id="old_task_to_drain", app_id=old_app_id)
        old_task_is_draining = mock.Mock(id="old_task_is_draining", app_id=old_app_id)
        old_task_dont_drain = mock.Mock(id="old_task_dont_drain", app_id=old_app_id)

        old_app = mock.Mock(id="/%s" % old_app_id, tasks=[old_task_to_drain, old_task_is_draining, old_task_dont_drain])

        fake_client = mock.MagicMock(
            list_apps=mock.Mock(return_value=[old_app]),
            kill_task=mock.Mock(spec=lambda app_id, id, scale=False: None),
        )

        fake_bounce_func = mock.create_autospec(
            bounce_lib.brutal_bounce,
            return_value={
                "create_app": True,
                "tasks_to_drain": [old_task_to_drain],
            }
        )

        fake_drain_method = mock.Mock(is_draining=lambda t: t is old_task_is_draining, is_safe_to_kill=lambda t: True)

        with contextlib.nested(
            mock.patch(
                'paasta_tools.bounce_lib.get_bounce_method_func',
                return_value=fake_bounce_func,
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.bounce_lib.bounce_lock_zookeeper',
                autospec=True
            ),
            mock.patch(
                'paasta_tools.bounce_lib.get_happy_tasks',
                autospec=True,
                side_effect=lambda x, _, __, **kwargs: x,
            ),
            mock.patch('paasta_tools.bounce_lib.kill_old_ids', autospec=True),
            mock.patch('paasta_tools.bounce_lib.create_marathon_app', autospec=True),
            mock.patch('paasta_tools.setup_marathon_job._log', autospec=True),
            mock.patch('paasta_tools.setup_marathon_job.load_system_paasta_config', autospec=True),
            mock.patch('paasta_tools.drain_lib.get_drain_method', return_value=fake_drain_method),
        ) as (_, _, _, kill_old_ids_patch, create_marathon_app_patch, mock_log, mock_load_system_paasta_config, _):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value='fake_cluster')
            result = setup_marathon_job.deploy_service(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=fake_id,
                config=fake_config,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method_name,
                drain_method_params={},
                nerve_ns=fake_instance,
                bounce_health_params={},
                soa_dir='fake_soa_dir',
            )
            assert result[0] == 0, "Expected successful result; got (%d, %s)" % result
            fake_client.list_apps.assert_called_once_with(embed_failures=True)
            assert fake_client.create_app.call_count == 0
            fake_bounce_func.assert_called_once_with(
                new_config=fake_config,
                new_app_running=False,
                happy_new_tasks=[],
                old_app_live_tasks={old_app.id: set([old_task_to_drain, old_task_dont_drain])},
            )

            assert fake_drain_method.drain.call_count == 2
            fake_drain_method.drain.assert_any_call(old_task_is_draining)
            fake_drain_method.drain.assert_any_call(old_task_to_drain)

            assert fake_client.kill_task.call_count == 2
            fake_client.kill_task.assert_any_call(old_app_id, old_task_is_draining.id, scale=True)
            fake_client.kill_task.assert_any_call(old_app_id, old_task_to_drain.id, scale=True)

            create_marathon_app_patch.assert_called_once_with(fake_config['id'], fake_config, fake_client)
            assert kill_old_ids_patch.call_count == 0

            # We should call _log 5 times:
            # 1. bounce starts
            # 2. create new app
            # 3. draining old tasks
            # 4. remove old apps
            # 5. bounce finishes

            assert mock_log.call_count == 5
Example #55
    def test_setup_service_srv_does_not_exist(self):
        fake_name = 'if_talk_was_cheap'
        fake_instance = 'psychatrists_would_be_broke'
        fake_response = mock.Mock(
            json=mock.Mock(return_value={'message': 'test'}))
        fake_client = mock.MagicMock(get_app=mock.Mock(
            side_effect=marathon.exceptions.NotFoundError(fake_response)))
        full_id = marathon_tools.format_job_id(fake_name, fake_instance, 'oogabooga', 'bananafanafofooga')
        fake_complete = {
            'do': 'you', 'even': 'dota', 'id': full_id,
            'docker_image': 'fake_docker_registry/fake_docker_image',
        }
        fake_bounce = 'trampoline'
        fake_drain_method = 'noop'
        fake_drain_method_params = {}
        with contextlib.nested(
            mock.patch(
                'paasta_tools.marathon_tools.create_complete_config',
                return_value=fake_complete,
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.setup_marathon_job.deploy_service',
                return_value=(111, 'Never'),
                autospec=True,
            ),
            mock.patch.object(
                self.fake_marathon_service_config,
                'get_bounce_method',
                return_value=fake_bounce,
                autospec=True,
            ),
            mock.patch.object(
                self.fake_marathon_service_config,
                'get_drain_method',
                return_value=fake_drain_method,
                autospec=True,
            ),
            mock.patch.object(
                self.fake_marathon_service_config,
                'get_drain_method_params',
                return_value=fake_drain_method_params,
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.marathon_tools.load_marathon_service_config',
                return_value=self.fake_marathon_service_config,
                autospec=True,
            ),
            mock.patch(
                'paasta_tools.marathon_tools.load_service_namespace_config',
                return_value=self.fake_service_namespace_config,
                autospec=True,
            ),
        ) as (
            create_config_patch,
            deploy_service_patch,
            get_bounce_patch,
            get_drain_method_patch,
            get_drain_method_params_patch,
            read_service_conf_patch,
            read_namespace_conf_patch,
        ):
            status, output = setup_marathon_job.setup_service(
                service=fake_name,
                instance=fake_instance,
                client=fake_client,
                marathon_config=self.fake_marathon_config,
                service_marathon_config=self.fake_marathon_service_config,
                soa_dir=None,
            )
            assert status == 111
            assert output == 'Never'

            create_config_patch.assert_called_once_with(
                fake_name,
                fake_instance,
                self.fake_marathon_config
            )
            get_bounce_patch.assert_called_once_with()
            get_drain_method_patch.assert_called_once_with(read_namespace_conf_patch.return_value)
            deploy_service_patch.assert_called_once_with(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=full_id,
                config=fake_complete,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method,
                drain_method_params=fake_drain_method_params,
                nerve_ns=self.fake_marathon_service_config.get_nerve_namespace(),
                bounce_health_params=self.fake_marathon_service_config.get_bounce_health_params(
                    read_namespace_conf_patch.return_value),
                soa_dir=None,
            )
    def test_setup_service_srv_does_not_exist(self):
        fake_name = "if_talk_was_cheap"
        fake_instance = "psychatrists_would_be_broke"
        fake_response = mock.Mock(json=mock.Mock(return_value={"message": "test"}))
        fake_client = mock.MagicMock(get_app=mock.Mock(side_effect=marathon.exceptions.NotFoundError(fake_response)))
        full_id = marathon_tools.format_job_id(fake_name, fake_instance, "oogabooga", "bananafanafofooga")
        fake_complete = {
            "do": "you",
            "even": "dota",
            "id": full_id,
            "docker_image": "fake_docker_registry/fake_docker_image",
        }
        fake_bounce = "trampoline"
        fake_drain_method = "noop"
        fake_drain_method_params = {}
        with contextlib.nested(
            mock.patch("paasta_tools.marathon_tools.create_complete_config", return_value=fake_complete, autospec=True),
            mock.patch("paasta_tools.setup_marathon_job.deploy_service", return_value=(111, "Never"), autospec=True),
            mock.patch.object(
                self.fake_marathon_service_config, "get_bounce_method", return_value=fake_bounce, autospec=True
            ),
            mock.patch.object(
                self.fake_marathon_service_config, "get_drain_method", return_value=fake_drain_method, autospec=True
            ),
            mock.patch.object(
                self.fake_marathon_service_config,
                "get_drain_method_params",
                return_value=fake_drain_method_params,
                autospec=True,
            ),
            mock.patch(
                "paasta_tools.marathon_tools.load_marathon_service_config",
                return_value=self.fake_marathon_service_config,
                autospec=True,
            ),
            mock.patch(
                "paasta_tools.marathon_tools.load_service_namespace_config",
                return_value=self.fake_service_namespace_config,
                autospec=True,
            ),
        ) as (
            create_config_patch,
            deploy_service_patch,
            get_bounce_patch,
            get_drain_method_patch,
            get_drain_method_params_patch,
            read_service_conf_patch,
            read_namespace_conf_patch,
        ):
            status, output = setup_marathon_job.setup_service(
                service=fake_name,
                instance=fake_instance,
                client=fake_client,
                marathon_config=self.fake_marathon_config,
                service_marathon_config=self.fake_marathon_service_config,
                soa_dir=None,
            )
            assert status == 111
            assert output == "Never"

            create_config_patch.assert_called_once_with(fake_name, fake_instance, self.fake_marathon_config)
            get_bounce_patch.assert_called_once_with()
            get_drain_method_patch.assert_called_once_with(read_namespace_conf_patch.return_value)
            deploy_service_patch.assert_called_once_with(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=full_id,
                config=fake_complete,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method,
                drain_method_params=fake_drain_method_params,
                nerve_ns=self.fake_marathon_service_config.get_nerve_namespace(),
                bounce_health_params=self.fake_marathon_service_config.get_bounce_health_params(
                    read_namespace_conf_patch.return_value
                ),
                soa_dir=None,
            )
    def test_deploy_service_known_bounce(self):
        fake_bounce = "areallygoodbouncestrategy"
        fake_drain_method_name = "noop"
        fake_name = "how_many_strings"
        fake_instance = "will_i_need_to_think_of"
        fake_id = marathon_tools.format_job_id(fake_name, fake_instance, "git11111111", "config11111111")
        fake_config = {"id": fake_id, "instances": 2}

        old_app_id = marathon_tools.format_job_id(fake_name, fake_instance, "git22222222", "config22222222")
        old_task_to_drain = mock.Mock(id="old_task_to_drain", app_id=old_app_id)
        old_task_is_draining = mock.Mock(id="old_task_is_draining", app_id=old_app_id)
        old_task_dont_drain = mock.Mock(id="old_task_dont_drain", app_id=old_app_id)

        old_app = mock.Mock(id="/%s" % old_app_id, tasks=[old_task_to_drain, old_task_is_draining, old_task_dont_drain])

        fake_client = mock.MagicMock(
            list_apps=mock.Mock(return_value=[old_app]), kill_task=mock.Mock(spec=lambda app_id, id, scale=False: None)
        )

        fake_bounce_func = mock.create_autospec(
            bounce_lib.brutal_bounce, return_value={"create_app": True, "tasks_to_drain": [old_task_to_drain]}
        )

        fake_drain_method = mock.Mock(is_draining=lambda t: t is old_task_is_draining, is_safe_to_kill=lambda t: True)

        with contextlib.nested(
            mock.patch("paasta_tools.bounce_lib.get_bounce_method_func", return_value=fake_bounce_func, autospec=True),
            mock.patch("paasta_tools.bounce_lib.bounce_lock_zookeeper", autospec=True),
            mock.patch(
                "paasta_tools.bounce_lib.get_happy_tasks", autospec=True, side_effect=lambda x, _, __, **kwargs: x
            ),
            mock.patch("paasta_tools.bounce_lib.kill_old_ids", autospec=True),
            mock.patch("paasta_tools.bounce_lib.create_marathon_app", autospec=True),
            mock.patch("paasta_tools.setup_marathon_job._log", autospec=True),
            mock.patch("paasta_tools.setup_marathon_job.load_system_paasta_config", autospec=True),
            mock.patch("paasta_tools.drain_lib.get_drain_method", return_value=fake_drain_method),
        ) as (_, _, _, kill_old_ids_patch, create_marathon_app_patch, mock_log, mock_load_system_paasta_config, _):
            mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value="fake_cluster")
            result = setup_marathon_job.deploy_service(
                service=fake_name,
                instance=fake_instance,
                marathon_jobid=fake_id,
                config=fake_config,
                client=fake_client,
                bounce_method=fake_bounce,
                drain_method_name=fake_drain_method_name,
                drain_method_params={},
                nerve_ns=fake_instance,
                bounce_health_params={},
                soa_dir="fake_soa_dir",
            )
            assert result[0] == 0, "Expected successful result; got (%d, %s)" % result
            fake_client.list_apps.assert_called_once_with(embed_failures=True)
            assert fake_client.create_app.call_count == 0
            fake_bounce_func.assert_called_once_with(
                new_config=fake_config,
                new_app_running=False,
                happy_new_tasks=[],
                old_app_live_tasks={old_app.id: set([old_task_to_drain, old_task_dont_drain])},
            )

            assert fake_drain_method.drain.call_count == 2
            fake_drain_method.drain.assert_any_call(old_task_is_draining)
            fake_drain_method.drain.assert_any_call(old_task_to_drain)

            assert fake_client.kill_task.call_count == 2
            fake_client.kill_task.assert_any_call(app_id=old_app_id, task_id=old_task_is_draining.id, scale=True)
            fake_client.kill_task.assert_any_call(app_id=old_app_id, task_id=old_task_to_drain.id, scale=True)

            create_marathon_app_patch.assert_called_once_with(fake_config["id"], fake_config, fake_client)
            assert kill_old_ids_patch.call_count == 0

            # We should call _log 5 times:
            # 1. bounce starts
            # 2. create new app
            # 3. draining old tasks
            # 4. remove old apps
            # 5. bounce finishes

            assert mock_log.call_count == 5
Example #58
def deploy_service(
    service,
    instance,
    marathon_jobid,
    config,
    client,
    bounce_method,
    drain_method_name,
    drain_method_params,
    nerve_ns,
    bounce_health_params,
    soa_dir,
):
    """Deploy the service to marathon, either directly or via a bounce if needed.
    Called by setup_service when it's time to actually deploy.

    :param service: The name of the service to deploy
    :param instance: The instance of the service to deploy
    :param marathon_jobid: Full id of the marathon job
    :param config: The complete configuration dict to send to marathon
    :param client: A MarathonClient object
    :param bounce_method: The bounce method to use, if needed
    :param drain_method_name: The name of the traffic draining method to use.
    :param drain_method_params: A dictionary of parameters for the drain method.
    :param nerve_ns: The nerve namespace to look in.
    :param bounce_health_params: A dictionary of options for bounce_lib.get_happy_tasks.
    :param soa_dir: The SOA configuration directory to read from.
    :returns: A tuple of (status, output) to be used with send_sensu_event"""

    def log_deploy_error(errormsg, level='event'):
        return _log(
            service=service,
            line=errormsg,
            component='deploy',
            level=level,
            cluster=cluster,
            instance=instance
        )

    short_id = marathon_tools.format_job_id(service, instance)

    cluster = load_system_paasta_config().get_cluster()
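    # Split the currently running apps into the one matching the new config id
    # (if any) and everything else, which the bounce will move away from.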
    existing_apps = marathon_tools.get_matching_apps(service, instance, client, embed_failures=True)
    new_app_list = [a for a in existing_apps if a.id == '/%s' % config['id']]
    other_apps = [a for a in existing_apps if a.id != '/%s' % config['id']]
    serviceinstance = "%s.%s" % (service, instance)

    if new_app_list:
        if len(new_app_list) != 1:
            raise ValueError("Only expected one app per ID; found %d" % len(new_app_list))
        new_app = new_app_list[0]
        new_app_running = True
        happy_new_tasks = bounce_lib.get_happy_tasks(new_app, service, nerve_ns, **bounce_health_params)
    else:
        new_app_running = False
        happy_new_tasks = []

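    # Resolve the requested drain method; an unrecognized name is logged and
    # fails the deploy instead of raising.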
    try:
        drain_method = drain_lib.get_drain_method(
            drain_method_name,
            service=service,
            instance=instance,
            nerve_ns=nerve_ns,
            drain_method_params=drain_method_params,
        )
    except KeyError:
        errormsg = 'ERROR: drain_method not recognized: %s. Must be one of (%s)' % \
            (drain_method_name, ', '.join(drain_lib.list_drain_methods()))
        log_deploy_error(errormsg)
        return (1, errormsg)

    old_app_live_happy_tasks, old_app_live_unhappy_tasks, old_app_draining_tasks = get_old_happy_unhappy_draining_tasks(
        other_apps,
        drain_method,
        service,
        nerve_ns,
        bounce_health_params
    )

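    # If the new app already exists, reconcile its instance count with the
    # configured count: scale up in place, or earmark surplus tasks for
    # removal by handing them to the bounce as "old" tasks.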
    if new_app_running:
        protected_draining_tasks = set()
        if new_app.instances < config['instances']:
            client.scale_app(app_id=new_app.id, instances=config['instances'], force=True)
        elif new_app.instances > config['instances']:
            num_tasks_to_scale = max(min(len(new_app.tasks), new_app.instances) - config['instances'], 0)
            task_dict = get_old_happy_unhappy_draining_tasks_for_app(
                new_app,
                drain_method,
                service,
                nerve_ns,
                bounce_health_params,
            )
            scaling_app_happy_tasks = list(task_dict['happy'])
            scaling_app_unhappy_tasks = list(task_dict['unhappy'])
            scaling_app_draining_tasks = list(task_dict['draining'])

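            # When shrinking, prefer to shed tasks that are already draining,
            # then unhappy tasks, and only then happy ones.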
            tasks_to_move_draining = min(len(scaling_app_draining_tasks), num_tasks_to_scale)
            old_app_draining_tasks[new_app.id] = set(scaling_app_draining_tasks[:tasks_to_move_draining])
            protected_draining_tasks.update(scaling_app_draining_tasks[:tasks_to_move_draining])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_draining

            tasks_to_move_unhappy = min(len(scaling_app_unhappy_tasks), num_tasks_to_scale)
            old_app_live_unhappy_tasks[new_app.id] = set(scaling_app_unhappy_tasks[:tasks_to_move_unhappy])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_unhappy

            tasks_to_move_happy = min(len(scaling_app_happy_tasks), num_tasks_to_scale)
            old_app_live_happy_tasks[new_app.id] = set(scaling_app_happy_tasks[:tasks_to_move_happy])
            happy_new_tasks = scaling_app_happy_tasks[tasks_to_move_happy:]
        # If any tasks on the new app happen to be draining (e.g. someone reverts to an older version with
        # `paasta mark-for-deployment`), then we should undrain them.
        for task in new_app.tasks:
            if task not in protected_draining_tasks:
                drain_method.stop_draining(task)

    # Re-drain any already draining tasks on old apps
    for tasks in old_app_draining_tasks.values():
        for task in tasks:
            drain_method.drain(task)

    # log all uncaught exceptions and raise them again
    try:
        try:
            bounce_func = bounce_lib.get_bounce_method_func(bounce_method)
        except KeyError:
            errormsg = 'ERROR: bounce_method not recognized: %s. Must be one of (%s)' % \
                (bounce_method, ', '.join(bounce_lib.list_bounce_methods()))
            log_deploy_error(errormsg)
            return (1, errormsg)

        try:
            with bounce_lib.bounce_lock_zookeeper(short_id):
                do_bounce(
                    bounce_func=bounce_func,
                    drain_method=drain_method,
                    config=config,
                    new_app_running=new_app_running,
                    happy_new_tasks=happy_new_tasks,
                    old_app_live_happy_tasks=old_app_live_happy_tasks,
                    old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
                    old_app_draining_tasks=old_app_draining_tasks,
                    service=service,
                    bounce_method=bounce_method,
                    serviceinstance=serviceinstance,
                    cluster=cluster,
                    instance=instance,
                    marathon_jobid=marathon_jobid,
                    client=client,
                    soa_dir=soa_dir,
                )

        except bounce_lib.LockHeldException:
            log.error("Instance %s already being bounced. Exiting", short_id)
            return (1, "Instance %s is already being bounced." % short_id)
    except Exception:
        loglines = ['Exception raised during deploy of service %s:' % service]
        loglines.extend(traceback.format_exc().rstrip().split("\n"))
        for logline in loglines:
            log_deploy_error(logline, level='debug')
        raise

    return (0, 'Service deployed.')
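
# A minimal usage sketch (not part of the original example), showing how a
# caller might invoke deploy_service. The service name, client object, and
# soa_dir below are illustrative assumptions rather than values taken from a
# real configuration:
#
#     status, output = deploy_service(
#         service='example_service',
#         instance='main',
#         marathon_jobid=marathon_tools.format_job_id('example_service', 'main'),
#         config=complete_config,
#         client=marathon_client,
#         bounce_method='crossover',
#         drain_method_name='noop',
#         drain_method_params={},
#         nerve_ns='main',
#         bounce_health_params={},
#         soa_dir='/nail/etc/services',
#     )
#     # (status, output) is then suitable for send_sensu_event.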
Example #59
def filter_autoscaling_tasks(
    marathon_apps: Sequence[MarathonApp],
    all_mesos_tasks: Sequence[Task],
    config: MarathonServiceConfig,
    system_paasta_config: SystemPaastaConfig,
) -> Tuple[Mapping[str, MarathonTask], Sequence[Task]]:
    """Find the tasks that are serving traffic. We care about this because many tasks have a period of high CPU when
    they first start up, during which they warm up code, load and process data, etc., and we don't want this high load
    to drag our overall load estimate upwards. Allowing these tasks to count towards overall load could cause a cycle of
    scaling up, seeing high load due to new warming-up containers, scaling up, until we hit max_instances.

    However, accidentally omitting a task that actually is serving traffic will cause us to underestimate load; this is
    generally much worse than overestimating, since it can cause us to incorrectly scale down or refuse to scale up when
    necessary. For this reason, we look at several sources of health information, and if they disagree, assume the task
    is serving traffic.
    """
    job_id_prefix = "{}{}".format(
        format_job_id(service=config.service, instance=config.instance),
        MESOS_TASK_SPACER,
    )

    # Get a dict of healthy tasks, we assume tasks with no healthcheck defined are healthy.
    # We assume tasks with no healthcheck results but a defined healthcheck to be unhealthy, unless they are "old" in
    # which case we assume that Marathon has screwed up and stopped healthchecking but that they are healthy.

    log.info("Inspecting %s for autoscaling" % job_id_prefix)

    relevant_tasks_by_app: Dict[MarathonApp, List[MarathonTask]] = {
        app: app.tasks
        for app in marathon_apps
        if app.id.lstrip("/").startswith(job_id_prefix)
    }

    healthy_marathon_tasks: Dict[str, MarathonTask] = {}

    for app, tasks in relevant_tasks_by_app.items():
        for task in tasks:
            if (is_task_healthy(task) or not app.health_checks
                    or is_old_task_missing_healthchecks(task, app)):
                healthy_marathon_tasks[task.id] = task

    service_namespace_config = load_service_namespace_config(
        service=config.service, namespace=config.get_nerve_namespace())
    if service_namespace_config.is_in_smartstack():

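        # Any task registered in SmartStack counts as healthy, even if the
        # Marathon healthcheck information above disagreed (see docstring).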
        for task in filter_tasks_in_smartstack(
                tasks=[
                    task for tasks in relevant_tasks_by_app.values()
                    for task in tasks
                ],
                service=config.service,
                nerve_ns=config.get_nerve_namespace(),
                system_paasta_config=system_paasta_config,
                max_hosts_to_query=20,
                haproxy_min_fraction_up=0.01,  # Be very liberal. See docstring above for rationale.
        ):
            healthy_marathon_tasks[task.id] = task

    if not healthy_marathon_tasks:
        raise MetricsProviderNoDataError(
            "Couldn't find any healthy marathon tasks")
    mesos_tasks = [
        task for task in all_mesos_tasks
        if task["id"] in healthy_marathon_tasks
    ]
    return (healthy_marathon_tasks, mesos_tasks)