예제 #1
0
    def kill_tasks_if_necessary(self, driver: MesosSchedulerDriver):
        """Drain, undrain and kill tasks based on a crossover bounce decision.

        The method works from a single snapshot of ``self.task_store``:

        1. Tasks returned by ``get_new_tasks`` are ranked with the healthiness
           sorter; everything past the first ``desired_instances`` entries is
           treated as a kill candidate.
        2. Tasks returned by ``get_old_tasks`` that are not already draining
           are ranked the same way and appended to the candidates.
        3. ``bounce_lib.crossover_bounce`` decides which candidates to drain.
           Those are drained; every other new task is undrained.
        4. Every task in the snapshot that is flagged as draining and is in a
           live Mesos state is killed once the drain method reports that it is
           safe to do so.

        :param driver: handed to ``self.kill_task`` for each task that is killed
        """
        base_task = self.service_config.base_task(self.system_paasta_config)
        tasks_by_id = self.task_store.get_all_tasks()
        task_name = base_task['name']

        fresh_tasks = self.get_new_tasks(task_name, tasks_by_id)
        happy_fresh_tasks = self.get_happy_tasks(fresh_tasks)

        desired_instances = self.service_config.get_desired_instances()
        # Rank best-first so the surplus is simply the tail of the list. Ranking
        # worst-first and slicing with [:-desired_instances] would break when
        # desired_instances is 0, because list[:-0] is empty, not the full list.
        ranked_fresh_ids = sorted(
            fresh_tasks.keys(),
            key=self.make_healthiness_sorter(task_name, tasks_by_id),
            reverse=True,
        )
        surplus_fresh_ids = ranked_fresh_ids[desired_instances:]

        legacy_tasks = self.get_old_tasks(task_name, tasks_by_id)
        legacy_draining_tasks = self.get_draining_tasks(legacy_tasks)
        legacy_active_ids = sorted(
            set(legacy_tasks.keys()) - set(legacy_draining_tasks),
            key=self.make_healthiness_sorter(task_name, tasks_by_id),
            reverse=True,
        )

        actions = bounce_lib.crossover_bounce(
            new_config={"instances": desired_instances},
            new_app_running=True,
            happy_new_tasks=happy_fresh_tasks.keys(),
            old_non_draining_tasks=surplus_fresh_ids + legacy_active_ids,
        )

        with a_sync.idle_event_loop():
            # Phase 1: undrain every new task that was not selected for
            # draining, then drain the selected ones, and wait for all of it.
            drain_phase = [
                asyncio.ensure_future(self.undrain_task(task_id))
                for task_id in set(fresh_tasks.keys()) - set(actions['tasks_to_drain'])
            ]
            drain_phase.extend(
                asyncio.ensure_future(self.drain_task(task_id))
                for task_id in actions['tasks_to_drain']
            )
            if drain_phase:
                a_sync.block(asyncio.wait, drain_phase)

            async def kill_when_safe(task_id: str):
                # Only kill once the drain method agrees it is safe.
                if not await self.drain_method.is_safe_to_kill(self.make_drain_task(task_id)):
                    return
                self.kill_task(driver, task_id)

            # Phase 2: consider every draining task from the snapshot that is
            # still in a live Mesos state.
            kill_phase = [
                asyncio.ensure_future(kill_when_safe(task_id))
                for task_id, params in tasks_by_id.items()
                if params.is_draining and params.mesos_task_state in LIVE_TASK_STATES
            ]
            if kill_phase:
                a_sync.block(asyncio.wait, kill_phase)
예제 #2
0
def deploy_marathon_service(
    service: str,
    instance: str,
    clients: marathon_tools.MarathonClients,
    soa_dir: str,
    marathon_apps_with_clients: Optional[Collection[Tuple[MarathonApp,
                                                          MarathonClient]]],
) -> Tuple[int, Optional[float]]:
    """Deploy the given service instance and process the return code.

    If there was an error we send a sensu alert.

    :param service: The service name to setup
    :param instance: The instance of the service to setup
    :param clients: A MarathonClients object
    :param soa_dir: Path to yelpsoa configs
    :param marathon_apps_with_clients: A collection of (app, client) tuples.
        When None, it is fetched via
        ``marathon_tools.get_marathon_apps_with_clients`` for the clients of
        this service instance.
    :returns: A tuple of (status, bounce_in_seconds) to be used by paasta-deployd
        bounce_in_seconds instructs how long until the deployd should try another bounce
        None means that it is in a steady state and doesn't need to bounce again
    """
    short_id = marathon_tools.format_job_id(service, instance)
    try:
        # Hold the bounce lock for this instance for the whole deploy; a
        # LockHeldException is handled at the bottom of the function.
        with bounce_lib.bounce_lock_zookeeper(short_id):
            try:
                service_instance_config = marathon_tools.load_marathon_service_config_no_cache(
                    service,
                    instance,
                    load_system_paasta_config().get_cluster(),
                    soa_dir=soa_dir,
                )
            except NoDeploymentsAvailable:
                log.debug(
                    "No deployments found for %s.%s in cluster %s. Skipping.",
                    service,
                    instance,
                    load_system_paasta_config().get_cluster(),
                )
                return 0, None
            except NoConfigurationForServiceError:
                log.error(
                    "Could not read marathon configuration file for %s.%s in cluster %s",
                    service,
                    instance,
                    load_system_paasta_config().get_cluster(),
                )
                return 1, None

            if marathon_apps_with_clients is None:
                marathon_apps_with_clients = marathon_tools.get_marathon_apps_with_clients(
                    clients=clients.get_all_clients_for_service(
                        job_config=service_instance_config),
                    embed_tasks=True,
                )

            try:
                with a_sync.idle_event_loop():
                    status, output, bounce_again_in_seconds = setup_service(
                        service=service,
                        instance=instance,
                        clients=clients,
                        job_config=service_instance_config,
                        marathon_apps_with_clients=marathon_apps_with_clients,
                        soa_dir=soa_dir,
                    )
                # A non-zero status from setup_service is reported through the
                # sensu event only; the returned status code is 0 either way.
                sensu_status = pysensu_yelp.Status.CRITICAL if status else pysensu_yelp.Status.OK
                send_event(service, instance, soa_dir, sensu_status, output)
                return 0, bounce_again_in_seconds
            except (KeyError, TypeError, AttributeError, InvalidInstanceConfig,
                    NoSlavesAvailableError):
                # The formatted traceback is both logged and sent as the
                # sensu event output.
                error_str = traceback.format_exc()
                log.error(error_str)
                send_event(service, instance, soa_dir,
                           pysensu_yelp.Status.CRITICAL, error_str)
                return 1, None
    except bounce_lib.LockHeldException:
        log.error("Instance %s already being bounced. Exiting", short_id)
        return 0, None