Ejemplo n.º 1
0
def get_service_instances_needing_update(
    marathon_clients: MarathonClients,
    instances: Collection[Tuple[str, str]],
    cluster: str,
) -> List[Tuple[str, str, MarathonServiceConfig, str]]:
    """Pick out the service instances whose Marathon app is missing or mis-sized.

    The apps known to every client returned by
    ``marathon_clients.get_all_clients()`` are gathered into one mapping keyed
    by app id. Each ``(service, instance)`` pair then has its configuration
    loaded for ``cluster`` and formatted into an app dict, which is compared
    against that mapping.

    :param marathon_clients: object whose ``get_all_clients()`` yields the
        clients to query
    :param instances: ``(service, instance)`` pairs to examine
    :param cluster: cluster name handed to the configuration loader
    :returns: one ``(service, instance, config, app_id)`` tuple for every pair
        whose app id is not among the running apps, or whose running
        ``instances`` value differs from the configured one. Pairs whose
        configuration cannot be loaded or formatted are reported on stdout
        and omitted.
    """
    running_apps = {}
    for client in marathon_clients.get_all_clients():
        for app in get_all_marathon_apps(client):
            running_apps[app.id] = app

    needing_update = []
    for service, instance in instances:
        try:
            config = load_marathon_service_config_no_cache(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=DEFAULT_SOA_DIR,
            )
            config_app = config.format_marathon_app_dict()
            app_id = "/{}".format(config_app["id"])
        except Exception as e:
            # Intentionally broad: the app dict is built from a lot of user
            # input, and one malformed definition must not abort the whole
            # scan.
            message = "ERROR: Skipping {}.{} because: '{}'".format(
                service, instance, str(e))
            print(message)
            continue

        entry = (service, instance, config, app_id)
        if app_id not in running_apps:
            # No app with this id is running at all.
            needing_update.append(entry)
        elif running_apps[app_id].instances != config_app["instances"]:
            # The app exists but its instance count differs from the config.
            needing_update.append(entry)
    return needing_update
Ejemplo n.º 2
0
def get_service_instances_needing_update(marathon_client, instances, cluster):
    """Return the ``(service, instance)`` pairs that need a Marathon update.

    A pair is included when the app id derived from its configuration is not
    among the apps returned for ``marathon_client``, or when the running
    app's ``instances`` value differs from the configured one.

    :param marathon_client: client passed to ``get_all_marathon_apps``
    :param instances: iterable of ``(service, instance)`` pairs to examine
    :param cluster: cluster name handed to the configuration loader
    :returns: list of ``(service, instance)`` tuples; pairs that raise
        ``NoDockerImageError``, ``InvalidJobNameError`` or
        ``NoDeploymentsAvailable`` are reported on stdout and omitted
    """
    apps_by_id = {}
    for app in get_all_marathon_apps(marathon_client):
        apps_by_id[app.id] = app

    stale = []
    for service, instance in instances:
        try:
            config = load_marathon_service_config_no_cache(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=DEFAULT_SOA_DIR,
            )
            config_app = config.format_marathon_app_dict()
            app_id = '/{}'.format(config_app['id'])
        except (NoDockerImageError, InvalidJobNameError,
                NoDeploymentsAvailable) as e:
            details = (service, instance, str(e))
            print("DEBUG: Skipping %s.%s because: '%s'" % details)
            continue

        # Either the app is not running at all, or it is running with a
        # different instance count than the configuration asks for.
        if (app_id not in apps_by_id
                or apps_by_id[app_id].instances != config_app['instances']):
            stale.append((service, instance))
    return stale
Ejemplo n.º 3
0
 def get_at_risk_service_instances(self, draining_hosts):
     """Build one ServiceInstance per service instance running on a draining host.

     All Marathon apps are fetched with their tasks embedded, and every task
     whose ``host`` is in ``draining_hosts`` is treated as at risk. Each at-risk
     task's app id is decomposed into service and instance, and a
     ``ServiceInstance`` is created the first time a given
     ``(service, instance)`` pair is seen.

     :param draining_hosts: container of host names, tested with ``in``
     :returns: list of ``ServiceInstance`` objects, in order of first
         appearance, with ``bounce_by`` set to the current time
     """
     marathon_apps = get_all_marathon_apps(self.marathon_client,
                                           embed_tasks=True)
     at_risk_tasks = [
         task for app in marathon_apps for task in app.tasks
         if task.host in draining_hosts
     ]
     self.log.info("At risk tasks: {}".format(at_risk_tasks))

     service_instances = []
     # Track the pairs already queued in a set so the duplicate check is a
     # constant-time lookup instead of a scan of the whole result list for
     # every task.
     seen = set()
     for task in at_risk_tasks:
         app_id = task.app_id.strip('/')
         service, instance, _, __ = deformat_job_id(app_id)
         key = (service, instance)
         if key in seen:
             # No need to add the same instance to the bounce queue more
             # than once.
             continue
         seen.add(key)
         service_instances.append(
             ServiceInstance(
                 service=service,
                 instance=instance,
                 cluster=self.config.get_cluster(),
                 bounce_by=int(time.time()),
                 watcher=type(self).__name__,
                 bounce_timers=None,
                 failures=0,
             ))
     return service_instances
Ejemplo n.º 4
0
def get_service_instances_needing_update(
    marathon_clients: MarathonClients,
    instances: Collection[Tuple[str, str]],
    cluster: str,
) -> List[Tuple[str, str]]:
    """List the ``(service, instance)`` pairs whose Marathon app is out of date.

    Apps from every client returned by ``marathon_clients.get_all_clients()``
    are merged into one mapping keyed by app id. A pair is reported when its
    configured app id is absent from that mapping, or when the running app's
    ``instances`` value differs from the configured one.

    :param marathon_clients: object whose ``get_all_clients()`` yields the
        clients to query
    :param instances: ``(service, instance)`` pairs to examine
    :param cluster: cluster name handed to the configuration loader
    :returns: list of ``(service, instance)`` tuples; pairs that raise
        ``NoDockerImageError``, ``InvalidJobNameError``,
        ``NoDeploymentsAvailable`` or ``NoSlavesAvailableError`` are reported
        on stdout and omitted
    """
    known_apps = {}
    for client in marathon_clients.get_all_clients():
        for app in get_all_marathon_apps(client):
            known_apps[app.id] = app

    outdated = []
    for service, instance in instances:
        try:
            config = load_marathon_service_config_no_cache(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=DEFAULT_SOA_DIR,
            )
            config_app = config.format_marathon_app_dict()
            app_id = '/{}'.format(config_app['id'])
        except (NoDockerImageError, InvalidJobNameError,
                NoDeploymentsAvailable, NoSlavesAvailableError) as e:
            reason = str(e)
            print("DEBUG: Skipping {}.{} because: '{}'".format(
                service, instance, reason))
            continue

        # Missing app, or an app running a different number of instances
        # than the configuration requests.
        if (app_id not in known_apps
                or known_apps[app_id].instances != config_app['instances']):
            outdated.append((service, instance))
    return outdated
Ejemplo n.º 5
0
    def run(self):
        """Process service instances from the bounce queue forever.

        Each iteration takes one item from ``self.bounce_q``, fetches the
        current Marathon apps and calls ``deploy_marathon_service`` for it.
        An exception from the deploy is logged and treated as return code
        ``-2``. Any non-zero return code increments the item's failure count
        and replaces the re-bounce delay with an exponential back-off. When a
        delay is set, a new ``ServiceInstance`` due at ``now + delay`` is put
        on ``self.inbox_q``; otherwise the bounce-length timer is stopped.
        The loop never returns.
        """
        self.log.info("{} starting up".format(self.name))
        while True:
            service_instance = self.bounce_q.get()
            failures = service_instance.failures
            bounce_timers = self.setup_timers(service_instance)
            service = service_instance.service
            instance = service_instance.instance
            self.log.info("{} processing {}.{}".format(
                self.name, service, instance))

            marathon_apps = marathon_tools.get_all_marathon_apps(
                self.marathon_client, embed_failures=True)
            bounce_timers.setup_marathon.start()
            try:
                return_code, bounce_again_in_seconds = deploy_marathon_service(
                    service=service,
                    instance=instance,
                    client=self.marathon_client,
                    soa_dir=marathon_tools.DEFAULT_SOA_DIR,
                    marathon_config=self.marathon_config,
                    marathon_apps=marathon_apps)
            except Exception as e:
                self.log.warning(
                    "deploy_marathon_service caused exception: {}".format(e))
                # The delay is left unset here; the non-zero return code
                # guarantees the back-off branch below assigns it.
                return_code = -2

            if return_code != 0:
                failures += 1
                backoff_factor = (
                    self.config.get_deployd_worker_failure_backoff_factor())
                bounce_again_in_seconds = exponential_back_off(
                    failures=failures,
                    factor=backoff_factor,
                    base=2,
                    max_time=6000)

            bounce_timers.setup_marathon.stop()
            self.log.info(
                "setup marathon completed with exit code {} for {}.{}".format(
                    return_code, service, instance))

            if not bounce_again_in_seconds:
                bounce_timers.bounce_length.stop()
                self.log.info("{}.{} in steady state".format(
                    service, instance))
            else:
                bounce_timers.processed_by_worker.start()
                self.log.info(
                    "{}.{} not in steady state so bouncing again in {} "
                    "seconds".format(service, instance,
                                     bounce_again_in_seconds))
                # Hand the instance back with its new deadline and the
                # updated failure count.
                self.inbox_q.put(
                    ServiceInstance(
                        service=service,
                        instance=instance,
                        bounce_by=int(time.time()) + bounce_again_in_seconds,
                        watcher=self.name,
                        bounce_timers=bounce_timers,
                        failures=failures))
            time.sleep(0.1)
0
def assert_marathon_apps(
    clients: Sequence[MarathonClient],
) -> HealthCheckResult:
    """Report whether at least one Marathon app exists across ``clients``.

    :param clients: Marathon clients whose apps are counted
    :returns: a healthy ``HealthCheckResult`` carrying the total app count,
        or an unhealthy one with a CRITICAL message when the total is below 1
    """
    total_apps = sum(len(get_all_marathon_apps(c)) for c in clients)
    if total_apps >= 1:
        return HealthCheckResult(message="marathon apps: %10d" % total_apps,
                                 healthy=True)
    return HealthCheckResult(message="CRITICAL: No marathon apps running",
                             healthy=False)
Ejemplo n.º 7
0
def when_setup_service_initiated(context):
    """Run ``setup_marathon_job.setup_service`` for the service on ``context``.

    A set of ``paasta_tools`` functions is patched for the duration of the
    call. ``setup_service`` is attempted up to 120 times; an attempt that
    raises ``MarathonHttpError`` is retried, and the first attempt that
    returns must have a zero code.

    Reads ``context.service``, ``context.instance``,
    ``context.marathon_client``, ``context.new_marathon_service_config``,
    ``context.cluster``, ``context.system_paasta_config`` and
    ``context.max_tasks``.

    :raises AssertionError: if ``setup_service`` returns a non-zero code
    :raises Exception: if all 120 attempts raise ``MarathonHttpError``
    """
    # NOTE: the targets of the ``as (...)`` tuple below are positional, so the
    # order of the patches here must stay in step with that tuple.
    with contextlib.nested(
        mock.patch(
            'paasta_tools.bounce_lib.get_happy_tasks',
            autospec=True,
            # Wrap function call so we can select a subset of tasks or test
            # intermediate steps, like when an app is not completely up
            side_effect=lambda app, _, __, ___, **kwargs: get_happy_tasks(
                app, context.service, "fake_nerve_ns", context.system_paasta_config)[:context.max_tasks],
        ),
        mock.patch('paasta_tools.bounce_lib.bounce_lock_zookeeper', autospec=True),
        mock.patch('paasta_tools.bounce_lib.create_app_lock', autospec=True),
        mock.patch('paasta_tools.bounce_lib.time.sleep', autospec=True),
        mock.patch('paasta_tools.setup_marathon_job.load_system_paasta_config', autospec=True),
        mock.patch('paasta_tools.setup_marathon_job._log', autospec=True),
        mock.patch('paasta_tools.marathon_tools.get_config_hash', autospec=True, return_value='confighash'),
        mock.patch('paasta_tools.marathon_tools.get_code_sha_from_dockerurl', autospec=True, return_value='newapp'),
        mock.patch('paasta_tools.marathon_tools.get_docker_url', autospec=True, return_value='busybox'),
        mock.patch('paasta_tools.mesos_maintenance.get_principal', autospec=True),
        mock.patch('paasta_tools.mesos_maintenance.get_secret', autospec=True),
    ) as (
        _,
        _,
        _,
        _,
        mock_load_system_paasta_config,
        _,
        _,
        _,
        _,
        mock_get_principal,
        mock_get_secret,
    ):
        # Load the credentials and make the patched principal/secret getters
        # return them.
        credentials = mesos_maintenance.load_credentials(mesos_secrets='/etc/mesos-slave-secret')
        mock_get_principal.return_value = credentials.principal
        mock_get_secret.return_value = credentials.secret
        # The patched system config reports the cluster stored on the context.
        mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value=context.cluster)
        # 120 * 0.5 = 60 seconds
        # NOTE(review): 'paasta_tools.bounce_lib.time.sleep' is patched above;
        # if bounce_lib does a plain `import time`, that patch replaces `sleep`
        # on the shared `time` module, so the sleep below would be a mocked
        # no-op and the 60 second budget would not hold -- confirm.
        for _ in xrange(120):
            try:
                marathon_apps = marathon_tools.get_all_marathon_apps(context.marathon_client, embed_failures=True)
                (code, message) = setup_marathon_job.setup_service(
                    service=context.service,
                    instance=context.instance,
                    client=context.marathon_client,
                    marathon_apps=marathon_apps,
                    service_marathon_config=context.new_marathon_service_config,
                    soa_dir='/nail/etc/services',
                )
                assert code == 0, message
                return
            except MarathonHttpError:
                # Retry after a short pause.
                time.sleep(0.5)
        raise Exception("Unable to acquire app lock for setup_marathon_job.setup_service")
Ejemplo n.º 8
0
def main():
    """Deploy every marathon service instance named on the command line.

    The process exits with status 1 if any service.instance failed to deploy,
    and 0 otherwise. The steps are:

    - Load the marathon configuration
    - Connect to marathon and fetch the current apps (tasks embedded)
    - For each service.instance:
        - Split the name into service and instance
        - Deploy/bounce the service via ``deploy_marathon_service``
    """
    args = parse_args()
    soa_dir = args.soa_dir
    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(level=log_level)

    # Transparent in-memory cache for HTTP API calls made during this run.
    requests_cache.install_cache("setup_marathon_jobs", backend="memory")

    marathon_config = get_main_marathon_config()
    client = marathon_tools.get_marathon_client(
        marathon_config.get_url(),
        marathon_config.get_username(),
        marathon_config.get_password(),
    )
    marathon_apps = marathon_tools.get_all_marathon_apps(
        client, embed_tasks=True)

    failed = 0
    for service_instance in args.service_instance_list:
        try:
            service, instance, _, __ = decompose_job_id(service_instance)
        except InvalidJobNameError:
            log.error(
                "Invalid service instance specified. Format is service%sinstance."
                % SPACER)
            failed += 1
            continue

        result = deploy_marathon_service(service, instance, client, soa_dir,
                                         marathon_config, marathon_apps)
        # The first element of the result is the status; a truthy value
        # counts as a failed deployment.
        if result[0]:
            failed += 1

    requests_cache.uninstall_cache()

    total = len(args.service_instance_list)
    log.debug("%d out of %d service.instances failed to deploy." %
              (failed, total))

    exit_status = 1 if failed else 0
    sys.exit(exit_status)
Ejemplo n.º 9
0
def when_setup_service_initiated(context):
    """Run ``setup_marathon_job.setup_service`` for the service on ``context``.

    A set of ``paasta_tools`` functions is patched for the duration of the
    call. ``setup_service`` is attempted up to 120 times; an attempt that
    raises ``MarathonHttpError`` is retried, and the first attempt that
    returns must have a zero code.

    Reads ``context.service``, ``context.instance``,
    ``context.marathon_client``, ``context.new_marathon_service_config``,
    ``context.cluster``, ``context.system_paasta_config`` and
    ``context.max_tasks``.

    :raises AssertionError: if ``setup_service`` returns a non-zero code
    :raises Exception: if all 120 attempts raise ``MarathonHttpError``
    """
    with mock.patch(
        'paasta_tools.bounce_lib.get_happy_tasks',
        autospec=True,
        # Wrap function call so we can select a subset of tasks or test
        # intermediate steps, like when an app is not completely up
        side_effect=lambda app, _, __, ___, **kwargs: get_happy_tasks(
            app, context.service, "fake_nerve_ns", context.system_paasta_config,
        )[:context.max_tasks],
    ), mock.patch(
        'paasta_tools.bounce_lib.bounce_lock_zookeeper', autospec=True,
    ), mock.patch(
        'paasta_tools.bounce_lib.time.sleep', autospec=True,
    ), mock.patch(
        'paasta_tools.setup_marathon_job.load_system_paasta_config', autospec=True,
    ) as mock_load_system_paasta_config, mock.patch(
        'paasta_tools.setup_marathon_job._log', autospec=True,
    ), mock.patch(
        'paasta_tools.marathon_tools.get_config_hash', autospec=True, return_value='confighash',
    ), mock.patch(
        'paasta_tools.marathon_tools.get_code_sha_from_dockerurl', autospec=True, return_value='newapp',
    ), mock.patch(
        'paasta_tools.utils.InstanceConfig.get_docker_url', autospec=True, return_value='busybox',
    ), mock.patch(
        'paasta_tools.mesos_maintenance.get_principal', autospec=True,
    ) as mock_get_principal, mock.patch(
        'paasta_tools.mesos_maintenance.get_secret', autospec=True,
    ) as mock_get_secret:
        # Load the credentials and make the patched principal/secret getters
        # return them.
        credentials = mesos_maintenance.load_credentials(mesos_secrets='/etc/mesos-slave-secret')
        mock_get_principal.return_value = credentials.principal
        mock_get_secret.return_value = credentials.secret
        # The patched system config reports the cluster stored on the context.
        mock_load_system_paasta_config.return_value.get_cluster = mock.Mock(return_value=context.cluster)
        # 120 * 0.5 = 60 seconds
        # NOTE(review): 'paasta_tools.bounce_lib.time.sleep' is patched above;
        # if bounce_lib does a plain `import time`, that patch replaces `sleep`
        # on the shared `time` module, so the sleep below would be a mocked
        # no-op and the 60 second budget would not hold -- confirm.
        for _ in range(120):
            try:
                marathon_apps = marathon_tools.get_all_marathon_apps(context.marathon_client, embed_tasks=True)
                # Only the return code and message are used; bounce_again is
                # unpacked but ignored.
                (code, message, bounce_again) = setup_marathon_job.setup_service(
                    service=context.service,
                    instance=context.instance,
                    client=context.marathon_client,
                    marathon_apps=marathon_apps,
                    service_marathon_config=context.new_marathon_service_config,
                    soa_dir='/nail/etc/services',
                )
                assert code == 0, message
                return
            except MarathonHttpError:
                # Retry after a short pause.
                time.sleep(0.5)
        raise Exception("Unable to acquire app lock for setup_marathon_job.setup_service")
Ejemplo n.º 10
0
def main():
    """Deploy every marathon service instance named on the command line.

    The process exits with status 1 if any service.instance failed to deploy,
    and 0 otherwise. The steps are:

    - Load the marathon configuration
    - Connect to marathon and fetch the current apps (failures embedded)
    - For each service.instance:
        - Split the name into service and instance
        - Deploy/bounce the service via ``deploy_marathon_service``
    """
    args = parse_args()
    soa_dir = args.soa_dir
    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(level=log_level)

    # Transparent in-memory cache for HTTP API calls made during this run.
    requests_cache.install_cache("setup_marathon_jobs", backend="memory")

    marathon_config = get_main_marathon_config()
    client = marathon_tools.get_marathon_client(
        marathon_config.get_url(),
        marathon_config.get_username(),
        marathon_config.get_password(),
    )
    marathon_apps = marathon_tools.get_all_marathon_apps(
        client, embed_failures=True)

    failed = 0
    for service_instance in args.service_instance_list:
        try:
            service, instance, _, __ = decompose_job_id(service_instance)
        except InvalidJobNameError:
            log.error("Invalid service instance specified. Format is service%sinstance." % SPACER)
            failed += 1
            continue

        # A truthy return value counts as a failed deployment.
        # NOTE(review): this relies on deploy_marathon_service returning a
        # plain status code; if it returns a non-empty tuple the check is
        # always true -- confirm against the callee.
        if deploy_marathon_service(service, instance, client, soa_dir,
                                   marathon_config, marathon_apps):
            failed += 1

    requests_cache.uninstall_cache()

    total = len(args.service_instance_list)
    log.debug("%d out of %d service.instances failed to deploy." %
              (failed, total))

    exit_status = 1 if failed else 0
    sys.exit(exit_status)
Ejemplo n.º 11
0
 def run(self):
     """Process service instances from the bounce queue forever.

     Each iteration takes one item from ``self.bounce_q``, fetches the
     current Marathon apps and calls ``deploy_marathon_service`` for it. When
     the deploy asks for another bounce, a new ``ServiceInstance`` due at
     ``now + delay`` is put on ``self.inbox_q``; otherwise the bounce-length
     timer is stopped. The loop never returns, and an exception from the
     deploy propagates out of it.
     """
     self.log.info("{} starting up".format(self.name))
     while True:
         service_instance = self.bounce_q.get()
         bounce_timers = self.setup_timers(service_instance)
         service = service_instance.service
         instance = service_instance.instance
         self.log.info("{} processing {}.{}".format(
             self.name, service, instance))

         marathon_apps = marathon_tools.get_all_marathon_apps(
             self.marathon_client, embed_failures=True)
         bounce_timers.setup_marathon.start()
         return_code, bounce_again_in_seconds = deploy_marathon_service(
             service=service,
             instance=instance,
             client=self.marathon_client,
             soa_dir=marathon_tools.DEFAULT_SOA_DIR,
             marathon_config=self.marathon_config,
             marathon_apps=marathon_apps)
         bounce_timers.setup_marathon.stop()
         self.log.info(
             "setup marathon completed with exit code {} for {}.{}".format(
                 return_code, service, instance))

         if not bounce_again_in_seconds:
             bounce_timers.bounce_length.stop()
             self.log.info("{}.{} in steady state".format(
                 service, instance))
         else:
             bounce_timers.processed_by_worker.start()
             self.log.info(
                 "{}.{} not in steady state so bouncing again in {} "
                 "seconds".format(service, instance,
                                  bounce_again_in_seconds))
             # Hand the instance back with its new deadline.
             self.inbox_q.put(
                 ServiceInstance(
                     service=service,
                     instance=instance,
                     bounce_by=int(time.time()) + bounce_again_in_seconds,
                     watcher=self.name,
                     bounce_timers=bounce_timers))
         time.sleep(0.1)
Ejemplo n.º 12
0
    def process_service_instance(self, service_instance):
        """Deploy one service instance and report how the bounce went.

        The current Marathon apps are fetched with tasks embedded, then
        ``deploy_marathon_service`` is called while the ``setup_marathon``
        timer is running. Afterwards either the ``processed_by_worker`` timer
        is started (another bounce is needed) or the ``bounce_length`` timer
        is stopped (steady state).

        :param service_instance: object with ``service`` and ``instance``
            attributes; also passed to ``self.setup_timers``
        :returns: ``BounceResults(bounce_again_in_seconds, return_code,
            bounce_timers)``
        """
        timers = self.setup_timers(service_instance)
        service = service_instance.service
        instance = service_instance.instance
        self.log.info("{} processing {}.{}".format(
            self.name, service, instance))

        current_apps = marathon_tools.get_all_marathon_apps(
            self.marathon_client, embed_tasks=True)
        timers.setup_marathon.start()
        return_code, bounce_again_in_seconds = deploy_marathon_service(
            service=service,
            instance=instance,
            client=self.marathon_client,
            soa_dir=marathon_tools.DEFAULT_SOA_DIR,
            marathon_config=self.marathon_config,
            marathon_apps=current_apps,
        )
        timers.setup_marathon.stop()

        self.log.info(
            "setup marathon completed with exit code {} for {}.{}".format(
                return_code, service, instance))

        if not bounce_again_in_seconds:
            timers.bounce_length.stop()
            self.log.info("{}.{} in steady state".format(service, instance))
        else:
            timers.processed_by_worker.start()
            self.log.info("{}.{} not in steady state so bouncing again in {} "
                          "seconds".format(service, instance,
                                           bounce_again_in_seconds))
        return BounceResults(bounce_again_in_seconds, return_code, timers)
Ejemplo n.º 13
0
def get_service_instances_needing_update(marathon_client, instances, cluster):
    """Return the ``(service, instance)`` pairs that need a Marathon update.

    A pair is included when any of the following holds: formatting its app
    dict raises ``NoDockerImageError``; the formatted app dict is falsy; its
    app id is not among the apps returned for ``marathon_client``; or the
    running app's ``instances`` value differs from the configured one.

    :param marathon_client: client passed to ``get_all_marathon_apps``
    :param instances: iterable of ``(service, instance)`` pairs to examine
    :param cluster: cluster name handed to the configuration loader
    :returns: list of ``(service, instance)`` tuples
    """
    apps_by_id = {}
    for app in get_all_marathon_apps(marathon_client):
        apps_by_id[app.id] = app

    to_update = []
    for service, instance in instances:
        # Loading the configuration is outside the try block, so any error it
        # raises propagates to the caller.
        config = load_marathon_service_config_no_cache(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=DEFAULT_SOA_DIR,
        )
        try:
            config_app = config.format_marathon_app_dict()
            app_id = '/{}'.format(config_app['id'])
        except NoDockerImageError:
            # Without an app dict there is nothing to compare; the pair is
            # reported as needing an update.
            to_update.append((service, instance))
            continue

        if (not config_app
                or app_id not in apps_by_id
                or apps_by_id[app_id].instances != config_app['instances']):
            to_update.append((service, instance))
    return to_update