Code Example #1
def main():
    args = parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    service_instances = get_services_for_cluster(
        cluster=cluster,
        instance_type='marathon',
        soa_dir=args.soa_dir,
    )

    config = marathon_tools.load_marathon_config()
    client = marathon_tools.get_marathon_client(config.get_url(),
                                                config.get_username(),
                                                config.get_password())
    all_tasks = client.list_tasks()
    mesos_slaves = get_slaves()
    smartstack_replication_checker = SmartstackReplicationChecker(
        mesos_slaves, system_paasta_config)
    for service, instance in service_instances:

        check_service_replication(
            service=service,
            instance=instance,
            cluster=cluster,
            all_tasks=all_tasks,
            soa_dir=args.soa_dir,
            smartstack_replication_checker=smartstack_replication_checker,
        )
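The loop above unpacks (service, instance) pairs from get_services_for_cluster(). A minimal standalone sketch of how those pairs are consumed, with made-up names:

service_instances = [
    ('example_service', 'main'),
    ('example_service', 'canary'),
]
for service, instance in service_instances:
    # each pair would feed one check_service_replication() call
    print(service, instance)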
Code Example #2
File: cleanup_maintenance.py  Project: rirwin/paasta
def unreserve_all_resources_on_non_draining_hosts():
    """Unreserve all resources on non-draining hosts"""
    log.debug("Unreserving all resources on non-draining hosts")
    slaves = get_slaves()
    hostnames = [slave['hostname'] for slave in slaves]
    draining_hosts = get_draining_hosts()
    non_draining_hosts = list(set(hostnames) - set(draining_hosts))
    unreserve_all_resources(hostnames=non_draining_hosts)
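The draining filter above is a plain set difference. A standalone sketch with made-up hostnames:

hostnames = ['host1', 'host2', 'host3']
draining_hosts = ['host2']
non_draining_hosts = list(set(hostnames) - set(draining_hosts))
# contains 'host1' and 'host3'; ordering is not preserved across the set round-trip
print(sorted(non_draining_hosts))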
Code Example #3
File: bounce_lib.py  Project: Yelp/paasta
def get_happy_tasks(app, service, nerve_ns, system_paasta_config, min_task_uptime=None, check_haproxy=False):
    """Given a MarathonApp object, return the subset of tasks which are considered healthy.
    With the default options, this returns tasks where at least one of the defined Marathon healthchecks passes.
    For it to do anything interesting, set min_task_uptime or check_haproxy.

    :param app: A MarathonApp object.
    :param service: The name of the service.
    :param nerve_ns: The nerve namespace.
    :param system_paasta_config: A SystemPaastaConfig object, used to look up the local synapse port and
                                 haproxy URL format when check_haproxy is set.
    :param min_task_uptime: Minimum number of seconds that a task must be running before we consider it healthy. Useful
                            if tasks take a while to start up.
    :param check_haproxy: Whether to check the local haproxy to make sure this task has been registered and discovered.
    """
    tasks = app.tasks
    happy = []
    now = datetime.datetime.utcnow()

    if check_haproxy:
        tasks_in_smartstack = []
        service_namespace = compose_job_id(service, nerve_ns)

        service_namespace_config = marathon_tools.load_service_namespace_config(
            service=service, namespace=nerve_ns)
        discover_location_type = service_namespace_config.get_discover()
        unique_values = mesos_tools.get_mesos_slaves_grouped_by_attribute(
            slaves=mesos_tools.get_slaves(),
            attribute=discover_location_type
        )

        for value, hosts in unique_values.iteritems():
            synapse_hostname = hosts[0]['hostname']
            tasks_in_smartstack.extend(get_registered_marathon_tasks(
                synapse_hostname,
                system_paasta_config.get_synapse_port(),
                system_paasta_config.get_synapse_haproxy_url_format(),
                service_namespace,
                tasks,
            ))
        tasks = tasks_in_smartstack

    for task in tasks:
        if task.started_at is None:
            # Can't be healthy if it hasn't started
            continue

        if min_task_uptime is not None:
            if (now - task.started_at).total_seconds() < min_task_uptime:
                continue

        # if there are healthchecks defined for the app but none have executed yet, then task is unhappy
        if len(app.health_checks) > 0 and len(task.health_check_results) == 0:
            continue

        # if there are health check results, check if at least one healthcheck is passing
        if not marathon_tools.is_task_healthy(task, require_all=False, default_healthy=True):
            continue
        happy.append(task)

    return happy
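A hypothetical call site for get_happy_tasks(); the app and system_paasta_config objects are assumed to have been loaded elsewhere, and the service and namespace names are made up:

happy_tasks = get_happy_tasks(
    app=app,                                    # a MarathonApp fetched elsewhere
    service='example_service',                  # made-up service name
    nerve_ns='main',                            # made-up nerve namespace
    system_paasta_config=system_paasta_config,
    min_task_uptime=120,    # ignore tasks that have been up less than two minutes
    check_haproxy=True,     # also require registration in the local haproxy
)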
Code Example #4
def status_smartstack_backends(
    service,
    instance,
    job_config,
    service_namespace_config,
    cluster,
    tasks,
    expected_count,
    soa_dir,
    synapse_port,
    synapse_haproxy_url_format,
    system_deploy_blacklist,
    system_deploy_whitelist,
    verbose,
):
    """Returns detailed information about smartstack backends for a service
    and instance.
    :returns: A newline-separated string of the smartstack backend status
    """
    output = []

    registration = job_config.get_registrations()[0]

    discover_location_type = service_namespace_config.get_discover()

    grouped_slaves = get_mesos_slaves_grouped_by_attribute(
        slaves=get_slaves(), attribute=discover_location_type
    )

    # rebuild the dict, replacing the slave object
    # with just their hostname
    grouped_slave_hostname = {
        attribute_value: [slave["hostname"] for slave in slaves]
        for attribute_value, slaves in grouped_slaves.items()
    }

    if len(grouped_slave_hostname) == 0:
        output.append(
            "Smartstack: ERROR - %s is NOT in smartstack at all!" % registration
        )
    else:
        output.append("Smartstack:")
        if verbose:
            output.append("  Haproxy Service Name: %s" % registration)
            output.append("  Backends:")

        output.extend(
            pretty_print_smartstack_backends_for_locations(
                registration=registration,
                tasks=tasks,
                locations=grouped_slave_hostname,
                expected_count=expected_count,
                verbose=verbose,
                synapse_port=synapse_port,
                synapse_haproxy_url_format=synapse_haproxy_url_format,
            )
        )
    return "\n".join(output)
Code Example #5
def marathon_service_mesh_status(
    service: str,
    service_mesh: pik.ServiceMesh,
    instance: str,
    job_config: marathon_tools.MarathonServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    tasks: Sequence[MarathonTask],
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    registration = job_config.get_registrations()[0]
    discover_location_type = service_namespace_config.get_discover()

    grouped_slaves = get_mesos_slaves_grouped_by_attribute(
        slaves=get_slaves(), attribute=discover_location_type)

    # rebuild the dict, replacing the slave object with just their hostname
    slave_hostname_by_location = {
        attribute_value: [slave["hostname"] for slave in slaves]
        for attribute_value, slaves in grouped_slaves.items()
    }

    expected_instance_count = marathon_tools.get_expected_instance_count_for_namespace(
        service, instance, settings.cluster)
    expected_count_per_location = int(expected_instance_count /
                                      len(slave_hostname_by_location))
    service_mesh_status: MutableMapping[str, Any] = {
        "registration": registration,
        "expected_backends_per_location": expected_count_per_location,
        "locations": [],
    }

    for location, hosts in slave_hostname_by_location.items():
        if service_mesh == pik.ServiceMesh.SMARTSTACK:
            service_mesh_status["locations"].append(
                _build_smartstack_location_dict_for_backends(
                    synapse_host=hosts[0],
                    registration=registration,
                    tasks=tasks,
                    location=location,
                    should_return_individual_backends=should_return_individual_backends,
                ))
        elif service_mesh == pik.ServiceMesh.ENVOY:
            service_mesh_status["locations"].append(
                _build_envoy_location_dict_for_backends(
                    envoy_host=hosts[0],
                    registration=registration,
                    tasks=tasks,
                    location=location,
                    should_return_individual_backends=should_return_individual_backends,
                ))

    return service_mesh_status
Code Example #6
File: cleanup_maintenance.py  Project: oktopuz/paasta
def unreserve_all_resources_on_non_draining_hosts():
    """Unreserve all resources on non-draining hosts"""
    log.debug("Unreserving all resources on non-draining hosts")
    slaves = get_slaves()
    hostnames = [slave['hostname'] for slave in slaves]
    draining_hosts = get_draining_hosts()
    non_draining_hosts = list(set(hostnames) - set(draining_hosts))
    if non_draining_hosts:
        unreserve_all_resources(hostnames=non_draining_hosts)
    else:
        log.debug("No non-draining hosts")
Code Example #7
def marathon_smartstack_status(
    service: str,
    instance: str,
    job_config: marathon_tools.MarathonServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    tasks: Sequence[MarathonTask],
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    registration = job_config.get_registrations()[0]
    discover_location_type = service_namespace_config.get_discover()

    grouped_slaves = get_mesos_slaves_grouped_by_attribute(
        slaves=get_slaves(), attribute=discover_location_type
    )

    # rebuild the dict, replacing the slave object with just their hostname
    slave_hostname_by_location = {
        attribute_value: [slave["hostname"] for slave in slaves]
        for attribute_value, slaves in grouped_slaves.items()
    }

    expected_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(
        service, instance, settings.cluster
    )
    expected_count_per_location = int(
        expected_smartstack_count / len(slave_hostname_by_location)
    )
    smartstack_status: MutableMapping[str, Any] = {
        "registration": registration,
        "expected_backends_per_location": expected_count_per_location,
        "locations": [],
    }

    for location, hosts in slave_hostname_by_location.items():
        synapse_host = hosts[0]
        sorted_backends = sorted(
            get_backends(
                registration,
                synapse_host=synapse_host,
                synapse_port=settings.system_paasta_config.get_synapse_port(),
                synapse_haproxy_url_format=settings.system_paasta_config.get_synapse_haproxy_url_format(),
            ),
            key=lambda backend: backend["status"],
            reverse=True,  # put 'UP' backends above 'MAINT' backends
        )
        matched_backends_and_tasks = match_backends_and_tasks(sorted_backends, tasks)
        location_dict = build_smartstack_location_dict(
            location, matched_backends_and_tasks, should_return_individual_backends
        )
        smartstack_status["locations"].append(location_dict)

    return smartstack_status
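The mapping returned above roughly has the following shape; the values here are illustrative, not real output:

# Illustrative shape of the return value (made-up values):
example_status = {
    "registration": "example_service.main",
    "expected_backends_per_location": 2,
    "locations": [
        # one dict per discover location, as built by build_smartstack_location_dict()
    ],
}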
Code Example #8
    def get_routing_constraints(self, service_namespace_config):
        """
        Returns a set of constraints in order to evenly group a marathon
        application amongst instances of a discovery type.
        If, for example, a given app's 'discover' key is set to 'region', then this function
        computes the constraints required to group the app evenly amongst each
        of the actual 'region' values in the cluster.
        It does so by querying the value of the region attribute for each slave
        in the cluster, returning a GROUP_BY constraint where the value is the
        number of unique regions.

        :param service_namespace_config: the config for this service
        :returns: a set of constraints for marathon
        """
        discover_level = service_namespace_config.get_discover()
        slaves = get_slaves()
        if not slaves:
            raise NoSlavesAvailableError(
                "No slaves could be found in the cluster."
            )
        filtered_slaves = filter_mesos_slaves_by_blacklist(
            slaves=slaves,
            blacklist=self.get_deploy_blacklist(),
            whitelist=self.get_deploy_whitelist(),
        )
        if not filtered_slaves:
            raise NoSlavesAvailableError(
                ("No suitable slaves could be found in the cluster for %s.%s"
                 "There are %d total slaves in the cluster, but after filtering"
                 " those available to the app according to the constraints set"
                 " by the deploy_blacklist and deploy_whitelist, there are 0"
                 " available.") % (self.service, self.instance, len(slaves))
            )

        value_dict = get_mesos_slaves_grouped_by_attribute(
            filtered_slaves,
            discover_level
        )
        routing_constraints = [[discover_level, "GROUP_BY", str(len(value_dict.keys()))]]
        return routing_constraints
Code Example #9
File: marathon_tools.py  Project: Yelp/paasta
    def get_routing_constraints(self, service_namespace_config):
        """
        Returns a set of constraints in order to evenly group a marathon
        application amongst instances of a discovery type.
        If, for example, a given app's 'discover' key is set to 'region', then this function
        computes the constraints required to group the app evenly amongst each
        of the actual 'region' values in the cluster.
        It does so by querying the value of the region attribute for each slave
        in the cluster, returning a GROUP_BY constraint where the value is the
        number of unique regions.

        :param service_namespace_config: the config for this service
        :returns: a set of constraints for marathon
        """
        discover_level = service_namespace_config.get_discover()
        slaves = get_slaves()
        if not slaves:
            raise NoSlavesAvailableError(
                "No slaves could be found in the cluster."
            )
        filtered_slaves = filter_mesos_slaves_by_blacklist(
            slaves=slaves,
            blacklist=self.get_deploy_blacklist(),
            whitelist=self.get_deploy_whitelist(),
        )
        if not filtered_slaves:
            raise NoSlavesAvailableError(
                ("No suitable slaves could be found in the cluster for %s.%s"
                 "There are %d total slaves in the cluster, but after filtering"
                 " those available to the app according to the constraints set"
                 " by the deploy_blacklist and deploy_whitelist, there are 0"
                 " available.") % (self.service, self.instance, len(slaves))
            )

        value_dict = get_mesos_slaves_grouped_by_attribute(
            filtered_slaves,
            discover_level
        )
        routing_constraints = [[discover_level, "GROUP_BY", str(len(value_dict.keys()))]]
        return routing_constraints
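For example, if the app's 'discover' key is set to 'region' and the filtered slaves span three distinct regions, the constraint list returned above would look like this (illustrative values):

routing_constraints = [["region", "GROUP_BY", "3"]]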
Code Example #10
def main():
    args = parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()

    clients = marathon_tools.get_marathon_clients(
        marathon_tools.get_marathon_servers(system_paasta_config))
    all_clients = clients.get_all_clients()
    all_tasks = []
    for client in all_clients:
        all_tasks.extend(client.list_tasks())
    mesos_slaves = get_slaves()
    smartstack_replication_checker = SmartstackReplicationChecker(
        mesos_slaves, system_paasta_config)

    for service in list_services(soa_dir=args.soa_dir):
        service_config = PaastaServiceConfigLoader(service=service,
                                                   soa_dir=args.soa_dir)
        for instance_config in service_config.instance_configs(
                cluster=cluster,
                instance_type_class=marathon_tools.MarathonServiceConfig,
        ):
            if instance_config.get_docker_image():
                check_service_replication(
                    instance_config=instance_config,
                    all_tasks=all_tasks,
                    smartstack_replication_checker=smartstack_replication_checker,
                )
            else:
                log.debug(
                    '%s is not deployed. Skipping replication monitoring.' %
                    instance_config.job_id, )
Code Example #11
File: bounce_lib.py  Project: edric-shen/paasta
def get_happy_tasks(app,
                    service,
                    nerve_ns,
                    system_paasta_config,
                    min_task_uptime=None,
                    check_haproxy=False):
    """Given a MarathonApp object, return the subset of tasks which are considered healthy.
    With the default options, this returns tasks where at least one of the defined Marathon healthchecks passes.
    For it to do anything interesting, set min_task_uptime or check_haproxy.

    :param app: A MarathonApp object.
    :param service: The name of the service.
    :param nerve_ns: The nerve namespace.
    :param system_paasta_config: A SystemPaastaConfig object, used to look up the local synapse port and
                                 haproxy URL format when check_haproxy is set.
    :param min_task_uptime: Minimum number of seconds that a task must be running before we consider it healthy. Useful
                            if tasks take a while to start up.
    :param check_haproxy: Whether to check the local haproxy to make sure this task has been registered and discovered.
    """
    tasks = app.tasks
    happy = []
    now = datetime.datetime.utcnow()

    if check_haproxy:
        tasks_in_smartstack = []
        service_namespace = compose_job_id(service, nerve_ns)

        service_namespace_config = marathon_tools.load_service_namespace_config(
            service=service, namespace=nerve_ns)
        discover_location_type = service_namespace_config.get_discover()
        unique_values = mesos_tools.get_mesos_slaves_grouped_by_attribute(
            slaves=mesos_tools.get_slaves(), attribute=discover_location_type)

        for value, hosts in unique_values.iteritems():
            synapse_hostname = hosts[0]['hostname']
            tasks_in_smartstack.extend(
                get_registered_marathon_tasks(
                    synapse_hostname,
                    system_paasta_config.get_synapse_port(),
                    system_paasta_config.get_synapse_haproxy_url_format(),
                    service_namespace,
                    tasks,
                ))
        tasks = tasks_in_smartstack

    for task in tasks:
        if task.started_at is None:
            # Can't be healthy if it hasn't started
            continue

        if min_task_uptime is not None:
            if (now - task.started_at).total_seconds() < min_task_uptime:
                continue

        # if there are healthchecks defined for the app but none have executed yet, then task is unhappy
        if len(app.health_checks) > 0 and len(task.health_check_results) == 0:
            continue

        # if there are health check results, check if at least one healthcheck is passing
        if not marathon_tools.is_task_healthy(
                task, require_all=False, default_healthy=True):
            continue
        happy.append(task)

    return happy