Ejemplo n.º 1
0
def perform_command(command, service, instance, cluster, verbose, soa_dir, app_id=None, delta=None):
    """Performs a start/stop/restart/status on an instance
    :param command: String of start, stop, restart, status
    :param service: service name
    :param instance: instance name, like "main" or "canary"
    :param cluster: cluster name
    :param verbose: int verbosity level
    :returns: A unix-style return code
    """
    system_config = load_system_paasta_config()

    marathon_config = marathon_tools.load_marathon_config()
    job_config = marathon_tools.load_marathon_service_config(service, instance, cluster, soa_dir=soa_dir)
    if not app_id:
        try:
            app_id = job_config.format_marathon_app_dict()['id']
        except NoDockerImageError:
            job_id = compose_job_id(service, instance)
            print "Docker image for %s not in deployments.json. Exiting. Has Jenkins deployed it?" % job_id
            return 1

    normal_instance_count = job_config.get_instances()
    normal_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(service, instance, cluster)
    proxy_port = marathon_tools.get_proxy_port_for_instance(service, instance, cluster, soa_dir=soa_dir)

    client = marathon_tools.get_marathon_client(marathon_config.get_url(), marathon_config.get_username(),
                                                marathon_config.get_password())
    if command == 'restart':
        restart_marathon_job(service, instance, app_id, client, cluster)
    elif command == 'status':
        print status_desired_state(service, instance, client, job_config)
        print status_marathon_job(service, instance, app_id, normal_instance_count, client)
        tasks, out = status_marathon_job_verbose(service, instance, client)
        if verbose > 0:
            print out
        print status_mesos_tasks(service, instance, normal_instance_count)
        if verbose > 0:
            tail_lines = calculate_tail_lines(verbose_level=verbose)
            print status_mesos_tasks_verbose(
                job_id=app_id,
                get_short_task_id=get_short_task_id,
                tail_lines=tail_lines,
            )
        if proxy_port is not None:
            print status_smartstack_backends(
                service=service,
                instance=instance,
                cluster=cluster,
                job_config=job_config,
                tasks=tasks,
                expected_count=normal_smartstack_count,
                soa_dir=soa_dir,
                verbose=verbose > 0,
                synapse_port=system_config.get_synapse_port(),
                synapse_haproxy_url_format=system_config.get_synapse_haproxy_url_format(),
            )
    else:
        # The command parser shouldn't have let us get this far...
        raise NotImplementedError("Command %s is not implemented!" % command)
    return 0
Ejemplo n.º 2
0
def perform_command(command, service, instance, cluster, verbose, soa_dir, app_id=None, delta=None):
    """Performs a start/stop/restart/status on an instance
    :param command: String of start, stop, restart, status
    :param service: service name
    :param instance: instance name, like "main" or "canary"
    :param cluster: cluster name
    :param verbose: int verbosity level
    :returns: A unix-style return code
    """
    system_config = load_system_paasta_config()

    marathon_config = marathon_tools.load_marathon_config()
    job_config = marathon_tools.load_marathon_service_config(service, instance, cluster, soa_dir=soa_dir)
    if not app_id:
        try:
            app_id = job_config.format_marathon_app_dict()['id']
        except NoDockerImageError:
            job_id = compose_job_id(service, instance)
            print "Docker image for %s not in deployments.json. Exiting. Has Jenkins deployed it?" % job_id
            return 1

    normal_instance_count = job_config.get_instances()
    normal_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(service, instance, cluster)
    proxy_port = marathon_tools.get_proxy_port_for_instance(service, instance, cluster, soa_dir=soa_dir)

    client = marathon_tools.get_marathon_client(marathon_config.get_url(), marathon_config.get_username(),
                                                marathon_config.get_password())
    if command == 'restart':
        restart_marathon_job(service, instance, app_id, client, cluster)
    elif command == 'status':
        print status_desired_state(service, instance, client, job_config)
        print status_marathon_job(service, instance, app_id, normal_instance_count, client)
        tasks, out = status_marathon_job_verbose(service, instance, client)
        if verbose > 0:
            print out
        print status_mesos_tasks(service, instance, normal_instance_count)
        if verbose > 0:
            tail_lines = calculate_tail_lines(verbose_level=verbose)
            print status_mesos_tasks_verbose(
                job_id=app_id,
                get_short_task_id=get_short_task_id,
                tail_lines=tail_lines,
            )
        if proxy_port is not None:
            print status_smartstack_backends(
                service=service,
                instance=instance,
                cluster=cluster,
                job_config=job_config,
                tasks=tasks,
                expected_count=normal_smartstack_count,
                soa_dir=soa_dir,
                verbose=verbose > 0,
                synapse_port=system_config.get_synapse_port(),
                synapse_haproxy_url_format=system_config.get_synapse_haproxy_url_format(),
            )
    else:
        # The command parser shouldn't have let us get this far...
        raise NotImplementedError("Command %s is not implemented!" % command)
    return 0
Ejemplo n.º 3
0
async def job_status(
    kstatus: MutableMapping[str, Any],
    client: kubernetes_tools.KubeClient,
    job_config: LongRunningServiceConfig,
    pod_list: Sequence[V1Pod],
    replicaset_list: Sequence[V1ReplicaSet],
    verbose: int,
    namespace: str,
) -> None:
    app_id = job_config.get_sanitised_deployment_name()
    kstatus["app_id"] = app_id
    kstatus["pods"] = []
    kstatus["replicasets"] = []
    if verbose > 0:
        num_tail_lines = calculate_tail_lines(verbose)

        kstatus["pods"] = await asyncio.gather(
            *[pod_info(pod, client, num_tail_lines) for pod in pod_list])

        for replicaset in replicaset_list:
            try:
                ready_replicas = replicaset.status.ready_replicas
                if ready_replicas is None:
                    ready_replicas = 0
            except AttributeError:
                ready_replicas = 0

            kstatus["replicasets"].append({
                "name":
                replicaset.metadata.name,
                "replicas":
                replicaset.spec.replicas,
                "ready_replicas":
                ready_replicas,
                "create_timestamp":
                replicaset.metadata.creation_timestamp.timestamp(),
            })

    kstatus["expected_instance_count"] = job_config.get_instances()

    app = kubernetes_tools.get_kubernetes_app_by_name(name=app_id,
                                                      kube_client=client,
                                                      namespace=namespace)
    desired_instances = (job_config.get_instances()
                         if job_config.get_desired_state() != "stop" else 0)
    deploy_status, message = await kubernetes_tools.get_kubernetes_app_deploy_status(
        app=app,
        kube_client=client,
        desired_instances=desired_instances,
    )
    kstatus[
        "deploy_status"] = kubernetes_tools.KubernetesDeployStatus.tostring(
            deploy_status)
    kstatus["deploy_status_message"] = message
    kstatus["running_instance_count"] = (app.status.ready_replicas
                                         if app.status.ready_replicas else 0)
    kstatus["create_timestamp"] = app.metadata.creation_timestamp.timestamp()
    kstatus["namespace"] = app.metadata.namespace
Ejemplo n.º 4
0
async def marathon_mesos_status(service: str, instance: str,
                                verbose: int) -> MutableMapping[str, Any]:
    mesos_status: MutableMapping[str, Any] = {}

    job_id = marathon_tools.format_job_id(service, instance)
    job_id_filter_string = f"{job_id}{marathon_tools.MESOS_TASK_SPACER}"

    try:
        running_and_active_tasks = select_tasks_by_id(
            await get_cached_list_of_running_tasks_from_frameworks(),
            job_id=job_id_filter_string,
        )
    except (ReadTimeout, asyncio.TimeoutError):
        return {
            "error_message":
            "Talking to Mesos timed out. It may be overloaded."
        }

    mesos_status["running_task_count"] = len(running_and_active_tasks)

    if verbose > 0:
        num_tail_lines = calculate_tail_lines(verbose)
        running_task_dict_futures = []

        for task in running_and_active_tasks:
            running_task_dict_futures.append(
                asyncio.ensure_future(
                    get_mesos_running_task_dict(task, num_tail_lines)))

        non_running_tasks = select_tasks_by_id(
            await get_cached_list_of_not_running_tasks_from_frameworks(),
            job_id=job_id_filter_string,
        )
        non_running_tasks.sort(
            key=lambda task: get_first_status_timestamp(task) or 0)
        non_running_tasks = list(reversed(non_running_tasks[-10:]))
        non_running_task_dict_futures = []
        for task in non_running_tasks:
            non_running_task_dict_futures.append(
                asyncio.ensure_future(
                    get_mesos_non_running_task_dict(task, num_tail_lines)))

        all_task_dict_futures = (running_task_dict_futures +
                                 non_running_task_dict_futures)
        if len(all_task_dict_futures):
            await asyncio.wait(all_task_dict_futures)

        mesos_status["running_tasks"] = [
            task_future.result() for task_future in running_task_dict_futures
        ]
        mesos_status["non_running_tasks"] = [
            task_future.result()
            for task_future in non_running_task_dict_futures
        ]

    return mesos_status
Ejemplo n.º 5
0
def format_chronos_job_status(client, job, running_tasks, verbose=0):
    """Given a job, returns a pretty-printed human readable output regarding
    the status of the job.

    :param job: dictionary of the job status
    :param running_tasks: a list of Mesos tasks associated with ``job``, e.g. the
                          result of ``mesos_tools.get_running_tasks_from_frameworks()``.
    :param verbose: int verbosity level
    """
    job_name = _format_job_name(job)
    is_temporary = chronos_tools.is_temporary_job(
        job) if 'name' in job else 'UNKNOWN'
    job_name = modify_string_for_rerun_status(job_name, is_temporary)
    disabled_state = _format_disabled_status(job)
    service, instance = chronos_tools.decompose_job_id(job['name'])
    chronos_state = chronos_tools.get_chronos_status_for_job(
        client, service, instance)

    (last_result, formatted_time) = _format_last_result(job)

    job_type = chronos_tools.get_job_type(job)
    schedule_type = _get_schedule_field_for_job_type(job_type)
    schedule_formatter = get_schedule_formatter(job_type, verbose)
    schedule_value = schedule_formatter(job)

    command = _format_command(job)
    mesos_status = _format_mesos_status(running_tasks)
    if verbose > 0:
        tail_lines = calculate_tail_lines(verbose_level=verbose)
        mesos_status_verbose = status_mesos_tasks_verbose(
            job_id=job["name"],
            get_short_task_id=get_short_task_id,
            tail_lines=tail_lines,
        )
        mesos_status = "%s\n%s" % (mesos_status, mesos_status_verbose)
    return ("Job:     %(job_name)s\n"
            "  Status:   %(disabled_state)s (%(chronos_state)s)"
            "  Last:     %(last_result)s (%(formatted_time)s)\n"
            "  %(schedule_type)s: %(schedule_value)s\n"
            "  Command:  %(command)s\n"
            "  Mesos:    %(mesos_status)s" % {
                "job_name": job_name,
                "is_temporary": is_temporary,
                "schedule_type": schedule_type,
                "chronos_state": PaastaColors.grey(chronos_state),
                "disabled_state": disabled_state,
                "last_result": last_result,
                "formatted_time": formatted_time,
                "schedule_value": schedule_value,
                "command": command,
                "mesos_status": mesos_status,
            })
Ejemplo n.º 6
0
def format_chronos_job_status(client, job, running_tasks, verbose=0):
    """Given a job, returns a pretty-printed human readable output regarding
    the status of the job.

    :param job: dictionary of the job status
    :param running_tasks: a list of Mesos tasks associated with ``job``, e.g. the
                          result of ``mesos_tools.get_running_tasks_from_active_frameworks()``.
    :param verbose: int verbosity level
    """
    job_name = _format_job_name(job)
    is_temporary = chronos_tools.is_temporary_job(job) if 'name' in job else 'UNKNOWN'
    job_name = modify_string_for_rerun_status(job_name, is_temporary)
    disabled_state = _format_disabled_status(job)
    service, instance = chronos_tools.decompose_job_id(job['name'])
    chronos_state = chronos_tools.get_chronos_status_for_job(client, service, instance)

    (last_result, formatted_time) = _format_last_result(job)

    job_type = chronos_tools.get_job_type(job)
    schedule_type = _get_schedule_field_for_job_type(job_type)
    schedule_formatter = get_schedule_formatter(job_type, verbose)
    schedule_value = schedule_formatter(job)

    command = _format_command(job)
    mesos_status = _format_mesos_status(job, running_tasks)
    if verbose > 0:
        tail_lines = calculate_tail_lines(verbose_level=verbose)
        mesos_status_verbose = status_mesos_tasks_verbose(
            job_id=job["name"],
            get_short_task_id=get_short_task_id,
            tail_lines=tail_lines,
        )
        mesos_status = "%s\n%s" % (mesos_status, mesos_status_verbose)
    return (
        "Job:     %(job_name)s\n"
        "  Status:   %(disabled_state)s (%(chronos_state)s)"
        "  Last:     %(last_result)s (%(formatted_time)s)\n"
        "  %(schedule_type)s: %(schedule_value)s\n"
        "  Command:  %(command)s\n"
        "  Mesos:    %(mesos_status)s" % {
            "job_name": job_name,
            "is_temporary": is_temporary,
            "schedule_type": schedule_type,
            "chronos_state": PaastaColors.grey(chronos_state),
            "disabled_state": disabled_state,
            "last_result": last_result,
            "formatted_time": formatted_time,
            "schedule_value": schedule_value,
            "command": command,
            "mesos_status": mesos_status,
        }
    )
Ejemplo n.º 7
0
def perform_command(command, service, instance, cluster, verbose, soa_dir):
    tail_lines = calculate_tail_lines(verbose_level=verbose)

    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    task_id_prefix = "{}{}".format(compose_job_id(service, instance), MESOS_TASK_SPACER)

    if command == 'status':
        paasta_print(status_mesos_tasks_verbose(
            job_id=task_id_prefix,
            get_short_task_id=lambda x: x,
            tail_lines=tail_lines,
        ))
Ejemplo n.º 8
0
def status_mesos_tasks(
    service: str,
    instance: str,
    normal_instance_count: int,
    verbose: int,
) -> str:
    job_id = marathon_tools.format_job_id(service, instance)
    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    filter_string = f"{job_id}{marathon_tools.MESOS_TASK_SPACER}"

    try:
        count = len(
            select_tasks_by_id(
                a_sync.block(get_cached_list_of_running_tasks_from_frameworks),
                filter_string))
        if count >= normal_instance_count:
            status = PaastaColors.green("Healthy")
            count_str = PaastaColors.green("(%d/%d)" %
                                           (count, normal_instance_count))
        elif count == 0:
            status = PaastaColors.red("Critical")
            count_str = PaastaColors.red("(%d/%d)" %
                                         (count, normal_instance_count))
        else:
            status = PaastaColors.yellow("Warning")
            count_str = PaastaColors.yellow("(%d/%d)" %
                                            (count, normal_instance_count))
        running_string = PaastaColors.bold('TASK_RUNNING')
        output = f"Mesos:      {status} - {count_str} tasks in the {running_string} state."
    except ReadTimeout:
        return "Error: talking to Mesos timed out. It may be overloaded."

    if verbose > 0:
        tail_lines = calculate_tail_lines(verbose_level=verbose)
        output += '\n' + status_mesos_tasks_verbose(
            filter_string=filter_string,
            get_short_task_id=get_short_task_id,
            tail_lines=tail_lines,
        )

    return output
Ejemplo n.º 9
0
async def get_pod_status_tasks_by_replicaset(
    pods_task: "asyncio.Future[V1Pod]",
    backends_task: "asyncio.Future[Dict[str, Any]]",
    client: kubernetes_tools.KubeClient,
    verbose: int,
) -> Dict[str, List["asyncio.Future[Dict[str, Any]]"]]:
    num_tail_lines = calculate_tail_lines(verbose)
    pods = await pods_task
    tasks_by_replicaset: DefaultDict[
        str, List["asyncio.Future[Dict[str, Any]]"]
    ] = defaultdict(list)
    for pod in pods:
        for owner_reference in pod.metadata.owner_references:
            if owner_reference.kind == "ReplicaSet":
                pod_status_task = asyncio.create_task(
                    get_pod_status(pod, backends_task, client, num_tail_lines)
                )
                tasks_by_replicaset[owner_reference.name].append(pod_status_task)

    return tasks_by_replicaset
Ejemplo n.º 10
0
async def get_pod_status_tasks_by_sha_and_readiness(
    pods_task: "asyncio.Future[V1Pod]",
    backends_task: "asyncio.Future[Dict[str, Any]]",
    client: kubernetes_tools.KubeClient,
    verbose: int,
) -> DefaultDict[
    Tuple[str, str], DefaultDict[bool, List["asyncio.Future[Dict[str, Any]]"]]
]:
    num_tail_lines = calculate_tail_lines(verbose)
    tasks_by_sha_and_readiness: DefaultDict[
        Tuple[str, str], DefaultDict[bool, List["asyncio.Future[Dict[str, Any]]"]]
    ] = defaultdict(lambda: defaultdict(list))
    for pod in await pods_task:
        git_sha = pod.metadata.labels["paasta.yelp.com/git_sha"]
        config_sha = pod.metadata.labels["paasta.yelp.com/config_sha"]
        is_ready = kubernetes_tools.is_pod_ready(pod)
        pod_status_task = asyncio.create_task(
            get_pod_status(pod, backends_task, client, num_tail_lines)
        )
        tasks_by_sha_and_readiness[(git_sha, config_sha)][is_ready].append(
            pod_status_task
        )

    return tasks_by_sha_and_readiness
Ejemplo n.º 11
0
async def job_status(
    kstatus: MutableMapping[str, Any],
    client: kubernetes_tools.KubeClient,
    job_config: LongRunningServiceConfig,
    pod_list: Sequence[V1Pod],
    replicaset_list: Sequence[V1ReplicaSet],
    verbose: int,
    namespace: str,
) -> None:
    app_id = job_config.get_sanitised_deployment_name()
    kstatus["app_id"] = app_id
    kstatus["pods"] = []
    kstatus["replicasets"] = []
    if verbose > 0:
        num_tail_lines = calculate_tail_lines(verbose)

        for pod in pod_list:
            container_statuses = pod.status.container_statuses or []
            containers = [
                dict(
                    name=container.name,
                    tail_lines=await get_tail_lines_for_kubernetes_container(
                        client, pod, container, num_tail_lines,
                    ),
                )
                for container in container_statuses
            ]
            kstatus["pods"].append(
                {
                    "name": pod.metadata.name,
                    "host": kubernetes_tools.get_pod_hostname(client, pod),
                    "deployed_timestamp": pod.metadata.creation_timestamp.timestamp(),
                    "phase": pod.status.phase,
                    "ready": kubernetes_tools.is_pod_ready(pod),
                    "containers": containers,
                    "reason": pod.status.reason,
                    "message": pod.status.message,
                }
            )
        for replicaset in replicaset_list:
            try:
                ready_replicas = replicaset.status.ready_replicas
                if ready_replicas is None:
                    ready_replicas = 0
            except AttributeError:
                ready_replicas = 0

            kstatus["replicasets"].append(
                {
                    "name": replicaset.metadata.name,
                    "replicas": replicaset.spec.replicas,
                    "ready_replicas": ready_replicas,
                    "create_timestamp": replicaset.metadata.creation_timestamp.timestamp(),
                }
            )

    kstatus["expected_instance_count"] = job_config.get_instances()

    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=app_id, kube_client=client, namespace=namespace
    )
    deploy_status = kubernetes_tools.get_kubernetes_app_deploy_status(
        app=app, desired_instances=job_config.get_instances()
    )
    kstatus["deploy_status"] = kubernetes_tools.KubernetesDeployStatus.tostring(
        deploy_status
    )
    kstatus["running_instance_count"] = (
        app.status.ready_replicas if app.status.ready_replicas else 0
    )
    kstatus["create_timestamp"] = app.metadata.creation_timestamp.timestamp()
    kstatus["namespace"] = app.metadata.namespace