Example #1
0
def get_deployments_strings(service: str, soa_dir: str) -> List[str]:
    """Describe where ``service`` is deployed, one display line per cluster.

    Args:
        service: Name of the service to look up.
        soa_dir: Config directory handed to the deployment and namespace
            lookups.

    Returns:
        A list of `` - <cluster> (<link>)`` lines, or a single "N/A" line
        when no deployments are available.
    """
    try:
        deployments = get_actual_deployments(service, soa_dir)
    except NoDeploymentsAvailable:
        deployments = {}
    if deployments == {}:
        return [" - N/A: Not deployed to any PaaSTA Clusters"]

    namespace_config = load_service_namespace_config(
        service=service, namespace="main", soa_dir=soa_dir
    )
    mode = namespace_config.get_mode()
    lines = []
    for cluster in deployments_to_clusters(deployments):
        if mode == "tcp":
            # tcp links carry the namespace's configured proxy_port.
            port = namespace_config.get("proxy_port")
            url = "%s://paasta-%s.yelp:%d/" % (mode, cluster, port)
            link = PaastaColors.cyan(url)
        elif mode in ("http", "https"):
            url = f"{mode}://{service}.paasta-{cluster}.yelp/"
            link = PaastaColors.cyan(url)
        else:
            # Any other mode has no link form here.
            link = "N/A"
        lines.append(f" - {cluster} ({link})")
    return lines
Example #2
0
def get_service_info(service):
    """Return a newline-joined, human-readable summary of ``service``.

    The summary lists description, links, owning team, runbook, git repo,
    Jenkins pipeline, deployed clusters, optional Smartstack endpoints and
    dashboards, in that order.
    """
    config = read_service_configuration(service)
    description = config.get('description', NO_DESCRIPTION_MESSAGE)
    external_link = config.get('external_link', NO_EXTERNAL_LINK_MESSAGE)
    pipeline_url = get_pipeline_url(service)
    endpoints = get_smartstack_endpoints(service)
    git_url = get_git_url(service)

    lines = [
        'Service Name: %s' % service,
        'Description: %s' % description,
        'External Link: %s' % PaastaColors.cyan(external_link),
        'Monitored By: team %s' % get_team(service=service, overrides={}),
        'Runbook: %s' % PaastaColors.cyan(get_runbook(service=service, overrides={})),
        'Git Repo: %s' % git_url,
        'Jenkins Pipeline: %s' % pipeline_url,
        'Deployed to the following clusters:',
    ]
    lines.extend(get_deployments_strings(service))
    # The Smartstack section is omitted entirely when there are no endpoints.
    if smartstack_section_needed(endpoints):
        lines.append('Smartstack endpoint(s):')
        lines.extend(' - %s' % endpoint for endpoint in endpoints)
    lines.append('Dashboard(s):')
    lines.extend(get_dashboard_urls(service))

    return '\n'.join(lines)


def smartstack_section_needed(endpoints):
    """Return True when ``endpoints`` is truthy (i.e. there is something to list)."""
    return bool(endpoints)
Example #3
0
def get_service_info(service, soa_dir):
    """Return a newline-joined, human-readable summary of ``service``.

    Args:
        service: Name of the service to describe.
        soa_dir: Config directory used for every lookup in this function.

    Returns:
        A single string with one line per fact: description, links, team,
        runbook, git repo, deployed clusters, optional Smartstack endpoints,
        and dashboards.
    """
    service_configuration = read_service_configuration(service, soa_dir)
    description = service_configuration.get('description',
                                            NO_DESCRIPTION_MESSAGE)
    external_link = service_configuration.get('external_link',
                                              NO_EXTERNAL_LINK_MESSAGE)
    smartstack_endpoints = get_smartstack_endpoints(service, soa_dir)
    git_url = get_git_url(service, soa_dir)

    # Team and runbook must be read from the same soa_dir as everything else;
    # previously they silently fell back to the helpers' default directory.
    team = get_team(service=service, overrides={}, soa_dir=soa_dir)
    runbook = get_runbook(service=service, overrides={}, soa_dir=soa_dir)

    output = [
        'Service Name: %s' % service,
        'Description: %s' % description,
        'External Link: %s' % PaastaColors.cyan(external_link),
        'Monitored By: team %s' % team,
        'Runbook: %s' % PaastaColors.cyan(runbook),
        'Git Repo: %s' % git_url,
        'Deployed to the following clusters:',
    ]
    output.extend(get_deployments_strings(service, soa_dir))
    if smartstack_endpoints:
        output.append('Smartstack endpoint(s):')
        output.extend(' - %s' % endpoint for endpoint in smartstack_endpoints)
    output.append('Dashboard(s):')
    output.extend(get_dashboard_urls(service))

    return '\n'.join(output)
Example #4
0
def get_deployments_strings(service: str, soa_dir: str) -> List[str]:
    """List the clusters ``service`` is deployed to as display strings.

    Each entry has the form `` - <cluster> (<link>)``. When no deployments
    are available, a single "N/A" entry is returned instead.
    """
    try:
        deployments = get_actual_deployments(service, soa_dir)
    except NoDeploymentsAvailable:
        deployments = {}

    if deployments == {}:
        return [' - N/A: Not deployed to any PaaSTA Clusters']

    main_namespace = load_service_namespace_config(
        service=service, namespace='main', soa_dir=soa_dir,
    )
    mode = main_namespace.get_mode()

    entries = []
    for cluster in deployments_to_clusters(deployments):
        if mode == "tcp":
            # tcp links include the proxy_port read from the namespace config.
            port = main_namespace.get('proxy_port')
            target = '%s://paasta-%s.yelp:%d/' % (mode, cluster, port)
            link = PaastaColors.cyan(target)
        elif mode in ("http", "https"):
            target = '%s://%s.paasta-%s.yelp/' % (mode, service, cluster)
            link = PaastaColors.cyan(target)
        else:
            link = "N/A"
        entries.append(' - %s (%s)' % (cluster, link))
    return entries
Example #5
0
def get_service_info(service, soa_dir):
    """Assemble a multi-line description of ``service`` read from ``soa_dir``.

    Returns the lines joined with newlines: name, description, external
    link, team, runbook, git repo, deployed clusters, Smartstack endpoints
    (only when there are any) and dashboards.
    """
    config = read_service_configuration(service, soa_dir)
    description = config.get("description", NO_DESCRIPTION_MESSAGE)
    external_link = config.get("external_link", NO_EXTERNAL_LINK_MESSAGE)
    endpoints = get_smartstack_endpoints(service, soa_dir)
    git_url = get_git_url(service, soa_dir)

    lines = [
        "Service Name: %s" % service,
        "Description: %s" % description,
        "External Link: %s" % PaastaColors.cyan(external_link),
        "Monitored By: team %s"
        % get_team(service=service, overrides={}, soa_dir=soa_dir),
        "Runbook: %s"
        % PaastaColors.cyan(
            get_runbook(service=service, overrides={}, soa_dir=soa_dir)
        ),
        "Git Repo: %s" % git_url,
        "Deployed to the following clusters:",
    ]
    lines += get_deployments_strings(service, soa_dir)
    if endpoints:
        lines.append("Smartstack endpoint(s):")
        lines += [" - %s" % endpoint for endpoint in endpoints]
    lines.append("Dashboard(s):")
    lines += get_dashboard_urls(service)

    return "\n".join(lines)
Example #6
0
class NoSuchService(Exception):
    """Raised when a service name is unknown or could not be guessed.

    ``str()`` of the exception yields a lookup-failure message when a
    service name was supplied, and guidance on how to supply one otherwise.
    """

    # Shown when no service name was given at all.
    GUESS_ERROR_MSG = (
        "Could not determine service name.\n"
        "Please run this from the root of a copy "
        "(git clone) of your service.\n"
        "Alternatively, supply the %s name you wish to "
        "inspect with the %s option."
        % (PaastaColors.cyan("SERVICE"), PaastaColors.cyan("-s"))
    )

    # Appended after the service name when the given name was not found.
    CHECK_ERROR_MSG = (
        "not found.  Please provide a valid service name.\n"
        "Ensure that a directory of the same name exists in %s."
        % PaastaColors.green("/nail/etc/services")
    )

    def __init__(self, service):
        self.service = service

    def __str__(self):
        if not self.service:
            return self.GUESS_ERROR_MSG
        highlighted = PaastaColors.cyan(self.service)
        return "SERVICE: {} {}".format(highlighted, self.CHECK_ERROR_MSG)
def pick_default_log_mode(args, log_reader, service, levels, components,
                          clusters, instances):
    """Fetch logs using the first retrieval mode ``log_reader`` supports.

    Modes are tried in this order: last 100 lines, a time window, tailing.
    A cyan notice is printed to stderr before the chosen mode runs.

    Returns:
        0 after running a mode; ``None`` (implicitly) when the reader
        advertises none of the three capabilities.
    """
    def shared_filters():
        # Built lazily so ``args.raw_mode`` is only read once a mode is chosen.
        return dict(
            service=service,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )

    if log_reader.SUPPORTS_LINE_COUNT:
        notice = PaastaColors.cyan(
            "Fetching 100 lines and applying filters. Try -n 1000 for more lines..."
        )
        paasta_print(notice, file=sys.stderr)
        log_reader.print_last_n_logs(line_count=100, **shared_filters())
        return 0

    if log_reader.SUPPORTS_TIME:
        start_time, end_time = generate_start_end_time()
        notice = PaastaColors.cyan(
            "Fetching a specific time period and applying filters...")
        paasta_print(notice, file=sys.stderr)
        log_reader.print_logs_by_time(
            start_time=start_time, end_time=end_time, **shared_filters()
        )
        return 0

    if log_reader.SUPPORTS_TAILING:
        notice = PaastaColors.cyan("Tailing logs and applying filters...")
        paasta_print(notice, file=sys.stderr)
        log_reader.tail_logs(**shared_filters())
        return 0
Example #8
0
def status_marathon_job_human(
    service,
    instance,
    deploy_status,
    app_id,
    running_instances,
    normal_instance_count,
):
    """Format a one-line, colorized Marathon status for a service instance.

    When ``deploy_status`` is ``'NotRunning'`` the line says the app is not
    configured in Marathon yet; otherwise it reports health and the
    running/expected instance counts.
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if deploy_status == 'NotRunning':
        warning = PaastaColors.yellow("Warning")
        template = "Marathon:   %s - %s (app %s) is not configured in Marathon yet (waiting for bounce)"
        return template % (warning, name, app_id)

    # Pick the health label and the color used for the instance tally.
    if running_instances >= normal_instance_count:
        label, paint_tally = PaastaColors.green("Healthy"), PaastaColors.green
    elif running_instances == 0:
        label, paint_tally = PaastaColors.yellow("Critical"), PaastaColors.red
    else:
        label, paint_tally = PaastaColors.yellow("Warning"), PaastaColors.yellow

    tally = paint_tally("(%d/%d)" % (running_instances, normal_instance_count))
    return "Marathon:   %s - up with %s instances. Status: %s" % (
        label, tally, deploy_status)
def status_marathon_job_human(
    service: str,
    instance: str,
    deploy_status: str,
    desired_app_id: str,
    app_count: int,
    running_instances: int,
    normal_instance_count: int,
) -> str:
    """Format a one-line, colorized Marathon status for a service instance.

    Args:
        service: Service name, combined with ``instance`` into the job id.
        instance: Instance name.
        deploy_status: Text appended as the ``Status:`` of a running job.
        desired_app_id: App id shown when nothing is configured yet.
        app_count: Number of Marathon apps found for this job.
        running_instances: Number of instances currently running.
        normal_instance_count: Number of instances expected to be running.

    Returns:
        A health line with running/expected counts when at least one app
        exists, otherwise a "not configured in Marathon yet" warning line.
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    # A count is never negative, so the former ``app_count >= 0`` test made
    # the "not configured" branch unreachable. Zero apps is that case.
    if app_count > 0:
        if running_instances >= normal_instance_count:
            status = PaastaColors.green("Healthy")
            instance_count = PaastaColors.green(
                "(%d/%d)" % (running_instances, normal_instance_count)
            )
        elif running_instances == 0:
            status = PaastaColors.yellow("Critical")
            instance_count = PaastaColors.red(
                "(%d/%d)" % (running_instances, normal_instance_count)
            )
        else:
            status = PaastaColors.yellow("Warning")
            instance_count = PaastaColors.yellow(
                "(%d/%d)" % (running_instances, normal_instance_count)
            )
        return "Marathon:   {} - up with {} instances. Status: {}".format(
            status, instance_count, deploy_status
        )

    status = PaastaColors.yellow("Warning")
    return "Marathon:   {} - {} (app {}) is not configured in Marathon yet (waiting for bounce)".format(
        status, name, desired_app_id
    )
Example #10
0
 def git_repo_missing(git_url):
     """Build the failure result reported when the repo at ``git_url`` is not found.

     The URL is highlighted in cyan inside the message, and the
     yelpsoa-configs link is passed to ``failure`` as its second argument.
     """
     highlighted_url = PaastaColors.cyan(git_url)
     message = (
         "Could not find Git repo %s. "
         "Your service must be there.\n"
         "  More info:" % highlighted_url
     )
     return failure(message, "http://y/yelpsoa-configs")
def start_chronos_job(service, instance, job_id, client, cluster, job_config, complete_job_config, emergency=False):
    """Start a manual run of a Chronos job, unless the job is disabled.

    Per the original note this corresponds to the Chronos 'manual start'
    endpoint (https://mesos.github.io/chronos/docs/api.html#manually-starting-a-job):
    the job is run now regardless of its 'schedule', which is left unmodified.

    Args:
        service: Service name, used for the deploy log entry.
        instance: Instance name, used for the deploy log entry.
        job_id: Chronos job id to run.
        client: Chronos client exposing ``update`` and ``run``.
        cluster: Cluster name, used for the deploy log entry.
        job_config: Unused here; kept for interface compatibility.
        complete_job_config: Full job definition; its ``'disabled'`` key
            decides whether the job may be started.
        emergency: When true, the log line is tagged "EmergencyStart".

    A disabled job is not started: a red notice is printed and the client is
    never called.
    """
    name = PaastaColors.cyan(job_id)

    # The job should be run immediately as long as the job is not disabled via the 'disabled' key in soa-configs or has
    # been previously stopped.
    if complete_job_config['disabled']:
        # Single-argument call form behaves the same as the old print
        # statement on Python 2 and is valid syntax on Python 3.
        print(PaastaColors.red("You cannot emergency start a disabled job. Run `paasta start` first."))
    else:
        log_reason = PaastaColors.red("EmergencyStart") if emergency else "Brutal bounce"
        _log(
            service=service,
            line="%s: Starting manual run of %s in Chronos" % (log_reason, name),
            component="deploy",
            level="event",
            cluster=cluster,
            instance=instance
        )

        client.update(complete_job_config)
        client.run(job_id)
Example #12
0
def status_marathon_job(service, instance, app_id, normal_instance_count,
                        client):
    """Query Marathon through ``client`` and format a one-line status for the job.

    A running app is reported with its health, running/expected instance
    counts and whether a deployment is in progress; an app that is not
    running is reported as Critical.
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if not marathon_tools.is_app_id_running(app_id, client):
        red_not = PaastaColors.red("NOT")
        critical = PaastaColors.red("Critical")
        return "Marathon:   %s - %s (app %s) is %s running in Marathon." % (
            critical, name, app_id, red_not)

    app = client.get_app(app_id)
    running = app.tasks_running
    # No pending deployments means the app is settled.
    deploy_status = (
        PaastaColors.bold("Running")
        if len(app.deployments) == 0
        else PaastaColors.yellow("Deploying")
    )

    if running >= normal_instance_count:
        health, paint_tally = PaastaColors.green("Healthy"), PaastaColors.green
    elif running == 0:
        health, paint_tally = PaastaColors.yellow("Critical"), PaastaColors.red
    else:
        health, paint_tally = PaastaColors.yellow("Warning"), PaastaColors.yellow

    tally = paint_tally("(%d/%d)" % (running, normal_instance_count))
    return "Marathon:   %s - up with %s instances. Status: %s." % (
        health, tally, deploy_status)
Example #13
0
 def git_repo_missing(git_url):
     """Return the ``failure`` result for a Git repo that could not be found.

     ``git_url`` is colored cyan in the message text; the yelpsoa-configs
     link is handed to ``failure`` as a separate argument.
     """
     template = (
         "Could not find Git repo %s. "
         "Your service must be there.\n"
         "  More info:"
     )
     text = template % PaastaColors.cyan(git_url)
     return failure(text, "http://y/yelpsoa-configs")
Example #14
0
def missing_deployments_message(service):
    """Return the message shown when ``service`` has no deployments.json entries.

    The message starts with ``x_mark()`` and points at the service's Jenkins
    view, highlighted in cyan.
    """
    jenkins_url = PaastaColors.cyan(
        'https://jenkins.yelpcorp.com/view/services-%s' % service)
    template = (
        "%s No deployments in deployments.json yet.\n  "
        "Has Jenkins run?\n  "
        "Check: %s"
    )
    return template % (x_mark(), jenkins_url)
Example #15
0
 def __str__(self):
     """Return the lookup-failure text, or the guess-failure text when no service is set."""
     if not self.service:
         return self.GUESS_ERROR_MSG
     highlighted = PaastaColors.cyan(self.service)
     return "SERVICE: {} {}".format(highlighted, self.CHECK_ERROR_MSG)
Example #16
0
def missing_deployments_message(service):
    """Build the "no deployments yet" message for ``service``.

    It is prefixed with ``x_mark()`` and ends with a cyan link to the
    service's Jenkins view.
    """
    view_url = 'https://jenkins.yelpcorp.com/view/services-%s' % service
    jenkins_url = PaastaColors.cyan(view_url)
    parts = (
        "%s No deployments in deployments.json yet.\n  ",
        "Has Jenkins run?\n  ",
        "Check: %s",
    )
    return "".join(parts) % (x_mark(), jenkins_url)
Example #17
0
def start_chronos_job(service,
                      instance,
                      job_id,
                      client,
                      cluster,
                      job_config,
                      complete_job_config,
                      emergency=False):
    """Trigger an immediate run of a Chronos job, unless it is disabled.

    Per the original note this maps to the Chronos 'manual start' endpoint
    (https://mesos.github.io/chronos/docs/api.html#manually-starting-a-job):
    the job runs now regardless of its 'schedule', which stays unmodified.
    For a disabled job only a red notice is printed and ``client`` is not
    called.
    """
    name = PaastaColors.cyan(job_id)

    # A job disabled via the 'disabled' key in soa-configs (or previously
    # stopped) must not be started from here.
    if complete_job_config['disabled']:
        notice = PaastaColors.red(
            "You cannot emergency start a disabled job. Run `paasta start` first."
        )
        paasta_print(notice)
        return

    if emergency:
        log_reason = PaastaColors.red("EmergencyStart")
    else:
        log_reason = "Brutal bounce"
    message = "%s: Starting manual run of %s in Chronos" % (log_reason, name)
    _log(
        service=service,
        line=message,
        component="deploy",
        level="event",
        cluster=cluster,
        instance=instance,
    )

    client.update(complete_job_config)
    client.run(job_id)
Example #18
0
def pick_default_log_mode(args, log_reader, service, levels, components, clusters, instances):
    """Fetch logs using the first retrieval mode ``log_reader`` supports.

    Modes are tried in this order: last 100 lines, a time window, tailing.
    A cyan notice is written to stderr before the chosen mode runs, so the
    notice never mixes into log output that is piped from stdout.

    Args:
        args: Parsed CLI arguments; only ``raw_mode`` is read.
        log_reader: Reader exposing the ``SUPPORTS_*`` flags and fetch methods.
        service, levels, components, clusters, instances: Filters passed
            through unchanged to the reader.

    Returns:
        0 after running a mode; ``None`` (implicitly) when the reader
        advertises none of the three capabilities.
    """
    if log_reader.SUPPORTS_LINE_COUNT:
        # Was sys.stdout; the other two branches already used stderr.
        paasta_print(
            PaastaColors.cyan(
                "No filtering specified, grabbing last 100 lines"
            ),
            file=sys.stderr,
        )
        log_reader.print_last_n_logs(
            service=service,
            line_count=100,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0

    elif log_reader.SUPPORTS_TIME:
        start_time, end_time = generate_start_end_time()

        paasta_print(PaastaColors.cyan("No filtering specified, grabbing last 30 minutes of logs"), file=sys.stderr)
        log_reader.print_logs_by_time(
            service=service,
            start_time=start_time,
            end_time=end_time,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0

    elif log_reader.SUPPORTS_TAILING:
        paasta_print(PaastaColors.cyan("No filtering specified, tailing logs"), file=sys.stderr)
        log_reader.tail_logs(
            service=service,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0
Example #19
0
def get_cluster_dashboards(cluster):
    """Returns the direct dashboards for humans to use for a given cluster.

    The result is one newline-joined string: a warning about prod
    reachability, the read-only user dashboards, then the admin dashboards.
    All URLs are colored cyan.
    """
    # (line template, url) pairs, in display order.
    user_rows = (
        ("  Mesos:    %s", "http://mesos.paasta-%s.yelp/" % cluster),
        ("  Marathon: %s", "http://marathon.paasta-%s.yelp/" % cluster),
        ("  Chronos:  %s", "http://chronos.paasta-%s.yelp/" % cluster),
        ("  Synapse:  %s",
         "http://paasta-%s.yelp:%s/" % (cluster, DEFAULT_SYNAPSE_PORT)),
    )
    admin_rows = (
        ("  Mesos:    %s", "http://paasta-%s.yelp:5050/" % cluster),
        ("  Marathon: %s", "http://paasta-%s.yelp:5052/" % cluster),
        ("  Chronos:  %s", "http://paasta-%s.yelp:5053/" % cluster),
    )

    lines = [
        "Warning: Dashboards in prod are not directly reachable. "
        "See http://y/paasta-troubleshooting for instructions. (search for 'prod dashboards')",
        "User Dashboards (Read Only):",
    ]
    lines.extend(template % PaastaColors.cyan(url) for template, url in user_rows)
    lines.append("Admin Dashboards (Read/write, requires secrets):")
    lines.extend(template % PaastaColors.cyan(url) for template, url in admin_rows)
    return '\n'.join(lines)
Example #20
0
File: logs.py Project: fnzv/paasta
def pick_default_log_mode(args, log_reader, service, levels, components, clusters):
    """Fetch logs using the first retrieval mode ``log_reader`` supports.

    Tries, in order: last 100 lines, a time window, tailing. A cyan notice
    is written to stderr first. Returns 0 once a mode has run, and ``None``
    (implicitly) when the reader supports none of them.
    """
    def announce(text):
        # Notices go to stderr, each terminated by a newline.
        sys.stderr.write(PaastaColors.cyan(text) + "\n")

    if log_reader.SUPPORTS_LINE_COUNT:
        announce("No filtering specified, grabbing last 100 lines")
        log_reader.print_last_n_logs(
            service, 100, levels, components, clusters, raw_mode=args.raw_mode)
        return 0

    if log_reader.SUPPORTS_TIME:
        start_time, end_time = generate_start_end_time()
        announce("No filtering specified, grabbing last 30 minutes of logs")
        log_reader.print_logs_by_time(
            service, start_time, end_time, levels, components, clusters,
            raw_mode=args.raw_mode)
        return 0

    if log_reader.SUPPORTS_TAILING:
        announce("No filtering specified, tailing logs")
        log_reader.tail_logs(
            service, levels, components, clusters, raw_mode=args.raw_mode)
        return 0
Example #21
0
def scale_marathon_job(service, instance, app_id, delta, client, cluster):
    """Log an EmergencyScale event and scale the Marathon app by ``delta``.

    Args:
        service: Service name, used for the job id and the log entry.
        instance: Instance name, used for the job id and the log entry.
        app_id: Marathon app id to scale.
        delta: Number of instances to add (positive) or remove (negative);
            anything ``int()`` accepts.
        client: Marathon client exposing ``scale_app``.
        cluster: Cluster name recorded in the log entry.

    Raises:
        ValueError, TypeError: If ``delta`` cannot be converted by ``int()``.
    """
    # Normalize once so the logged direction, the logged magnitude and the
    # value sent to Marathon all come from the same integer. Previously the
    # direction was taken from the raw value and could disagree.
    delta = int(delta)
    name = PaastaColors.cyan(compose_job_id(service, instance))
    direction = 'down' if delta < 0 else 'up'
    _log(service=service,
         line="EmergencyScale: Scaling %s %s by %d instances" %
         (name, direction, abs(delta)),
         component='deploy',
         level='event',
         cluster=cluster,
         instance=instance)
    client.scale_app(app_id, delta=delta, force=True)
Example #22
0
def pick_default_log_mode(args, log_reader, service, levels, components,
                          clusters, instances):
    """Fetch logs using the first retrieval mode ``log_reader`` supports.

    Tries, in order: last 100 lines, a time window, tailing. A cyan notice
    is written to stderr first. Returns 0 once a mode has run, and ``None``
    (implicitly) when the reader supports none of them.
    """
    def announce(text):
        # Notices go to stderr, each terminated by a newline.
        sys.stderr.write(PaastaColors.cyan(text) + "\n")

    if log_reader.SUPPORTS_LINE_COUNT:
        announce("No filtering specified, grabbing last 100 lines")
        log_reader.print_last_n_logs(
            service, 100, levels, components, clusters, instances,
            raw_mode=args.raw_mode,
        )
        return 0

    if log_reader.SUPPORTS_TIME:
        start_time, end_time = generate_start_end_time()
        announce("No filtering specified, grabbing last 30 minutes of logs")
        log_reader.print_logs_by_time(
            service, start_time, end_time, levels, components, clusters,
            instances, raw_mode=args.raw_mode,
        )
        return 0

    if log_reader.SUPPORTS_TAILING:
        announce("No filtering specified, tailing logs")
        log_reader.tail_logs(
            service, levels, components, clusters, instances,
            raw_mode=args.raw_mode,
        )
        return 0
Example #23
0
def start_marathon_job(service, instance, app_id, normal_instance_count,
                       client, cluster):
    """Log an EmergencyStart event, then scale the app to ``normal_instance_count``.

    The scale request is sent through ``client.scale_app`` with ``force=True``.
    """
    job_name = PaastaColors.cyan(compose_job_id(service, instance))
    message = "EmergencyStart: scaling %s up to %d instances" % (
        job_name, normal_instance_count)
    _log(
        service=service,
        line=message,
        component='deploy',
        level='event',
        cluster=cluster,
        instance=instance,
    )
    client.scale_app(app_id, instances=normal_instance_count, force=True)
Example #24
0
def scale_marathon_job(service, instance, app_id, delta, client, cluster):
    """Log an EmergencyScale event and scale the Marathon app by ``delta``.

    Args:
        service: Service name, used for the job id and the log entry.
        instance: Instance name, used for the job id and the log entry.
        app_id: Marathon app id to scale.
        delta: Number of instances to add (positive) or remove (negative);
            anything ``int()`` accepts.
        client: Marathon client exposing ``scale_app``.
        cluster: Cluster name recorded in the log entry.

    Raises:
        ValueError, TypeError: If ``delta`` cannot be converted by ``int()``.
    """
    # Convert up front: the direction used to be derived from the raw value
    # while the magnitude and the Marathon request used int(delta), so the
    # log line could contradict what was actually requested.
    delta = int(delta)
    name = PaastaColors.cyan(compose_job_id(service, instance))
    direction = 'down' if delta < 0 else 'up'
    _log(
        service=service,
        line="EmergencyScale: Scaling %s %s by %d instances" % (name, direction, abs(delta)),
        component='deploy',
        level='event',
        cluster=cluster,
        instance=instance
    )
    client.scale_app(app_id, delta=delta, force=True)
Example #25
0
def start_marathon_job(service, instance, app_id, normal_instance_count, client, cluster):
    """Record an EmergencyStart deploy event and force-scale the app up.

    The app is scaled to ``normal_instance_count`` instances via
    ``client.scale_app``.
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    log_fields = dict(
        service=service,
        line="EmergencyStart: scaling %s up to %d instances" % (name, normal_instance_count),
        component='deploy',
        level='event',
        cluster=cluster,
        instance=instance,
    )
    _log(**log_fields)
    client.scale_app(app_id, instances=normal_instance_count, force=True)
Example #26
0
def stop_marathon_job(service, instance, app_id, client, cluster):
    """Log an EmergencyStop event, then force-scale the app down to 0 instances."""
    job_name = PaastaColors.cyan(compose_job_id(service, instance))
    message = "EmergencyStop: Scaling %s down to 0 instances" % job_name
    _log(
        service=service,
        line=message,
        component='deploy',
        level='event',
        cluster=cluster,
        instance=instance,
    )
    # TODO do we want to capture the return val of any client calls?
    client.scale_app(app_id, instances=0, force=True)
Example #27
0
def stop_marathon_job(service, instance, app_id, client, cluster):
    """Record an EmergencyStop deploy event and scale the app to zero instances.

    The scale request goes through ``client.scale_app`` with ``force=True``;
    its return value is discarded.
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    log_fields = dict(
        service=service,
        line="EmergencyStop: Scaling %s down to 0 instances" % name,
        component='deploy',
        level='event',
        cluster=cluster,
        instance=instance,
    )
    _log(**log_fields)
    # TODO do we want to capture the return val of any client calls?
    client.scale_app(app_id, instances=0, force=True)
Example #28
0
def restart_marathon_job(service, instance, app_id, client, cluster):
    """Log an EmergencyRestart event, then force-scale the app down to 0 instances.

    Only the scale-down is issued here; per the log text the instances are
    expected to scale back up on their own afterwards.
    """
    job_name = PaastaColors.cyan(compose_job_id(service, instance))
    message = (
        "EmergencyRestart: Scaling %s down to 0 instances, then letting them scale back up"
        % job_name
    )
    _log(
        service=service,
        line=message,
        component='deploy',
        level='event',
        cluster=cluster,
        instance=instance,
    )
    client.scale_app(app_id, instances=0, force=True)
Example #29
0
def restart_marathon_job(service, instance, app_id, client, cluster):
    """Record an EmergencyRestart deploy event and scale the app to zero.

    This function itself only sends the scale-down request through
    ``client.scale_app``; it does not scale the app back up.
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    log_fields = dict(
        service=service,
        line="EmergencyRestart: Scaling %s down to 0 instances, then letting them scale back up" % name,
        component='deploy',
        level='event',
        cluster=cluster,
        instance=instance,
    )
    _log(**log_fields)
    client.scale_app(app_id, instances=0, force=True)
Example #30
0
File: logs.py Project: somic/paasta
def pick_default_log_mode(args, log_reader, service, levels, components, clusters, instances):
    """Fetch logs using the first retrieval mode ``log_reader`` supports.

    Modes are tried in this order: last 100 lines, a time window, tailing.
    A cyan notice is written to stderr before the chosen mode runs, keeping
    stdout free for the log lines themselves.

    Args:
        args: Parsed CLI arguments; only ``raw_mode`` is read.
        log_reader: Reader exposing the ``SUPPORTS_*`` flags and fetch methods.
        service, levels, components, clusters, instances: Filters passed
            through unchanged to the reader.

    Returns:
        0 after running a mode; ``None`` (implicitly) when the reader
        advertises none of the three capabilities.
    """
    if log_reader.SUPPORTS_LINE_COUNT:
        # Was sys.stdout, unlike the two branches below.
        paasta_print(PaastaColors.cyan("No filtering specified, grabbing last 100 lines"), file=sys.stderr)
        log_reader.print_last_n_logs(
            service=service,
            line_count=100,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0

    elif log_reader.SUPPORTS_TIME:
        start_time, end_time = generate_start_end_time()

        paasta_print(PaastaColors.cyan("No filtering specified, grabbing last 30 minutes of logs"), file=sys.stderr)
        log_reader.print_logs_by_time(
            service=service,
            start_time=start_time,
            end_time=end_time,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0

    elif log_reader.SUPPORTS_TAILING:
        paasta_print(PaastaColors.cyan("No filtering specified, tailing logs"), file=sys.stderr)
        log_reader.tail_logs(
            service=service,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0
Example #31
0
def paasta_fsm(args):
    """Generate configs for a new service and print follow-up instructions.

    Validates ``args``, obtains the config stanzas from
    ``get_paasta_config``, hands them to ``write_paasta_config`` for a
    ``Service`` rooted at ``args.yelpsoa_config_root``, then prints a banner
    and the git commands to run next.

    Output uses the single-argument ``print(...)`` form, which behaves the
    same as the old print statement on Python 2 and is valid on Python 3.
    """
    validate_args(args)
    # ``team`` is returned by get_paasta_config but not used here.
    (srvname, service_stanza, smartstack_stanza, monitoring_stanza,
     deploy_stanza, marathon_stanza, cluster_stanza, team) = (
        get_paasta_config(
            args.yelpsoa_config_root,
            args.srvname,
            args.auto,
            args.port,
            args.team,
            args.description,
            args.external_link,
        )
    )
    srv = Service(srvname, args.yelpsoa_config_root)
    write_paasta_config(
        srv,
        service_stanza,
        smartstack_stanza,
        monitoring_stanza,
        deploy_stanza,
        marathon_stanza,
        cluster_stanza,
    )
    # Raw strings keep the literal backslashes of the banner without relying
    # on invalid escape sequences.
    print(PaastaColors.yellow("               _  _(o)_(o)_  _"))
    print(PaastaColors.red(r"             ._\`:_ F S M _:' \_,"))
    print(PaastaColors.green(r"                 / (`---'\ `-."))
    print(PaastaColors.cyan("              ,-`  _)    (_,"))
    print("With My Noodly Appendage I Have Written Configs For")
    print("")
    print(PaastaColors.bold("    %s" % srvname))
    print("")
    print("Customize Them If It Makes You Happy -- http://y/paasta For Details")
    print("Remember To Add, Commit, And Push When You're Done:")
    print("")
    print("cd %s" % join(args.yelpsoa_config_root, srvname))
    print("# Review And/Or Customize Files")
    print("git add .")
    print("git commit -m'Initial Commit For %s'" % srvname)
    print("git push origin HEAD  # Pushmaster Or Ops Deputy Privs Required")
    print("")
Example #32
0
def paasta_fsm(args):
    """Write initial configs for a service and tell the user what to do next.

    Steps: validate ``args``; collect the stanzas from ``get_paasta_config``;
    pass them to ``write_paasta_config`` for a ``Service`` under
    ``args.yelpsoa_config_root``; print a banner followed by the git
    commands needed to publish the new configs.

    Printing uses single-argument ``print(...)`` calls so the function is
    valid on Python 3 while producing the same output on Python 2.
    """
    validate_args(args)
    # The trailing ``team`` element is part of the returned tuple but is not
    # needed for writing the configs.
    (srvname, service_stanza, smartstack_stanza, monitoring_stanza,
     deploy_stanza, marathon_stanza, cluster_stanza, team) = (
        get_paasta_config(
            args.yelpsoa_config_root,
            args.srvname,
            args.auto,
            args.port,
            args.team,
            args.description,
            args.external_link,
        )
    )
    srv = Service(srvname, args.yelpsoa_config_root)
    write_paasta_config(
        srv,
        service_stanza,
        smartstack_stanza,
        monitoring_stanza,
        deploy_stanza,
        marathon_stanza,
        cluster_stanza,
    )
    # Banner lines containing backslashes are raw strings; the text is
    # unchanged but no longer depends on invalid escape sequences.
    print(PaastaColors.yellow("               _  _(o)_(o)_  _"))
    print(PaastaColors.red(r"             ._\`:_ F S M _:' \_,"))
    print(PaastaColors.green(r"                 / (`---'\ `-."))
    print(PaastaColors.cyan("              ,-`  _)    (_,"))
    print("With My Noodly Appendage I Have Written Configs For")
    print("")
    print(PaastaColors.bold("    %s" % srvname))
    print("")
    print("Customize Them If It Makes You Happy -- http://y/paasta For Details")
    print("Remember To Add, Commit, And Push When You're Done:")
    print("")
    print("cd %s" % join(args.yelpsoa_config_root, srvname))
    print("# Review And/Or Customize Files")
    print("git add .")
    print("git commit -m'Initial Commit For %s'" % srvname)
    print("git push origin HEAD  # Pushmaster Or Ops Deputy Privs Required")
    print("")
Example #33
0
def get_deployments_strings(service, soa_dir):
    """Describe where ``service`` is deployed, one display line per cluster.

    Args:
        service: Name of the service to look up.
        soa_dir: Config directory handed to the deployment and namespace
            lookups.

    Returns:
        A list of `` - <cluster> (<link>)`` strings, or a single "N/A"
        entry when no deployments are available. The link is built for
        ``tcp``, ``http`` and ``https`` modes and is "N/A" for any other mode.
    """
    output = []
    try:
        deployments = get_actual_deployments(service, soa_dir)
    except NoDeploymentsAvailable:
        deployments = {}
    if deployments == {}:
        output.append(' - N/A: Not deployed to any PaaSTA Clusters')
    else:
        service_config = load_service_namespace_config(service, 'main', soa_dir)
        service_mode = service_config.get_mode()
        for cluster in deployments_to_clusters(deployments):
            if service_mode == "tcp":
                service_port = service_config.get('proxy_port')
                link = PaastaColors.cyan('%s://paasta-%s.yelp:%d/' % (service_mode, cluster, service_port))
            elif service_mode in ("http", "https"):
                # https gets the same host-based link as http (scheme taken
                # from the mode) instead of falling through to "N/A".
                link = PaastaColors.cyan('%s://%s.paasta-%s.yelp/' % (service_mode, service, cluster))
            else:
                link = "N/A"
            output.append(' - %s (%s)' % (cluster, link))
    return output
Example #34
0
def get_cluster_dashboards(cluster):
    """Returns the direct dashboards for humans to use for a given cluster

    :param cluster: key looked up in the mapping returned by
        ``load_system_paasta_config().get_dashboard_links()``
    :returns: a newline-joined string; either a "Dashboards:" header followed
        by one aligned ``label: url`` line per dashboard, or a red error line
        when the lookup raises KeyError
    """
    SPACER = ' '
    try:
        dashboards = load_system_paasta_config().get_dashboard_links()[cluster]
    except KeyError:
        output = [PaastaColors.red('No dashboards configured for %s!' % cluster)]
    else:
        output = ['Dashboards:']
        # max() raises ValueError on an empty sequence; the extra 0 keeps an
        # empty dashboards mapping from crashing (only the header is printed).
        spacing = max([len(label) for label in dashboards] + [0]) + 1
        for label, url in dashboards.items():
            output.append('  %s:%s%s' % (label, SPACER * (spacing - len(label)), PaastaColors.cyan(url)))
    return '\n'.join(output)
Example #35
0
def paasta_fsm(args):
    """Write configs from the system FSM template and print follow-up steps.

    :param args: parsed command-line arguments; only
        ``args.yelpsoa_config_root`` is read here
    """
    variables = get_paasta_config(yelpsoa_config_root=args.yelpsoa_config_root)
    destination = args.yelpsoa_config_root

    paasta_config = load_system_paasta_config()
    template = paasta_config.get_fsm_template()

    write_paasta_config(
        variables=variables,
        template=template,
        destination=destination,
    )

    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 function. Raw strings keep the literal
    # backslashes of the ASCII art without invalid escape sequences.
    print(PaastaColors.yellow("               _  _(o)_(o)_  _"))
    print(PaastaColors.red(r"             ._\`:_ F S M _:' \_,"))
    print(PaastaColors.green(r"                 / (`---'\ `-."))
    print(PaastaColors.cyan("              ,-`  _)    (_,"))
    print("With My Noodly Appendage I Have Written Configs!")
    print("")
    print("Customize Them If It Makes You Happy -- http://y/paasta For Details")
    print("Remember To Add, Commit, And Push When You're Done:")
    print("")
Example #36
0
def paasta_fsm(args):
    """Generate configs from the FSM template, then print what to do next.

    :param args: parsed command-line arguments; only
        ``args.yelpsoa_config_root`` is read here
    """
    variables = get_paasta_config(yelpsoa_config_root=args.yelpsoa_config_root)
    destination = args.yelpsoa_config_root

    paasta_config = load_system_paasta_config()
    template = paasta_config.get_fsm_template()

    write_paasta_config(
        variables=variables,
        template=template,
        destination=destination,
    )

    # print(...) with one argument works identically as a Python 2 statement
    # and a Python 3 call; print("") emits the blank line a bare `print` did.
    # The banner lines containing backslashes are raw strings so that no
    # invalid escape sequences are used.
    print(PaastaColors.yellow("               _  _(o)_(o)_  _"))
    print(PaastaColors.red(r"             ._\`:_ F S M _:' \_,"))
    print(PaastaColors.green(r"                 / (`---'\ `-."))
    print(PaastaColors.cyan("              ,-`  _)    (_,"))
    print("With My Noodly Appendage I Have Written Configs!")
    print("")
    print("Customize Them If It Makes You Happy -- http://y/paasta For Details")
    print("Remember To Add, Commit, And Push When You're Done:")
    print("")
Example #37
0
def stop_chronos_job(service, instance, client, cluster, existing_jobs, emergency=False):
    """Disable every job in ``existing_jobs`` and delete its tasks.

    For each job an event-level "deploy" log line is emitted, the job dict is
    marked ``disabled`` and pushed through ``client.update``, and then
    ``client.delete_tasks`` is called with the job name.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param client: client object providing ``update`` and ``delete_tasks``
    :param cluster: cluster name, used for logging
    :param existing_jobs: iterable of job dicts; each must have a "name" key
    :param emergency: when true the log reason is a red "EmergencyStop"
        instead of "Brutal bounce"
    """
    if emergency:
        log_reason = PaastaColors.red("EmergencyStop")
    else:
        log_reason = "Brutal bounce"

    for job in existing_jobs:
        colored_name = PaastaColors.cyan(job["name"])
        message = "%s: Killing all tasks for job %s" % (log_reason, colored_name)
        _log(
            service=service,
            line=message,
            component="deploy",
            level="event",
            cluster=cluster,
            instance=instance
        )
        # The job dict is mutated in place before being sent back.
        job["disabled"] = True
        client.update(job)
        client.delete_tasks(job["name"])
def stop_chronos_job(service, instance, client, cluster, existing_jobs, emergency=False):
    """Log, disable and clear the tasks of each job in ``existing_jobs``.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param client: client object providing ``update`` and ``delete_tasks``
    :param cluster: cluster name, used for logging
    :param existing_jobs: iterable of job dicts; each must have a "name" key
    :param emergency: selects the log reason: red "EmergencyStop" when true,
        plain "Brutal bounce" otherwise
    """
    reason = "Brutal bounce"
    if emergency:
        reason = PaastaColors.red("EmergencyStop")

    for job in existing_jobs:
        log_kwargs = dict(
            service=service,
            line="%s: Killing all tasks for job %s" % (reason, PaastaColors.cyan(job["name"])),
            component="deploy",
            level="event",
            cluster=cluster,
            instance=instance,
        )
        _log(**log_kwargs)
        # Disable first, then push the change and remove the running tasks.
        job["disabled"] = True
        client.update(job)
        client.delete_tasks(job["name"])
Example #39
0
def start_chronos_job(service, instance, job_id, client, cluster, job_config, emergency=False):
    """Send ``job_config`` through ``client.update`` and run it unless disabled.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param job_id: id passed to ``client.run`` and shown in the log line
    :param client: client object providing ``update`` and ``run``
    :param cluster: cluster name, used for logging
    :param job_config: job dict; its "disabled" key decides whether the job
        is run right after the update
    :param emergency: when true the log reason is a red "EmergencyStart"
        instead of "Brutal bounce"
    """
    name = PaastaColors.cyan(job_id)
    if emergency:
        log_reason = PaastaColors.red("EmergencyStart")
    else:
        log_reason = "Brutal bounce"
    if job_config["disabled"]:
        log_immediate_run = ""
    else:
        log_immediate_run = " and running it immediately"

    message = "%s: Sending job %s to Chronos%s" % (log_reason, name, log_immediate_run)
    _log(
        service=service,
        line=message,
        component="deploy",
        level="event",
        cluster=cluster,
        instance=instance
    )
    client.update(job_config)
    # TODO fail or give some output/feedback to user that the job won't run immediately if disabled (PAASTA-1244)
    if job_config["disabled"]:
        return
    client.run(job_id)
Example #40
0
def get_cluster_dashboards(cluster):
    """Returns the direct dashboards for humans to use for a given cluster

    :param cluster: key looked up in the mapping returned by
        ``load_system_paasta_config().get_dashboard_links()``
    :returns: a newline-joined string; either a "Dashboards:" header followed
        by one aligned ``label: url`` line per dashboard, or a red error line
        when the lookup raises KeyError
    """
    SPACER = ' '
    try:
        dashboards = load_system_paasta_config().get_dashboard_links()[cluster]
    except KeyError as e:
        # A KeyError for the cluster itself means only this cluster is
        # missing; any other key means the lookup failed earlier.
        if e.args[0] == cluster:
            output = [PaastaColors.red('No dashboards configured for %s!' % cluster)]
        else:
            output = [PaastaColors.red('No dashboards configured!')]
    else:
        output = ['Dashboards:']
        # max() raises ValueError on an empty sequence; the extra 0 keeps an
        # empty dashboards mapping from crashing (only the header is printed).
        spacing = max([len(label) for label in dashboards] + [0]) + 1
        for label, url in dashboards.items():
            output.append('  %s:%s%s' % (label, SPACER * (spacing - len(label)), PaastaColors.cyan(url)))
    return '\n'.join(output)
def start_chronos_job(service, instance, job_id, client, cluster, job_config, emergency=False):
    """Log the start, update the job via ``client`` and run it if enabled.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param job_id: id passed to ``client.run`` and shown in the log line
    :param client: client object providing ``update`` and ``run``
    :param cluster: cluster name, used for logging
    :param job_config: job dict; must contain a "disabled" key
    :param emergency: selects the log reason: red "EmergencyStart" when true,
        plain "Brutal bounce" otherwise
    """
    name = PaastaColors.cyan(job_id)
    log_reason = "Brutal bounce"
    if emergency:
        log_reason = PaastaColors.red("EmergencyStart")
    log_immediate_run = ""
    if not job_config["disabled"]:
        log_immediate_run = " and running it immediately"

    log_kwargs = dict(
        service=service,
        line="%s: Sending job %s to Chronos%s" % (log_reason, name, log_immediate_run),
        component="deploy",
        level="event",
        cluster=cluster,
        instance=instance,
    )
    _log(**log_kwargs)

    client.update(job_config)
    # TODO fail or give some output/feedback to user that the job won't run immediately if disabled (PAASTA-1244)
    should_run = not job_config["disabled"]
    if should_run:
        client.run(job_id)
Example #42
0
def get_cluster_dashboards(cluster):
    """Returns the direct dashboards for humans to use for a given cluster

    The links are built from fixed URL patterns with the cluster name
    substituted in; the Synapse link also uses ``DEFAULT_SYNAPSE_PORT``.

    :param cluster: cluster name interpolated into every URL
    :returns: the warning, read-only and admin sections joined by newlines
    """
    cyan = PaastaColors.cyan
    lines = [
        "Warning: Dashboards in prod are not directly reachable. "
        "See http://y/paasta-troubleshooting for instructions. (search for 'prod dashboards')",
        "User Dashboards (Read Only):",
        "  Mesos:    %s" % cyan("http://mesos.paasta-%s.yelp/" % cluster),
        "  Marathon: %s" % cyan("http://marathon.paasta-%s.yelp/" % cluster),
        "  Chronos:  %s" % cyan("http://chronos.paasta-%s.yelp/" % cluster),
        "  Synapse:  %s" % cyan("http://paasta-%s.yelp:%s/" % (cluster, DEFAULT_SYNAPSE_PORT)),
        "Admin Dashboards (Read/write, requires secrets):",
        "  Mesos:    %s" % cyan("http://paasta-%s.yelp:5050/" % cluster),
        "  Marathon: %s" % cyan("http://paasta-%s.yelp:5052/" % cluster),
        "  Chronos:  %s" % cyan("http://paasta-%s.yelp:5053/" % cluster),
    ]
    return '\n'.join(lines)
Example #43
0
def status_marathon_job_human(service, instance, deploy_status, app_id,
                              running_instances, normal_instance_count):
    """Format a one-line, colorized Marathon status for a service instance.

    :param service: service name, used to compose the job id
    :param instance: instance name, used to compose the job id
    :param deploy_status: deploy status text; the value 'NotRunning' selects
        the "NOT running" message
    :param app_id: app id shown in the "NOT running" message
    :param running_instances: number of running instances
    :param normal_instance_count: expected number of instances
    :returns: the formatted status string
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if deploy_status == 'NotRunning':
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon:   %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)

    counts = "(%d/%d)" % (running_instances, normal_instance_count)
    if running_instances >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        instance_count = PaastaColors.green(counts)
    elif running_instances == 0:
        # NOTE(review): "Critical" is yellow here while the count is red and
        # the NotRunning "Critical" above is red -- confirm this is intended.
        status = PaastaColors.yellow("Critical")
        instance_count = PaastaColors.red(counts)
    else:
        status = PaastaColors.yellow("Warning")
        instance_count = PaastaColors.yellow(counts)
    return "Marathon:   %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
Example #44
0
def status_marathon_job_human(
    service,
    instance,
    deploy_status,
    app_id,
    running_instances,
    normal_instance_count,
    unused_offers_summary=None,
):
    """Format a colorized Marathon status line, with optional stall reasons.

    :param service: service name, used to compose the job id
    :param instance: instance name, used to compose the job id
    :param deploy_status: deploy status text; the value 'NotRunning' selects
        the "not configured in Marathon yet" message
    :param app_id: app id shown in the "not configured" message
    :param running_instances: number of running instances
    :param normal_instance_count: expected number of instances
    :param unused_offers_summary: optional mapping of reason -> count; when
        non-empty a "Possibly stalled for" section is appended
    :returns: the formatted status string
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if unused_offers_summary is None or len(unused_offers_summary) == 0:
        stall_reason = ""
    else:
        reasons = [
            "%s: %s times" % (reason, count)
            for reason, count in unused_offers_summary.items()
        ]
        stall_reason = "\n  Possibly stalled for:\n    %s" % "\n    ".join(reasons)

    if deploy_status == 'NotRunning':
        status = PaastaColors.yellow("Warning")
        return "Marathon:   %s - %s (app %s) is not configured in Marathon yet (waiting for bounce)%s" % (
            status,
            name,
            app_id,
            stall_reason,
        )

    counts = "(%d/%d)" % (running_instances, normal_instance_count)
    if running_instances >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        instance_count = PaastaColors.green(counts)
    elif running_instances == 0:
        status = PaastaColors.yellow("Critical")
        instance_count = PaastaColors.red(counts)
    else:
        status = PaastaColors.yellow("Warning")
        instance_count = PaastaColors.yellow(counts)

    summary = (status, instance_count, deploy_status, stall_reason)
    return "Marathon:   %s - up with %s instances. Status: %s%s" % summary
Example #45
0
def status_marathon_job(service, instance, app_id, normal_instance_count,
                        client):
    """Query Marathon for an app and format a colorized status line.

    :param service: service name, used to compose the job id
    :param instance: instance name, used to compose the job id
    :param app_id: Marathon app id to look up
    :param normal_instance_count: expected number of instances
    :param client: Marathon client; ``get_app`` is called on it
    :returns: the formatted status string
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if not marathon_tools.is_app_id_running(app_id, client):
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon:   %s - %s (app %s) is %s running in Marathon." % (
            status, name, app_id, red_not)

    app = client.get_app(app_id)
    running_instances = app.tasks_running

    if len(app.deployments) == 0:
        deploy_status = PaastaColors.bold("Running")
    elif app.instances == 0 and app.tasks_running == 0:
        deploy_status = PaastaColors.grey("Stopped")
    else:
        # A deployment is in flight, so the launch queue is consulted for
        # more detail about why tasks may not be starting.
        is_overdue, backoff_seconds = marathon_tools.get_app_queue_status(
            client, app_id)
        if is_overdue:
            waiting = PaastaColors.red("Waiting")
            deploy_status = "%s (new tasks are not launching due to lack of capacity)" % waiting
        elif backoff_seconds:
            delayed = PaastaColors.red("Delayed")
            deploy_status = "%s (next task won't launch for %s seconds due to previous failures)" % (
                delayed, backoff_seconds)
        else:
            deploy_status = PaastaColors.yellow("Deploying")

    counts = "(%d/%d)" % (running_instances, normal_instance_count)
    if running_instances >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        instance_count = PaastaColors.green(counts)
    elif running_instances == 0:
        status = PaastaColors.yellow("Critical")
        instance_count = PaastaColors.red(counts)
    else:
        status = PaastaColors.yellow("Warning")
        instance_count = PaastaColors.yellow(counts)
    return "Marathon:   %s - up with %s instances. Status: %s" % (
        status, instance_count, deploy_status)
Example #46
0
def status_marathon_job(service, instance, app_id, normal_instance_count, client):
    """Look up an app in Marathon and format a colorized status line.

    The deploy status text comes from
    ``marathon_tools.get_marathon_app_deploy_status_human``.

    :param service: service name, used to compose the job id
    :param instance: instance name, used to compose the job id
    :param app_id: Marathon app id to look up
    :param normal_instance_count: expected number of instances
    :param client: Marathon client; ``get_app`` is called on it
    :returns: the formatted status string
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if not marathon_tools.is_app_id_running(app_id, client):
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon:   %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)

    app = client.get_app(app_id)
    running_instances = app.tasks_running
    deploy_status = marathon_tools.get_marathon_app_deploy_status_human(app, app_id, client)

    counts = "(%d/%d)" % (running_instances, normal_instance_count)
    if running_instances >= normal_instance_count:
        status, instance_count = PaastaColors.green("Healthy"), PaastaColors.green(counts)
    elif running_instances == 0:
        status, instance_count = PaastaColors.yellow("Critical"), PaastaColors.red(counts)
    else:
        status, instance_count = PaastaColors.yellow("Warning"), PaastaColors.yellow(counts)
    return "Marathon:   %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
Example #47
0
def get_cluster_dashboards(cluster: str) -> str:
    """Returns the direct dashboards for humans to use for a given cluster

    :param cluster: key looked up in the mapping returned by
        ``load_system_paasta_config().get_dashboard_links()``
    :returns: a newline-joined string; either a "Dashboards:" header followed
        by one aligned entry per dashboard label (a list of URLs is rendered
        as indented lines under its label), or a red error line when the
        lookup raises KeyError
    """
    SPACER = " "
    try:
        dashboards = load_system_paasta_config().get_dashboard_links()[cluster]
    except KeyError as e:
        # A KeyError for the cluster itself means only this cluster is
        # missing; any other key means the lookup failed earlier.
        if e.args[0] == cluster:
            output = [
                PaastaColors.red("No dashboards configured for %s!" % cluster)
            ]
        else:
            output = [PaastaColors.red("No dashboards configured!")]
    else:
        output = ["Dashboards:"]
        # default=0 keeps an empty dashboards mapping from raising
        # ValueError; in that case only the header is returned.
        spacing = max((len(label) for label in dashboards), default=0) + 1
        for label, urls in dashboards.items():
            if isinstance(urls, list):
                urls = "\n    %s" % "\n    ".join(urls)
            output.append("  {}:{}{}".format(label,
                                             SPACER * (spacing - len(label)),
                                             PaastaColors.cyan(urls)))
    return "\n".join(output)
Example #48
0
def status_marathon_job(service, instance, app_id, normal_instance_count, client):
    """Report the Marathon state of an app as a colorized one-line string.

    :param service: service name, used to compose the job id
    :param instance: instance name, used to compose the job id
    :param app_id: Marathon app id to look up
    :param normal_instance_count: expected number of instances
    :param client: Marathon client; ``get_app`` is called on it
    :returns: the formatted status string
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if not marathon_tools.is_app_id_running(app_id, client):
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon:   %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)

    app = client.get_app(app_id)
    running_instances = app.tasks_running

    no_deployments = len(app.deployments) == 0
    if no_deployments:
        deploy_status = PaastaColors.bold("Running")
    elif app.instances == 0 and app.tasks_running == 0:
        deploy_status = PaastaColors.grey("Stopped")
    else:
        # Mid-deployment: ask the launch queue whether tasks are overdue or
        # being delayed by a backoff.
        is_overdue, backoff_seconds = marathon_tools.get_app_queue_status(client, app_id)
        if is_overdue:
            deploy_status = "%s (new tasks are not launching due to lack of capacity)" % PaastaColors.red("Waiting")
        elif backoff_seconds:
            deploy_status = "%s (next task won't launch for %s seconds due to previous failures)" % (
                PaastaColors.red("Delayed"),
                backoff_seconds,
            )
        else:
            deploy_status = PaastaColors.yellow("Deploying")

    counts = "(%d/%d)" % (running_instances, normal_instance_count)
    if running_instances >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        instance_count = PaastaColors.green(counts)
    elif running_instances == 0:
        status = PaastaColors.yellow("Critical")
        instance_count = PaastaColors.red(counts)
    else:
        status = PaastaColors.yellow("Warning")
        instance_count = PaastaColors.yellow(counts)
    return "Marathon:   %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
Example #49
0
def simulate_healthcheck_on_service(
    instance_config,
    docker_client,
    container_id,
    healthcheck_mode,
    healthcheck_data,
    healthcheck_enabled
):
    """Simulates Marathon-style healthcheck on given service if healthcheck is enabled

    Polls the container until a healthcheck passes, the container stops
    running, or the number of failures counted after the grace period equals
    the configured maximum.

    :param instance_config: service manifest
    :param docker_client: Docker client object
    :param container_id: Docker container id
    :param healthcheck_mode: passed through to run_healthcheck_on_container
    :param healthcheck_data: tuple url to healthcheck
    :param healthcheck_enabled: boolean
    :returns: healthcheck_passed: boolean
    """
    healthcheck_link = PaastaColors.cyan(healthcheck_data)

    if not healthcheck_enabled:
        paasta_print('\nPaaSTA would have healthchecked your service via\n%s' % healthcheck_link)
        return True

    grace_period = instance_config.get_healthcheck_grace_period_seconds()
    timeout = instance_config.get_healthcheck_timeout_seconds()
    interval = instance_config.get_healthcheck_interval_seconds()
    max_failures = instance_config.get_healthcheck_max_consecutive_failures()

    paasta_print('\nStarting health check via %s (waiting %s seconds before '
                 'considering failures due to grace period):' % (healthcheck_link, grace_period))

    # Failures are only counted once this deadline has passed.
    grace_deadline = time.time() + grace_period
    counted_failures = 0
    while True:
        # A container that already exited can never become healthy.
        state = docker_client.inspect_container(container_id)['State']
        if not state['Running']:
            exit_message = 'Container exited with code {}'.format(
                state['ExitCode'],
            )
            paasta_print(PaastaColors.red(exit_message))
            return False

        healthcheck_passed, healthcheck_output = run_healthcheck_on_container(
            docker_client, container_id, healthcheck_mode, healthcheck_data, timeout,
        )

        if healthcheck_passed:
            paasta_print("{}'{}' (via {})".format(
                PaastaColors.green("Healthcheck succeeded!: "),
                healthcheck_output,
                healthcheck_link,
            ))
            return healthcheck_passed

        # The check failed: report it, counting it only outside the grace period.
        if time.time() < grace_deadline:
            color = PaastaColors.grey
            msg = '(disregarded due to grace period)'
        else:
            counted_failures += 1
            color = PaastaColors.red
            msg = '(Attempt {} of {})'.format(
                counted_failures, max_failures,
            )
        extra_msg = ' (via: {}. Output: {})'.format(healthcheck_link, healthcheck_output)

        failure_headline = color('Healthcheck failed! {}'.format(msg))
        paasta_print('{}{}'.format(failure_headline, extra_msg))

        if counted_failures == max_failures:
            return healthcheck_passed

        time.sleep(interval)
Example #50
0
def simulate_healthcheck_on_service(
    instance_config,
    docker_client,
    container_id,
    healthcheck_mode,
    healthcheck_data,
    healthcheck_enabled
):
    """Simulates Marathon-style healthcheck on given service if healthcheck is enabled

    Polls the container until a healthcheck passes, the container stops
    running, or the number of failures counted after the grace period equals
    the configured maximum. Progress is written to stdout.

    :param instance_config: service manifest
    :param docker_client: Docker client object
    :param container_id: Docker container id
    :param healthcheck_mode: passed through to run_healthcheck_on_container
    :param healthcheck_data: tuple url to healthcheck
    :param healthcheck_enabled: boolean
    :returns: a 2-tuple of (healthcheck_passed_bool, healthcheck_output_string)
    """
    emit = sys.stdout.write
    healthcheck_link = PaastaColors.cyan(healthcheck_data)

    if not healthcheck_enabled:
        emit('\nMesos would have healthchecked your service via\n%s\n' % healthcheck_link)
        return (True, "No healthcheck enabled")

    grace_period = instance_config.get_healthcheck_grace_period_seconds()
    timeout = instance_config.get_healthcheck_timeout_seconds()
    interval = instance_config.get_healthcheck_interval_seconds()
    max_failures = instance_config.get_healthcheck_max_consecutive_failures()

    emit('\nStarting health check via %s (waiting %s seconds before '
         'considering failures due to grace period):\n' % (healthcheck_link, grace_period))

    # Failures are only counted once this deadline has passed.
    grace_deadline = time.time() + grace_period
    counted_failures = 0
    while True:
        # A container that already exited can never become healthy.
        state = docker_client.inspect_container(container_id)['State']
        if not state['Running']:
            exit_message = 'Container exited with code {}'.format(
                state['ExitCode'],
            )
            emit(PaastaColors.red(exit_message) + '\n')
            # NOTE(review): this text is returned whenever the container is
            # not running -- confirm "Aborted by the user" is the intended wording.
            return (False, "Aborted by the user")

        healthcheck_result = run_healthcheck_on_container(
            docker_client, container_id, healthcheck_mode, healthcheck_data, timeout,
        )

        if healthcheck_result[0]:
            emit("{}'{}' (via {})\n".format(
                PaastaColors.green("Healthcheck succeeded!: "),
                healthcheck_result[1],
                healthcheck_link,
            ))
            return healthcheck_result

        # The check failed: report it, counting it only outside the grace period.
        if time.time() < grace_deadline:
            color = PaastaColors.grey
            msg = '(disregarded due to grace period)'
            extra_msg = ''
        else:
            counted_failures += 1
            color = PaastaColors.red
            msg = '(Attempt {} of {})'.format(
                counted_failures, max_failures,
            )
            extra_msg = ' (via: {})'.format(healthcheck_link)

        failure_headline = color('Healthcheck failed! {}'.format(msg))
        emit('{}{}\n'.format(failure_headline, extra_msg))

        if counted_failures == max_failures:
            return healthcheck_result

        time.sleep(interval)
Example #51
0
def simulate_healthcheck_on_service(
    instance_config,
    docker_client,
    container_id,
    healthcheck_mode,
    healthcheck_data,
    healthcheck_enabled,
):
    """Simulates Marathon-style healthcheck on given service if healthcheck is enabled

    While healthchecking, the container's log output is streamed and printed
    from a background daemon thread. Polling stops when a healthcheck passes,
    the container is no longer running, or the number of failures counted
    after the grace period equals the configured maximum.

    :param instance_config: service manifest
    :param docker_client: Docker client object
    :param container_id: Docker container id
    :param healthcheck_mode: passed through to run_healthcheck_on_container
    :param healthcheck_data: tuple url to healthcheck
    :param healthcheck_enabled: boolean
    :returns: healthcheck_passed: boolean
    """
    healthcheck_link = PaastaColors.cyan(healthcheck_data)
    if healthcheck_enabled:
        grace_period = instance_config.get_healthcheck_grace_period_seconds()
        timeout = instance_config.get_healthcheck_timeout_seconds()
        interval = instance_config.get_healthcheck_interval_seconds()
        max_failures = instance_config.get_healthcheck_max_consecutive_failures()

        paasta_print(
            "\nStarting health check via %s (waiting %s seconds before "
            "considering failures due to grace period):"
            % (healthcheck_link, grace_period)
        )

        # silently start performing health checks until grace period ends or first check succeeds
        graceperiod_end_time = time.time() + grace_period
        after_grace_period_attempts = 0
        # Flag read by _stream_docker_logs through its closure; it is set to
        # False after the polling loop to tell the streaming thread to stop.
        healthchecking = True

        def _stream_docker_logs(container_id, generator):
            """Print lines from the log generator until healthchecking ends."""
            while healthchecking:
                try:
                    # the generator will block until another log line is available
                    log_line = next(generator).decode("utf-8").rstrip("\n")
                    if healthchecking:
                        paasta_print(f"container [{container_id[:12]}]: {log_line}")
                    else:
                        # stop streaming at first opportunity, since generator.close()
                        # cant be used until the container is dead
                        break
                except StopIteration:  # natural end of logs
                    break

        docker_logs_generator = docker_client.logs(
            container_id, stderr=True, stream=True
        )
        # The thread is started as a daemon and is never joined here.
        threading.Thread(
            target=_stream_docker_logs,
            daemon=True,
            args=(container_id, docker_logs_generator),
        ).start()

        while True:
            # First inspect the container for early exits
            container_state = docker_client.inspect_container(container_id)
            if not container_state["State"]["Running"]:
                paasta_print(
                    PaastaColors.red(
                        "Container exited with code {}".format(
                            container_state["State"]["ExitCode"]
                        )
                    )
                )
                healthcheck_passed = False
                break

            healthcheck_passed, healthcheck_output = run_healthcheck_on_container(
                docker_client, container_id, healthcheck_mode, healthcheck_data, timeout
            )

            # Yay, we passed the healthcheck
            if healthcheck_passed:
                paasta_print(
                    "{}'{}' (via {})".format(
                        PaastaColors.green("Healthcheck succeeded!: "),
                        healthcheck_output,
                        healthcheck_link,
                    )
                )
                break

            # Otherwise, print why we failed
            if time.time() < graceperiod_end_time:
                color = PaastaColors.grey
                msg = "(disregarded due to grace period)"
                extra_msg = f" (via: {healthcheck_link}. Output: {healthcheck_output})"
            else:
                # If we've exceeded the grace period, we start incrementing attempts
                after_grace_period_attempts += 1
                color = PaastaColors.red
                msg = "(Attempt {} of {})".format(
                    after_grace_period_attempts, max_failures
                )
                extra_msg = f" (via: {healthcheck_link}. Output: {healthcheck_output})"

            paasta_print("{}{}".format(color(f"Healthcheck failed! {msg}"), extra_msg))

            # Give up once the failures counted after the grace period reach the limit.
            if after_grace_period_attempts == max_failures:
                break

            time.sleep(interval)
        healthchecking = False  # end docker logs stream
    else:
        # Healthchecks disabled: only report what would have been checked.
        paasta_print(
            "\nPaaSTA would have healthchecked your service via\n%s" % healthcheck_link
        )
        healthcheck_passed = True
    return healthcheck_passed
Example #52
0
File: logs.py Project: somic/paasta
def paasta_logs(args):
    """Print the logs for a PaaSTA service.

    Chooses between tailing, line-based and time-based log retrieval based
    on the filtering arguments, delegating the actual work to the log reader
    returned by ``get_log_reader()``.

    :param args: argparse.Namespace obj created from sys.args by cli
    :returns: 1 when the filtering arguments are invalid or the time range
        cannot be parsed; 0 after logs were printed via the tail, line-based
        or time-based reader; otherwise whatever ``pick_default_log_mode``
        returns when no filtering was requested
    """
    soa_dir = args.soa_dir
    service = figure_out_service_name(args, soa_dir)

    if args.clusters is None:
        clusters = list_clusters(service, soa_dir=soa_dir)
    else:
        clusters = args.clusters.split(",")

    if args.instances is None:
        instances = None
    else:
        instances = args.instances.split(",")

    if args.components is not None:
        components = args.components.split(",")
    else:
        components = DEFAULT_COMPONENTS
    components = set(components)

    # "app_output" is shorthand for both of the application's output streams.
    if "app_output" in components:
        components.remove("app_output")
        components.add("stdout")
        components.add("stderr")

    if args.verbose:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.WARNING)

    levels = [DEFAULT_LOGLEVEL, "debug"]

    log.info("Going to get logs for %s on clusters %s", service, clusters)

    log_reader = get_log_reader()

    if not validate_filtering_args(args, log_reader):
        return 1

    # They haven't specified what kind of filtering they want, decide for them
    if args.line_count is None and args.time_from is None and not args.tail:
        return pick_default_log_mode(args, log_reader, service, levels, components, clusters, instances)

    if args.tail:
        paasta_print(PaastaColors.cyan("Tailing logs"), file=sys.stderr)
        log_reader.tail_logs(
            service=service,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0

    # If the logger doesn't support offsetting the number of lines by a particular line number
    # there is no point in distinguishing between a positive/negative number of lines since it
    # can only get the last N lines
    if not log_reader.SUPPORTS_LINE_OFFSET and args.line_count is not None:
        args.line_count = abs(args.line_count)

    # Handle line based filtering
    if args.line_count is not None and args.line_offset is None:
        log_reader.print_last_n_logs(
            service=service,
            line_count=args.line_count,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0
    elif args.line_count is not None and args.line_offset is not None:
        # The keyword is `clusters`, as in every other reader call here; the
        # previous `cluters` spelling did not match that name.
        log_reader.print_logs_by_offset(
            service=service,
            line_count=args.line_count,
            line_offset=args.line_offset,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0

    # Handle time based filtering
    try:
        start_time, end_time = generate_start_end_time(args.time_from, args.time_to)
    except ValueError as e:
        # str(e) works on Python 2 and 3; exceptions have no `.message`
        # attribute on Python 3.
        paasta_print(PaastaColors.red(str(e)), file=sys.stderr)
        return 1

    log_reader.print_logs_by_time(
        service=service,
        start_time=start_time,
        end_time=end_time,
        levels=levels,
        components=components,
        clusters=clusters,
        instances=instances,
        raw_mode=args.raw_mode,
    )
    # Explicit success code, consistent with the other filtering paths.
    return 0
Example #53
0
 def service_dir_found(service, soa_dir):
     """Return the ``success`` result saying the service's config directory was found.

     :param service: service name, shown in cyan in the message
     :param soa_dir: directory reported as the location of the configs
     """
     colored_service = PaastaColors.cyan(service)
     return success("yelpsoa-config directory for %s found in %s" % (colored_service, soa_dir))
Example #54
0
def simulate_healthcheck_on_service(
    instance_config,
    docker_client,
    container_id,
    healthcheck_mode,
    healthcheck_data,
    healthcheck_enabled
):
    """Simulates Marathon-style healthcheck on given service if healthcheck is enabled

    The check runs in two phases. During the grace period, failures are
    reported but disregarded; the phase ends on the first success or once the
    grace period has elapsed. After that, up to ``max_failures`` further
    attempts are made, and the first success ends the simulation.

    :param instance_config: service manifest; supplies the grace period,
        timeout, interval and max consecutive failures settings
    :param docker_client: Docker client object
    :param container_id: Docker container id
    :param healthcheck_mode: healthcheck mode, passed through unchanged to
        run_healthcheck_on_container
    :param healthcheck_data: tuple url to healthcheck
    :param healthcheck_enabled: boolean
    :returns: if healthcheck_enabled is true, then returns output of healthcheck, otherwise simply returns true
    """
    healthcheck_link = PaastaColors.cyan(healthcheck_data)

    if not healthcheck_enabled:
        sys.stdout.write('\nMesos would have healthchecked your service via\n%s\n' % healthcheck_link)
        return True

    grace_period = instance_config.get_healthcheck_grace_period_seconds()
    timeout = instance_config.get_healthcheck_timeout_seconds()
    interval = instance_config.get_healthcheck_interval_seconds()
    max_failures = instance_config.get_healthcheck_max_consecutive_failures()

    sys.stdout.write('\nStarting health check via %s (waiting %s seconds before '
                     'considering failures due to grace period):\n' % (healthcheck_link, grace_period))

    # silently start performing health checks until grace period ends or first check succeeds
    graceperiod_end_time = time.time() + grace_period
    while True:
        healthcheck_succeeded = run_healthcheck_on_container(
            docker_client, container_id, healthcheck_mode, healthcheck_data, timeout)
        if healthcheck_succeeded or time.time() > graceperiod_end_time:
            break
        sys.stdout.write("%s\n" % PaastaColors.grey("Healthcheck failed (disregarded due to grace period)"))
        time.sleep(interval)

    # From here on failures count. With max_failures < 1 no attempt is made
    # and the result stays True.
    failed = False
    for attempt in range(1, max_failures + 1):
        healthcheck_succeeded = run_healthcheck_on_container(
            docker_client, container_id, healthcheck_mode, healthcheck_data, timeout)
        if healthcheck_succeeded:
            sys.stdout.write("%s (via: %s)\n" %
                             (PaastaColors.green("Healthcheck succeeded!"), healthcheck_link))
            return True

        sys.stdout.write("%s (via: %s)\n" %
                         (PaastaColors.red("Healthcheck failed! (Attempt %d of %d)" % (attempt, max_failures)),
                          healthcheck_link))
        failed = True
        # Only wait when another attempt follows; sleeping after the final
        # failure would just delay reporting the result.
        if attempt < max_failures:
            time.sleep(interval)

    return not failed
Example #55
0
def get_dashboard_urls(service):
    """Return the dashboard link lines for a service.

    :param service: service name, interpolated into the Uchiwa events query
    :returns: a one-element list holding the Sensu Alerts entry, with the URL
        wrapped by PaastaColors.cyan
    """
    uchiwa_url = 'https://uchiwa.yelpcorp.com/#/events?q=%s' % service
    sensu_entry = ' - %s (Sensu Alerts)' % PaastaColors.cyan(uchiwa_url)
    return [sensu_entry]
Example #56
0
def get_pipeline_url(service):
    """Return the Jenkins services view URL for a service, wrapped by PaastaColors.cyan.

    :param service: service name, appended to the Jenkins view path
    """
    jenkins_view = 'https://jenkins.yelpcorp.com/view/services-%s' % service
    return PaastaColors.cyan(jenkins_view)
Example #57
0
 def __str__(self):
     """Render the error as text.

     When self.service is falsy, GUESS_ERROR_MSG is returned as-is; otherwise
     the service name (wrapped by PaastaColors.cyan) is followed by
     CHECK_ERROR_MSG.
     """
     if not self.service:
         return self.GUESS_ERROR_MSG
     highlighted_service = PaastaColors.cyan(self.service)
     return "SERVICE: %s %s" % (highlighted_service, self.CHECK_ERROR_MSG)
Example #58
0
 def service_dir_found(service):
     """Report that the yelpsoa-config directory for a service was found in /nail/etc/services.

     :param service: service name; wrapped with PaastaColors.cyan in the message
     :returns: whatever success() returns for the formatted message
     """
     highlighted_service = PaastaColors.cyan(service)
     return success(
         "yelpsoa-config directory for %s found in /nail/etc/services" % highlighted_service
     )