def send_event(service, namespace, cluster, soa_dir, status, output):
    """Emit a replication check result for one service instance.

    The instance's monitoring settings are read from its marathon service
    config, adjusted, and passed to ``monitoring_tools.send_event``. The
    output is then also written to the service log through ``_log``.

    :param service: The service name the event is about
    :param namespace: The namespace (instance) of the service the event is about
    :param cluster: The cluster the config is loaded for and logged under
    :param soa_dir: The service directory to read monitoring information from
    :param status: The status to emit for this event
    :param output: The output to emit for this event
    """
    job_config = marathon_tools.load_marathon_service_config(
        service=service,
        instance=namespace,
        cluster=cluster,
        soa_dir=soa_dir,
        load_deployments=False,
    )
    overrides = job_config.get_monitoring()
    # A configured alert_after is respected; only a missing one is defaulted.
    overrides.setdefault('alert_after', '2m')
    # check_every is always overwritten.
    overrides['check_every'] = '1m'
    overrides['runbook'] = monitoring_tools.get_runbook(overrides, service, soa_dir=soa_dir)

    job_id = compose_job_id(service, namespace)
    monitoring_tools.send_event(
        service,
        'check_marathon_services_replication.%s' % job_id,
        overrides,
        status,
        output,
        soa_dir,
    )
    _log(
        line='Replication: %s' % output,
        level='debug',
        component='monitoring',
        service=service,
        instance=namespace,
        cluster=cluster,
    )
예제 #2
0
def send_event(service, namespace, cluster, soa_dir, status, output):
    """Send a replication event for one service instance.

    The instance's monitoring settings are read from its marathon service
    config, adjusted, and passed to ``monitoring_tools.send_event``. The
    output is then also written to the service log through ``_log``.

    :param service: The service name the event is about
    :param namespace: The namespace (instance) of the service the event is about
    :param cluster: The cluster the instance's config is loaded for
    :param soa_dir: The service directory to read config and monitoring
                    information from
    :param status: The status to emit for this event
    :param output: The output to emit for this event
    """
    # Load the config from the caller's soa_dir so it comes from the same
    # directory as the runbook lookup and the event sent below.
    monitoring_overrides = marathon_tools.load_marathon_service_config(
        service, namespace, cluster, soa_dir=soa_dir).get_monitoring()
    # A configured alert_after is respected; only a missing one is defaulted.
    if 'alert_after' not in monitoring_overrides:
        monitoring_overrides['alert_after'] = '2m'
    # check_every is always overwritten.
    monitoring_overrides['check_every'] = '1m'
    monitoring_overrides['runbook'] = monitoring_tools.get_runbook(monitoring_overrides, service, soa_dir=soa_dir)

    check_name = 'check_marathon_services_replication.%s' % compose_job_id(service, namespace)
    monitoring_tools.send_event(service, check_name, monitoring_overrides, status, output, soa_dir)
    _log(
        service=service,
        line='Replication: %s' % output,
        component='monitoring',
        level='debug',
        cluster=cluster,
        instance=namespace,
    )
예제 #3
0
def get_desired_marathon_configs(soa_dir):
    """Collect the formatted marathon app dicts for this cluster's instances.

    :param soa_dir: The service directory to read instance configs from
    :returns: dict mapping each app id (leading '/' stripped) to its
              formatted marathon app dict
    """
    cluster = load_system_paasta_config().get_cluster()
    instances = get_services_for_cluster(
        instance_type='marathon',
        cluster=cluster,
        soa_dir=soa_dir,
    )
    marathon_configs = dict()

    for service, instance in instances:
        try:
            marathon_config = load_marathon_service_config(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=soa_dir,
            ).format_marathon_app_dict()
            marathon_configs[marathon_config['id'].lstrip(
                '/')] = marathon_config
        except NoSlavesAvailableError as errormsg:
            # Log the exception's text, not the exception object itself.
            _log(
                service=service,
                line=str(errormsg),
                component='deploy',
                level='event',
                cluster=cluster,
                instance=instance,
            )
        except (NoDeploymentsAvailable, NoDockerImageError):
            # Deliberately skipped without logging; the instance is simply
            # left out of the result.
            pass
    return marathon_configs
예제 #4
0
def delete_app(app_id, client):
    """Delete a marathon app under its bounce lock and log the outcome.

    :param app_id: The marathon app id; parsed with ``deformat_job_id``
    :param client: The client handed to ``bounce_lib.delete_marathon_app``
    :raises Exception: any failure other than IOError is logged line by line
                       at debug level and then re-raised
    """
    log.warning("%s appears to be old; attempting to delete", app_id)
    service, instance, _, __ = marathon_tools.deformat_job_id(app_id)
    # Resolve the cluster once, before taking the lock: it is needed on both
    # the success and the failure path, and a failure to load it must not be
    # mistaken for the IOError handled below.
    cluster = load_system_paasta_config().get_cluster()
    try:
        with bounce_lib.bounce_lock_zookeeper(
                marathon_tools.compose_job_id(service, instance)):
            bounce_lib.delete_marathon_app(app_id, client)
            log_line = "Deleted stale marathon job that looks lost: %s" % app_id
            _log(service=service,
                 component='deploy',
                 level='event',
                 cluster=cluster,
                 instance=instance,
                 line=log_line)
    except IOError:
        # Treated as "another bounce is in progress"; the app is left alone.
        log.debug("%s is being bounced, skipping", app_id)
    except Exception:
        loglines = ['Exception raised during cleanup of service %s:' % service]
        loglines.extend(traceback.format_exc().rstrip().split("\n"))
        for logline in loglines:
            _log(service=service,
                 component='deploy',
                 level='debug',
                 cluster=cluster,
                 instance=instance,
                 line=logline)
        raise
예제 #5
0
def alert_state_change(application: Application, soa_dir: str) -> Generator:
    """Log the outcome of the work performed at this generator's ``yield``.

    If control returns from the ``yield`` normally, a single "event" line is
    logged. If an exception is raised there, a header line plus the formatted
    traceback are logged at "debug" level and the exception is re-raised.

    :param application: object providing ``kube_deployment`` and ``item``
    :param soa_dir: not used in this function body
    """
    service = application.kube_deployment.service
    instance = application.kube_deployment.instance
    cluster = load_system_paasta_config().get_cluster()

    def emit(level, line):
        # Every line shares the same service/instance/cluster context.
        _log(
            service=service,
            component="deploy",
            level=level,
            cluster=cluster,
            instance=instance,
            line=line,
        )

    try:
        yield
        emit(
            "event",
            "Deleted stale Kubernetes apps that looks lost: %s"
            % application.item.metadata.name,
        )
    except Exception:
        header = "Exception raised during cleanup of service %s:" % application
        trace_lines = traceback.format_exc().rstrip().split("\n")
        for logline in [header] + trace_lines:
            emit("debug", logline)
        raise
예제 #6
0
def bounce_chronos_job(service, instance, cluster, jobs_to_disable,
                       jobs_to_delete, job_to_create, client):
    """Disable, delete and create Chronos jobs for one service instance.

    :param jobs_to_disable: jobs passed one by one to ``chronos_tools.disable_job``
    :param jobs_to_delete: jobs passed one by one to ``chronos_tools.delete_job``
    :param job_to_create: job passed to ``chronos_tools.create_job``; skipped when falsy
    :param client: client handed to every chronos_tools call
    :returns: the tuple ``(0, "All chronos bouncing tasks finished.")``
    """
    def deploy_log(level, line):
        _log(service=service,
             instance=instance,
             component='deploy',
             cluster=cluster,
             level=level,
             line=line)

    if not any([jobs_to_disable, jobs_to_delete, job_to_create]):
        log.debug("Not doing any chronos bounce action for %s" %
                  chronos_tools.compose_job_id(service, instance))
    else:
        deploy_log(
            'debug',
            "Chronos bouncing. Jobs to disable: %s, jobs to delete: %s, job_to_create: %s" % (
                jobs_to_disable, jobs_to_delete, job_to_create),
        )

    for stale_job in jobs_to_disable:
        chronos_tools.disable_job(client=client, job=stale_job)
    for doomed_job in jobs_to_delete:
        chronos_tools.delete_job(client=client, job=doomed_job)

    if job_to_create:
        chronos_tools.create_job(client=client, job=job_to_create)
        deploy_log('event', 'Created new Chronos job: %s' % job_to_create['name'])
    return (0, "All chronos bouncing tasks finished.")
예제 #7
0
파일: itest.py 프로젝트: ashwinaj/paasta
def paasta_itest(args):
    """Run ``make itest`` for a service and exit with the resulting status.

    :param args: parsed arguments; ``service`` and ``commit`` are read
    :raises SystemExit: always, carrying the final return code
    """
    service = args.service
    prefix = "services-"
    if service and service.startswith(prefix):
        # Drop the leading "services-" from the name.
        service = service[len(prefix):]
    validate_service_name(service)

    tag = build_docker_tag(service, args.commit)
    # The itest run gets the current environment plus the tag.
    run_env = dict(os.environ, DOCKER_TAG=tag)

    _log(service=service, line="starting itest for %s." % args.commit, component="build", level="event")
    returncode, output = _run(
        "make itest",
        env=run_env,
        timeout=3600,
        log=True,
        component="build",
        service=service,
        loglevel="debug",
    )

    if returncode == 0:
        loglines = ["itest passed for %s." % args.commit]
        # A passing itest that left no image behind still counts as a failure.
        if not check_docker_image(service, args.commit):
            loglines.append("ERROR: itest has not created %s" % tag)
            returncode = 1
    else:
        loglines = ["ERROR: itest failed for %s." % args.commit]
        jenkins_url = get_jenkins_build_output_url()
        if jenkins_url:
            loglines.append("See output: %s" % jenkins_url)

    for logline in loglines:
        _log(service=service, line=logline, component="build", level="event")
    sys.exit(returncode)
def send_event(service, namespace, cluster, soa_dir, status, output):
    """Send a replication event for one service instance.

    The instance's monitoring settings are read from its marathon service
    config, adjusted, and passed to ``monitoring_tools.send_event``. The
    output is then also written to the service log through ``_log``.

    :param service: The service name the event is about
    :param namespace: The namespace (instance) of the service the event is about
    :param cluster: The cluster the instance's config is loaded for
    :param soa_dir: The service directory to read config and monitoring
                    information from
    :param status: The status to emit for this event
    :param output: The output to emit for this event
    """
    # Load the config from the caller's soa_dir so it comes from the same
    # directory as the runbook lookup and the event sent below.
    monitoring_overrides = marathon_tools.load_marathon_service_config(
        service, namespace, cluster, soa_dir=soa_dir
    ).get_monitoring()
    # A configured alert_after is respected; only a missing one is defaulted.
    if "alert_after" not in monitoring_overrides:
        monitoring_overrides["alert_after"] = "2m"
    # check_every is always overwritten.
    monitoring_overrides["check_every"] = "1m"
    monitoring_overrides["runbook"] = monitoring_tools.get_runbook(monitoring_overrides, service, soa_dir=soa_dir)

    check_name = "check_marathon_services_replication.%s" % compose_job_id(service, namespace)
    monitoring_tools.send_event(service, check_name, monitoring_overrides, status, output, soa_dir)
    _log(
        service=service,
        line="Replication: %s" % output,
        component="monitoring",
        level="debug",
        cluster=cluster,
        instance=namespace,
    )
예제 #9
0
def mark_for_deployment(git_url, cluster, instance, service, commit):
    """Mark a docker image for deployment by running the built command.

    :returns: the return code of the command that was run
    """
    cmd = build_command(git_url, commit, cluster=cluster, instance=instance)
    returncode, output = _run(cmd, timeout=30)

    # Every line describing the result goes to the deploy log.
    result_lines = get_loglines(
        returncode=returncode,
        cmd=cmd,
        output=output,
        commit=commit,
        cluster=cluster,
        instance=instance,
    )
    for result_line in result_lines:
        _log(
            line=result_line,
            level='event',
            component='deploy',
            service=service,
            cluster=cluster,
            instance=instance,
        )
    return returncode
예제 #10
0
def bounce_chronos_job(
    service,
    instance,
    cluster,
    jobs_to_disable,
    jobs_to_delete,
    job_to_create,
    client
):
    """Disable, delete and create Chronos jobs for one service instance.

    :param jobs_to_disable: jobs passed one by one to ``chronos_tools.disable_job``
    :param jobs_to_delete: jobs passed one by one to ``chronos_tools.delete_job``
    :param job_to_create: job passed to ``chronos_tools.create_job``; skipped when falsy
    :param client: client handed to every chronos_tools call
    :returns: the tuple ``(0, "All chronos bouncing tasks finished.")``
    """
    # Context shared by every deploy log line written here.
    log_context = dict(service=service, instance=instance, component='deploy', cluster=cluster)

    has_work = any([jobs_to_disable, jobs_to_delete, job_to_create])
    if has_work:
        summary = "Chronos bouncing. Jobs to disable: %s, jobs to delete: %s, job_to_create: %s" % (
            jobs_to_disable, jobs_to_delete, job_to_create)
        _log(level='debug', line=summary, **log_context)
    else:
        job_id = chronos_tools.compose_job_id(service, instance)
        log.debug("Not doing any chronos bounce action for %s" % job_id)

    for to_disable in jobs_to_disable:
        chronos_tools.disable_job(client=client, job=to_disable)
    for to_delete in jobs_to_delete:
        chronos_tools.delete_job(client=client, job=to_delete)

    if job_to_create:
        chronos_tools.create_job(client=client, job=job_to_create)
        created = 'Created new Chronos job: %s' % job_to_create['name']
        _log(level='event', line=created, **log_context)
    return (0, "All chronos bouncing tasks finished.")
예제 #11
0
def delete_app(app_id, client):
    """Delete a marathon app under its bounce lock and log the outcome.

    :param app_id: The marathon app id; parsed with ``deformat_job_id``
    :param client: The client handed to ``bounce_lib.delete_marathon_app``
    :raises Exception: any failure other than IOError is logged line by line
                       at debug level and then re-raised
    """
    log.warning("%s appears to be old; attempting to delete", app_id)
    service, instance, _, __ = marathon_tools.deformat_job_id(app_id)
    # Resolve the cluster once, before taking the lock: it is needed on both
    # the success and the failure path, and a failure to load it must not be
    # mistaken for the IOError handled below.
    cluster = load_system_paasta_config().get_cluster()
    try:
        with bounce_lib.bounce_lock_zookeeper(marathon_tools.compose_job_id(service, instance)):
            bounce_lib.delete_marathon_app(app_id, client)
            log_line = "Deleted stale marathon job that looks lost: %s" % app_id
            _log(service=service,
                 component='deploy',
                 level='event',
                 cluster=cluster,
                 instance=instance,
                 line=log_line)
    except IOError:
        # Treated as "another bounce is in progress"; the app is left alone.
        log.debug("%s is being bounced, skipping", app_id)
    except Exception:
        loglines = ['Exception raised during cleanup of service %s:' % service]
        loglines.extend(traceback.format_exc().rstrip().split("\n"))
        for logline in loglines:
            _log(service=service,
                 component='deploy',
                 level='debug',
                 cluster=cluster,
                 instance=instance,
                 line=logline)
        raise
예제 #12
0
def start_chronos_job(service,
                      instance,
                      job_id,
                      client,
                      cluster,
                      job_config,
                      complete_job_config,
                      emergency=False):
    """
    Trigger an immediate run of a Chronos job, regardless of its 'schedule'
    (https://mesos.github.io/chronos/docs/api.html#manually-starting-a-job).

    A job whose complete config has a truthy 'disabled' key is not started;
    a message is printed instead. Otherwise the start is logged, the job is
    updated with ``complete_job_config`` and then run.

    :param emergency: selects the reason shown in the log line
    :param job_config: not used in this function body
    """
    name = PaastaColors.cyan(job_id)

    if complete_job_config['disabled']:
        # Disabled jobs are never started from here.
        refusal = PaastaColors.red(
            "You cannot emergency start a disabled job. Run `paasta start` first.")
        paasta_print(refusal)
        return

    if emergency:
        log_reason = PaastaColors.red("EmergencyStart")
    else:
        log_reason = "Brutal bounce"
    _log(
        line="%s: Starting manual run of %s in Chronos" % (log_reason, name),
        level="event",
        component="deploy",
        service=service,
        cluster=cluster,
        instance=instance,
    )

    client.update(complete_job_config)
    client.run(job_id)
예제 #13
0
def mark_for_deployment(git_url, deploy_group, service, commit):
    """Mark a commit for deployment by force-pushing the deploy group's tag.

    :returns: 0 when the remote ref was created, 1 when that raised
    """
    deploy_tag = get_paasta_tag_from_deploy_group(identifier=deploy_group, desired_state='deploy')
    ref_mutator = remote_git.make_force_push_mutate_refs_func(
        targets=[format_tag(deploy_tag)],
        sha=commit,
    )

    try:
        remote_git.create_remote_refs(git_url=git_url, ref_mutator=ref_mutator, force=True)
    except Exception as e:
        # Report the failure followed by the exception text, one line each.
        loglines = ["Failed to mark %s for deployment in deploy group %s!" % (commit, deploy_group)]
        loglines.extend(str(e).split('\n'))
        return_code = 1
    else:
        loglines = ["Marked %s for deployment in deploy group %s" % (commit, deploy_group)]
        return_code = 0

    for message in loglines:
        _log(service=service, line=message, component='deploy', level='event')
    return return_code
예제 #14
0
def bounce_chronos_job(
    service,
    instance,
    cluster,
    job_to_update,
    client,
):
    """Update a Chronos job, logging before and after the update.

    :param job_to_update: job passed to ``chronos_tools.update_job``; nothing
                          is done when it is falsy
    :param client: client handed to ``chronos_tools.update_job``
    :returns: the tuple ``(0, "All chronos bouncing tasks finished.")``
    """
    finished = (0, "All chronos bouncing tasks finished.")
    if not job_to_update:
        return finished

    def deploy_log(level, line):
        _log(
            service=service,
            instance=instance,
            component='deploy',
            cluster=cluster,
            level=level,
            line=line,
        )

    deploy_log('debug', 'Job to update: %s' % job_to_update)
    chronos_tools.update_job(client=client, job=job_to_update)
    deploy_log('event', 'Updated Chronos job: %s' % job_to_update['name'])
    return finished
예제 #15
0
 def on_enter_rolled_back(self):
     """Announce a finished rollback, log it, and start the auto-abandon timer."""
     slack_message = "Finished rolling back to `{}` in {}".format(
         self.old_git_sha[:8], self.deploy_group
     )
     self.update_slack_status(slack_message)

     log_message = "Rollback to {} for {} complete".format(
         self.old_git_sha[:8], self.deploy_group
     )
     _log(service=self.service, component="deploy", line=log_message, level="event")

     self.start_timer(self.auto_abandon_delay, "auto_abandon", "abandon")
def mark_for_deployment(git_url, deploy_group, service, commit):
    """Mark a commit for deployment by force-pushing the deploy group's branch.

    :returns: 0 when the remote ref was created, 1 when that raised
    """
    deploy_branch = get_paasta_branch_from_deploy_group(identifier=deploy_group)
    ref_mutator = remote_git.make_force_push_mutate_refs_func(
        target_branches=[deploy_branch],
        sha=commit,
    )

    try:
        remote_git.create_remote_refs(git_url=git_url, ref_mutator=ref_mutator, force=True)
    except Exception as e:
        # Report the failure followed by the exception text, one line each.
        loglines = ["Failed to mark %s in for deployment in deploy group %s!" % (commit, deploy_group)]
        loglines.extend(str(e).split('\n'))
        return_code = 1
    else:
        loglines = ["Marked %s in for deployment in deploy group %s" % (commit, deploy_group)]
        return_code = 0

    for message in loglines:
        _log(service=service, line=message, component='deploy', level='event')
    return return_code
예제 #17
0
def get_desired_marathon_configs(soa_dir):
    """Collect formatted app dicts and job configs for this cluster's instances.

    :param soa_dir: The service directory to read instance configs from
    :returns: a pair of dicts, both keyed by app id with the leading '/'
              stripped: (formatted marathon app dicts, job config objects)
    """
    cluster = load_system_paasta_config().get_cluster()
    instances = get_services_for_cluster(
        instance_type="marathon",
        cluster=cluster,
        soa_dir=soa_dir,
    )

    job_configs = {}
    formatted_marathon_configs = {}

    for service, instance in instances:
        try:
            job_config = load_marathon_service_config(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=soa_dir,
            )
            app_dict = job_config.format_marathon_app_dict()
            app_id = app_dict["id"].lstrip("/")
            formatted_marathon_configs[app_id] = app_dict
            job_configs[app_id] = job_config
        except Exception as errormsg:
            # Broad on purpose: the app dict is built from a lot of user
            # input, and one malformed definition must not stop the others.
            _log(
                line=str(errormsg),
                level="debug",
                component="deploy",
                service=service,
                cluster=cluster,
                instance=instance,
            )
    return formatted_marathon_configs, job_configs
예제 #18
0
def wait_for_deployment(service, deploy_group, git_sha, soa_dir, timeout):
    """Poll the clusters of a deploy group until all instance queues are empty.

    :param service: service name
    :param deploy_group: deploy group whose cluster/instance map is looked up
    :param git_sha: sha stored in each cluster's ``ClusterData``
    :param soa_dir: service directory used to build the cluster map
    :param timeout: seconds to keep polling before giving up
    :returns: 0 once every cluster's instance queue is empty
    :raises NoInstancesFound: if the cluster map for the deploy group is empty
    :raises KeyboardInterrupt: if ``green_light`` is cleared by a query round
    :raises TimeoutError: if the deadline passes before the queues drain
    """
    cluster_map = get_cluster_instance_map_for_service(
        soa_dir=soa_dir, service=service, deploy_group=deploy_group)
    if not cluster_map:
        _log(service=service,
             component='deploy',
             line=("Couldn't find any instances for service {0} in deploy "
                   "group {1}".format(service, deploy_group)),
             level='event')
        raise NoInstancesFound
    paasta_print("Waiting for deployment of {0} for '{1}' complete...".format(
        git_sha, deploy_group))

    # One ClusterData per cluster, each with a queue of its pending instances.
    total_instances = 0
    clusters_data = []
    for cluster in cluster_map:
        clusters_data.append(
            ClusterData(cluster=cluster,
                        service=service,
                        git_sha=git_sha,
                        instances_queue=Queue()))
        for i in cluster_map[cluster]['instances']:
            clusters_data[-1].instances_queue.put(i)
        total_instances += len(cluster_map[cluster]['instances'])
    deadline = time.time() + timeout
    green_light = Event()
    green_light.set()

    with progressbar.ProgressBar(maxval=total_instances) as bar:
        while time.time() < deadline:
            _query_clusters(clusters_data, green_light)
            # A cleared green light is surfaced as an interrupt.
            if not green_light.is_set():
                raise KeyboardInterrupt

            # Progress is the number of instances no longer queued.
            bar.update(total_instances - sum((c.instances_queue.qsize()
                                              for c in clusters_data)))

            if all((cluster.instances_queue.empty()
                    for cluster in clusters_data)):
                sys.stdout.flush()
                return 0
            else:
                time.sleep(min(10, timeout))
            sys.stdout.flush()

    _log(service=service,
         component='deploy',
         line=(
             "\n\nTimed out after {0} seconds, waiting for {2} in {1} to be "
             "deployed by PaaSTA. \n\n"
             "This probably means the deploy hasn't succeeded. The new service "
             "might not be healthy or one or more clusters could be having "
             "issues.\n\n"
             "To debug: try running:\n\n    paasta status -s {2} -vv\n"
             "    paasta logs -s {2}\n\nto determine the cause.\n\n"
             "If the service is known to be slow to start you may wish to "
             "increase the timeout on this step.".format(
                 timeout, deploy_group, service)),
         level='event')
    raise TimeoutError
예제 #19
0
def log_event(service_config, desired_state):
    """Record a requested state change in the deploy log and the audit log.

    :param service_config: config object providing get_service(),
                           get_instance() and get_cluster()
    :param desired_state: the requested state; also used as the audit action
    """
    user = utils.get_username()
    host = socket.getfqdn()
    instance = service_config.get_instance()
    service = service_config.get_service()
    cluster = service_config.get_cluster()

    message = "Issued request to change state of {} (an instance of {}) to '{}' by {}@{}".format(
        instance, service, desired_state, user, host
    )
    utils._log(
        line=message,
        level="event",
        component="deploy",
        service=service,
        cluster=cluster,
        instance=instance,
    )

    utils._log_audit(
        action=desired_state,
        service=service,
        cluster=cluster,
        instance=instance,
    )
예제 #20
0
def mark_for_deployment(git_url, cluster, instance, service, commit):
    """Mark a commit for deployment by force-pushing the instance's branch.

    :returns: 0 when the remote ref was created, 1 when that raised
    """
    deploy_branch = get_paasta_branch(cluster=cluster, instance=instance)
    ref_mutator = remote_git.make_force_push_mutate_refs_func(
        target_branches=[deploy_branch],
        sha=commit,
    )

    try:
        remote_git.create_remote_refs(git_url=git_url, ref_mutator=ref_mutator, force=True)
    except Exception as e:
        # Report the failure followed by the exception text, one line each.
        loglines = ["Failed to mark %s in for deployment on %s in the %s cluster!" % (commit, instance, cluster)]
        loglines.extend(str(e).split('\n'))
        return_code = 1
    else:
        loglines = ["Marked %s in for deployment on %s in the %s cluster" % (commit, instance, cluster)]
        return_code = 0

    for message in loglines:
        _log(
            line=message,
            level='event',
            component='deploy',
            service=service,
            cluster=cluster,
            instance=instance,
        )
    return return_code
예제 #21
0
def send_replication_event(instance_config, status, output):
    """Emit a replication check result for the given instance config.

    The instance's monitoring settings are adjusted and passed to
    ``send_event``; the output is then also written to the service log.

    :param instance_config: an instance of LongRunningServiceConfig
    :param status: The status to emit for this event
    :param output: The output to emit for this event
    """
    overrides = instance_config.get_monitoring()
    service = instance_config.service
    soa_dir = instance_config.soa_dir
    cluster = instance_config.cluster

    # A configured alert_after is respected; only a missing one is defaulted.
    overrides.setdefault("alert_after", "2m")
    # check_every is always overwritten.
    overrides["check_every"] = "1m"
    overrides["runbook"] = get_runbook(overrides, service, soa_dir=soa_dir)

    send_event(
        service=service,
        check_name="check_paasta_services_replication.%s" % instance_config.job_id,
        overrides=overrides,
        status=status,
        output=output,
        soa_dir=soa_dir,
        cluster=cluster,
    )
    _log(
        line="Replication: %s" % output,
        level="debug",
        component="monitoring",
        service=service,
        cluster=cluster,
        instance=instance_config.instance,
    )
예제 #22
0
def paasta_push_to_registry(args):
    """Upload a docker image to a registry and log the outcome.

    :param args: parsed arguments; ``service`` and ``commit`` are read
    :returns: the return code of the push command
    """
    service = args.service
    prefix = 'services-'
    if service and service.startswith(prefix):
        # Drop the leading "services-" from the name.
        service = service[len(prefix):]
    validate_service_name(service)

    returncode, output = _run(
        build_command(service, args.commit),
        timeout=3600,
        log=True,
        component='build',
        service=service,
        loglevel='debug',
    )

    if returncode == 0:
        loglines = ['Successfully pushed image for %s to registry' % args.commit]
    else:
        loglines = ['ERROR: Failed to promote image for %s.' % args.commit]
        jenkins_url = get_jenkins_build_output_url()
        if jenkins_url:
            loglines.append('See output: %s' % jenkins_url)

    for message in loglines:
        _log(service=service, line=message, component='build', level='event')
    return returncode
예제 #23
0
def start_chronos_job(service, instance, job_id, client, cluster, job_config, complete_job_config, emergency=False):
    """
    Calls the 'manual start' Chronos endpoint (https://mesos.github.io/chronos/docs/api.html#manually-starting-a-job),
    running the job now regardless of its 'schedule'. The job's "schedule" is unmodified.

    A job whose complete config has a truthy 'disabled' key is not started;
    a message is printed instead. Otherwise the start is logged, the job is
    updated with ``complete_job_config`` and then run.

    :param emergency: selects the reason shown in the log line
    :param job_config: not used in this function body
    """
    name = PaastaColors.cyan(job_id)

    # The job should be run immediately as long as the job is not disabled via the 'disabled' key in soa-configs or has
    # been previously stopped.
    if complete_job_config['disabled']:
        # Call form of print: valid on Python 3 and, with a single argument,
        # prints the same text on Python 2.
        print(PaastaColors.red("You cannot emergency start a disabled job. Run `paasta start` first."))
    else:
        log_reason = PaastaColors.red("EmergencyStart") if emergency else "Brutal bounce"
        _log(
            service=service,
            line="%s: Starting manual run of %s in Chronos" % (log_reason, name),
            component="deploy",
            level="event",
            cluster=cluster,
            instance=instance
        )

        client.update(complete_job_config)
        client.run(job_id)
예제 #24
0
def paasta_push_to_registry(args):
    """Upload a docker image to a registry and log the outcome.

    :param args: parsed arguments; ``service``, ``soa_dir`` and ``commit``
                 are read
    :returns: the return code of the push command
    """
    service = args.service
    prefix = 'services-'
    if service and service.startswith(prefix):
        # Drop the leading "services-" from the name.
        service = service[len(prefix):]
    validate_service_name(service, args.soa_dir)

    returncode, output = _run(
        build_command(service, args.commit),
        timeout=3600,
        log=True,
        stream=True,
        component='build',
        service=service,
        loglevel='debug',
    )

    if returncode == 0:
        loglines = ['Successfully pushed image for %s to registry' % args.commit]
    else:
        loglines = ['ERROR: Failed to promote image for %s.' % args.commit]
        jenkins_url = get_jenkins_build_output_url()
        if jenkins_url:
            loglines.append('See output: %s' % jenkins_url)

    for message in loglines:
        _log(service=service, line=message, component='build', level='event')
    return returncode
예제 #25
0
def mark_for_deployment(git_url, deploy_group, service, commit):
    """Mark a commit for deployment, retrying the remote ref push on failure.

    Up to three attempts are made. Each failure is logged; a success is
    logged, recorded in the audit log, and ends the function.

    :returns: 0 when the remote ref was created, 1 when every attempt failed
    """
    tag = get_paasta_tag_from_deploy_group(
        identifier=deploy_group, desired_state="deploy"
    )
    remote_tag = format_tag(tag)
    ref_mutator = remote_git.make_force_push_mutate_refs_func(
        targets=[remote_tag], sha=commit
    )

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            remote_git.create_remote_refs(
                git_url=git_url, ref_mutator=ref_mutator, force=True
            )
        except Exception:
            logline = "Failed to mark {} for deployment in deploy group {}! (attempt {}/{})".format(
                commit, deploy_group, attempt, max_attempts
            )
            _log(service=service, line=logline, component="deploy", level="event")
            # Back off (5s, 10s, ...) before the next attempt. Nothing follows
            # the last attempt, so do not delay the failure return.
            if attempt < max_attempts:
                time.sleep(5 * attempt)
        else:
            logline = f"Marked {commit} for deployment in deploy group {deploy_group}"
            _log(service=service, line=logline, component="deploy", level="event")

            audit_action_details = {"deploy_group": deploy_group, "commit": commit}
            _log_audit(
                action="mark-for-deployment",
                action_details=audit_action_details,
                service=service,
            )

            return 0
    return 1
예제 #26
0
def paasta_wait_for_deployment(args):
    """Validate the arguments, then wait for a deployment and report on it.

    :param args: parsed arguments; reads verbose, service, git_url, soa_dir,
                 commit, deploy_group and timeout. ``git_url`` and ``commit``
                 may be filled in or rewritten in place.
    :returns: 0 on success; 1 on a validation failure or when waiting is
              aborted
    :raises ValueError: if a short sha does not match exactly one commit
    """
    log.setLevel(level=logging.DEBUG if args.verbose else logging.INFO)

    service = args.service
    prefix = 'services-'
    if service and service.startswith(prefix):
        # Drop the leading "services-" from the name.
        service = service[len(prefix):]

    if args.git_url is None:
        args.git_url = get_git_url(service=service, soa_dir=args.soa_dir)

    try:
        validate_full_git_sha(args.commit)
    except ArgumentTypeError:
        # Not a full sha: expand it using the refs of the remote.
        remote_refs = remote_git.list_remote_refs(args.git_url)
        matches = short_to_full_git_sha(short=args.commit, refs=remote_refs)
        if len(matches) != 1:
            raise ValueError(
                "%s matched %d git shas (with refs pointing at them). Must match exactly 1."
                % (args.commit, len(matches)))
        args.commit = matches[0]

    try:
        validate_service_name(service, soa_dir=args.soa_dir)
        validate_deploy_group(args.deploy_group, service, args.soa_dir)
        validate_git_sha(args.commit, args.git_url, args.deploy_group, service)
    except (GitShaError, DeployGroupError, NoSuchService) as e:
        paasta_print(PaastaColors.red('{}'.format(e)))
        return 1

    try:
        wait_for_deployment(
            service=service,
            deploy_group=args.deploy_group,
            git_sha=args.commit,
            soa_dir=args.soa_dir,
            timeout=args.timeout,
        )
        completion_line = "Deployment of {} for {} complete".format(
            args.commit, args.deploy_group)
        _log(
            service=service,
            component='deploy',
            line=completion_line,
            level='event',
        )
    except (KeyboardInterrupt, TimeoutError, NoSuchCluster):
        report_waiting_aborted(service, args.deploy_group)
        return 1

    return 0
예제 #27
0
 def log(self, line, level=DEFAULT_LOGLEVEL):
     """Write ``line`` to the deploy log of this object's service instance.

     :param line: the text to log
     :param level: the log level passed through to ``_log``
     """
     _log(
         line=line,
         level=level,
         component='deploy',
         service=self.service_name,
         instance=self.instance_name,
     )
예제 #28
0
def delete_app(app_id, client, soa_dir):
    """Delete a marathon app and record that it happened.

    The deletion runs inside the ``bounce_lib.bounce_lock_zookeeper`` context
    for the short app id. Afterwards an OK event is sent for the replication,
    setup and bounce-progress checks of the instance, and an 'event' level
    line is written to the deploy log.

    :param app_id: full marathon app id; deformatted to get service and instance
    :param client: handed to ``bounce_lib.delete_marathon_app``
    :param soa_dir: handed to every ``send_event`` call
    :raises Exception: any non-IOError failure is written to the deploy log
        line by line at 'debug' level and then re-raised
    """
    log.warn("%s appears to be old; attempting to delete" % app_id)
    service, instance, _, __ = marathon_tools.deformat_job_id(app_id)
    cluster = load_system_paasta_config().get_cluster()
    # Checks that get an OK event once the app is gone, in the order they are sent.
    check_templates = (
        'check_marathon_services_replication.%s',
        'setup_marathon_job.%s',
        'paasta_bounce_progress.%s',
    )
    try:
        short_app_id = marathon_tools.compose_job_id(service, instance)
        with bounce_lib.bounce_lock_zookeeper(short_app_id):
            bounce_lib.delete_marathon_app(app_id, client)
        for template in check_templates:
            send_event(
                service=service,
                check_name=template % short_app_id,
                soa_dir=soa_dir,
                status=pysensu_yelp.Status.OK,
                overrides={},
                output="This instance was removed and is no longer running",
            )
        _log(
            service=service,
            component='deploy',
            level='event',
            cluster=cluster,
            instance=instance,
            line="Deleted stale marathon job that looks lost: %s" % app_id,
        )
    except IOError:
        # Handled as "a bounce is in progress": skip this app rather than fail.
        log.debug("%s is being bounced, skipping" % app_id)
    except Exception:
        header = 'Exception raised during cleanup of service %s:' % service
        trace_lines = traceback.format_exc().rstrip().split("\n")
        for logline in [header] + trace_lines:
            _log(
                service=service,
                component='deploy',
                level='debug',
                cluster=load_system_paasta_config().get_cluster(),
                instance=instance,
                line=logline,
            )
        raise
예제 #29
0
def delete_app(app_id, client, soa_dir):
    """Remove a marathon app that looks stale and tell the user about it.

    :param app_id: full marathon app id; split into service and instance via
        ``marathon_tools.deformat_job_id``
    :param client: passed through to ``bounce_lib.delete_marathon_app``
    :param soa_dir: passed through to ``send_event``
    :raises Exception: errors other than IOError are logged to the deploy
        component at 'debug' level (one ``_log`` call per traceback line) and
        re-raised
    """
    log.warn("%s appears to be old; attempting to delete" % app_id)
    service, instance, _, __ = marathon_tools.deformat_job_id(app_id)
    cluster = load_system_paasta_config().get_cluster()

    def _send_resolved(check_name):
        # Every check tied to the removed instance gets the same OK event.
        send_event(
            service=service,
            check_name=check_name,
            soa_dir=soa_dir,
            status=pysensu_yelp.Status.OK,
            overrides={},
            output="This instance was removed and is no longer running",
        )

    try:
        short_app_id = marathon_tools.compose_job_id(service, instance)
        with bounce_lib.bounce_lock_zookeeper(short_app_id):
            bounce_lib.delete_marathon_app(app_id, client)
        _send_resolved('check_marathon_services_replication.%s' % short_app_id)
        _send_resolved('setup_marathon_job.%s' % short_app_id)
        _send_resolved('paasta_bounce_progress.%s' % short_app_id)
        _log(
            service=service,
            component='deploy',
            level='event',
            cluster=cluster,
            instance=instance,
            line="Deleted stale marathon job that looks lost: %s" % app_id,
        )
    except IOError:
        # The debug message below treats IOError as "app is mid-bounce".
        log.debug("%s is being bounced, skipping" % app_id)
    except Exception:
        report = ['Exception raised during cleanup of service %s:' % service]
        report += traceback.format_exc().rstrip().split("\n")
        for report_line in report:
            _log(
                service=service,
                component='deploy',
                level='debug',
                cluster=load_system_paasta_config().get_cluster(),
                instance=instance,
                line=report_line,
            )
        raise
예제 #30
0
 def on_enter_deployed(self):
     """Log the completed deployment, then call ``notify_after_good_deploy`` on the slack notifier."""
     fields = {
         'service': self.service,
         'component': 'deploy',
         'line': f"Deployment of {self.commit} for {self.deploy_group} complete",
         'level': 'event',
     }
     _log(**fields)
     self.slack_notifier.notify_after_good_deploy()
예제 #31
0
def write_to_log(config, line, level='event'):
    """Log ``line`` to the 'deploy' component, prefixed with the formatted job id.

    :param config: object exposing ``service``, ``instance`` and ``cluster``
    :param line: message text; emitted as "<job id>: <line>"
    :param level: level handed to ``_log`` (default 'event')
    """
    job_id = format_job_id(config.service, config.instance)
    prefixed_line = "%s: %s" % (job_id, line)
    _log(
        service=config.service,
        line=prefixed_line,
        component='deploy',
        level=level,
        cluster=config.cluster,
        instance=config.instance,
    )
예제 #32
0
파일: utils.py 프로젝트: lydian/paasta
def trigger_deploys(service):
    """Ask the deploymentsd watcher on sysgit to generate a deployment for ``service``.

    The watcher is an extremely simple service that listens for a service
    string and then generates a service deployment. The request is logged to
    the 'deploy' component before the connection is opened.

    :param service: service name; sent as a single newline-terminated UTF-8 line
    :raises OSError: propagated from the socket calls if connecting or sending fails
    """
    logline = f"Notifying sysgit to generate a deployment for {service}"
    _log(service=service, line=logline, component="deploy", level="event")
    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        client.connect(("sysgit.yelpcorp.com", 5049))
        # sendall keeps writing until the whole payload is out; a bare send()
        # is allowed to transmit only part of the buffer.
        client.sendall(f"{service}\n".encode("utf-8"))
    finally:
        # Release the socket even when connect/sendall raises.
        client.close()
예제 #33
0
def write_to_log(config, line, level='event'):
    """Send ``line`` to the 'deploy' log component for the given config.

    :param config: object exposing ``service``, ``cluster`` and ``instance``
    :param line: message text, passed through unchanged
    :param level: level handed to ``_log`` (default 'event')
    """
    fields = {
        'service': config.service,
        'line': line,
        'component': 'deploy',
        'level': level,
        'cluster': config.cluster,
        'instance': config.instance,
    }
    _log(**fields)
예제 #34
0
def write_to_log(config, line, level='event'):
    """Write a job-id-prefixed message to the 'deploy' log component.

    :param config: object exposing ``service``, ``instance`` and ``cluster``
    :param line: message text; the logged line is "<formatted job id>: <line>"
    :param level: level handed to ``_log`` (default 'event')
    """
    fields = dict(
        service=config.service,
        line="%s: %s" % (format_job_id(config.service, config.instance), line),
        component='deploy',
        level=level,
        cluster=config.cluster,
        instance=config.instance,
    )
    _log(**fields)
예제 #35
0
def write_to_log(config, line, level="event"):
    """Forward ``line`` to ``_log`` under the "deploy" component.

    :param config: object exposing ``service``, ``cluster`` and ``instance``
    :param line: message text, logged as-is
    :param level: level handed to ``_log`` (default "event")
    """
    service = config.service
    cluster = config.cluster
    instance = config.instance
    _log(service=service, line=line, component="deploy", level=level, cluster=cluster, instance=instance)
예제 #36
0
def write_to_log(config, line, level='event'):
    """Log ``line`` for the service instance described by ``config``.

    :param config: object exposing ``service``, ``cluster`` and ``instance``
    :param line: message text, logged unchanged
    :param level: level handed to ``_log`` (default 'event')
    """
    where = dict(service=config.service, cluster=config.cluster, instance=config.instance)
    _log(line=line, component='deploy', level=level, **where)
예제 #37
0
def send_replication_event(instance_config, status, output, description, dry_run=False):
    """Send the replication check result for one instance and log it.

    The instance's monitoring overrides are filled in before sending:
    "alert_after" defaults to "2m" when absent, "check_every" is forced to
    "1m", "runbook" and "tip" are resolved through
    ``__get_monitoring_config_value`` with replication-specific fallbacks, and
    "description" is set from the argument.

    :param instance_config: an instance of LongRunningServiceConfig
    :param status: The status to emit for this event
    :param output: The output to emit for this event
    :param description: stored under the "description" monitoring override
    :param dry_run: forwarded to ``send_event`` (print the event instead of emitting it)
    """
    def default_runbook(_):
        return DEFAULT_REPLICATION_RUNBOOK

    def default_tip(_):
        # Built lazily so the config attributes are read only if the fallback is used.
        return (
            f"Check the instance with: `paasta status -s {instance_config.service} "
            f"-i {instance_config.instance} -c {instance_config.cluster} -vv`"
        )

    overrides = instance_config.get_monitoring()
    if "alert_after" not in overrides:
        overrides["alert_after"] = "2m"
    overrides["check_every"] = "1m"
    # "runbook" is resolved and stored before "tip", so the tip lookup sees it.
    for key, fallback in (("runbook", default_runbook), ("tip", default_tip)):
        overrides[key] = __get_monitoring_config_value(
            key,
            overrides,
            instance_config.service,
            soa_dir=instance_config.soa_dir,
            monitoring_defaults=fallback,
        )
    overrides["description"] = description

    send_event(
        service=instance_config.service,
        check_name="check_paasta_services_replication.%s" % instance_config.job_id,
        overrides=overrides,
        status=status,
        output=output,
        soa_dir=instance_config.soa_dir,
        cluster=instance_config.cluster,
        dry_run=dry_run,
    )
    _log(
        service=instance_config.service,
        line="Replication: %s" % output,
        component="monitoring",
        level="debug",
        cluster=instance_config.cluster,
        instance=instance_config.instance,
    )
예제 #38
0
def wait_for_deployment(service, deploy_group, git_sha, soa_dir, timeout):
    """Block until every instance of ``service`` in ``deploy_group`` reports ``git_sha``.

    Each cluster entry gets a 'deployed' value that is refreshed from
    ``instances_deployed`` until it equals the number of instances in that
    cluster. Between full passes the function sleeps for 10 seconds.

    :param service: service name
    :param deploy_group: deploy group whose instances are waited on
    :param git_sha: sha handed to ``instances_deployed``
    :param soa_dir: handed to ``get_cluster_instance_map_for_service``
    :param timeout: seconds handed to ``Timeout``
    :returns: True once all clusters are fully deployed
    :raises NoInstancesFound: when the cluster map is empty
    :raises TimeoutError: re-raised after a debugging hint is written to the deploy log
    """
    cluster_map = get_cluster_instance_map_for_service(soa_dir, service, deploy_group)
    if not cluster_map:
        _log(
            service=service,
            component='deploy',
            line="Couldn't find any instances for service {0} in deploy group {1}".format(
                service, deploy_group),
            level='event',
        )
        raise NoInstancesFound
    paasta_print("Waiting for deployment of {0} for '{1}' complete...".format(
        git_sha, deploy_group))
    for state in cluster_map.values():
        state['deployed'] = 0
    try:
        with Timeout(seconds=timeout):
            total_instances = sum(len(state["instances"]) for state in cluster_map.values())
            with progressbar.ProgressBar(maxval=total_instances) as bar:
                while True:
                    for cluster, state in cluster_map.items():
                        # Clusters that already finished are not queried again.
                        if state['deployed'] != len(state['instances']):
                            state['deployed'] = instances_deployed(
                                cluster=cluster,
                                service=service,
                                instances=state['instances'],
                                git_sha=git_sha,
                            )
                            if state['deployed'] == len(state['instances']):
                                paasta_print(
                                    "Deploy to %s complete! (instances: %s)" %
                                    (cluster, ", ".join(state['instances'])))
                        bar.update(sum(entry["deployed"] for entry in cluster_map.values()))
                    if all(entry['deployed'] == len(entry["instances"])
                           for entry in cluster_map.values()):
                        break
                    time.sleep(10)
    except TimeoutError:
        timeout_help = (
            "\n\nTimed out after {0} seconds, waiting for {2} in {1} to be deployed by PaaSTA. \n\n"
            "This probably means the deploy hasn't suceeded. The new service might not be healthy or one "
            "or more clusters could be having issues.\n\n"
            "To debug: try running:\n\n"
            "    paasta status -s {2} -vv\n"
            "    paasta logs -s {2}\n\n"
            "to determine the cause.\n\n"
            "If the service is known to be slow to start you may wish to increase "
            "the timeout on this step."
        ).format(timeout, deploy_group, service)
        _log(service=service, component='deploy', line=timeout_help, level='event')
        raise
    return True
예제 #39
0
def scale_marathon_job(service, instance, app_id, delta, client, cluster):
    """Log an EmergencyScale event and scale the marathon app by ``delta``.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param app_id: marathon app id handed to ``client.scale_app``
    :param delta: signed change in instance count; converted with ``int``
    :param client: object providing ``scale_app``
    :param cluster: cluster name, used for logging
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    if delta < 0:
        direction = 'down'
    else:
        direction = 'up'
    step = int(delta)
    message = "EmergencyScale: Scaling %s %s by %d instances" % (name, direction, abs(step))
    _log(
        service=service,
        line=message,
        component='deploy',
        level='event',
        cluster=cluster,
        instance=instance,
    )
    client.scale_app(app_id, delta=step, force=True)
예제 #40
0
def wait_for_deployment(service, deploy_group, git_sha, soa_dir, timeout):
    """Wait until every instance of ``service`` in ``deploy_group`` is deployed at ``git_sha``.

    :param service: service name
    :param deploy_group: deploy group whose instances are waited on
    :param git_sha: sha handed to ``instances_deployed``
    :param soa_dir: handed to ``get_cluster_instance_map_for_service``
    :param timeout: seconds handed to ``Timeout``
    :returns: True once every cluster's 'deployed' value equals its instance count
    :raises NoInstancesFound: when no instances are found for the deploy group
    :raises TimeoutError: re-raised after a debugging hint is written to the deploy log
    """
    cluster_map = get_cluster_instance_map_for_service(soa_dir, service, deploy_group)
    if not cluster_map:
        line = "Couldn't find any instances for service {0} in deploy group {1}".format(service, deploy_group)
        _log(
            service=service,
            component='deploy',
            line=line,
            level='event'
        )
        raise NoInstancesFound
    paasta_print("Waiting for deployment of {0} for '{1}' complete..."
                 .format(git_sha, deploy_group))
    # Every cluster starts with nothing counted as deployed.
    for cluster in cluster_map.values():
        cluster['deployed'] = 0
    try:
        with Timeout(seconds=timeout):
            total_instances = sum([len(v["instances"]) for v in cluster_map.values()])
            with progressbar.ProgressBar(maxval=total_instances) as bar:
                while True:
                    for cluster, instances in cluster_map.items():
                        # Only clusters that are not yet complete are queried again.
                        if cluster_map[cluster]['deployed'] != len(cluster_map[cluster]['instances']):
                            # presumably a count of deployed instances (it is compared to len()) -- TODO confirm
                            cluster_map[cluster]['deployed'] = instances_deployed(
                                cluster=cluster,
                                service=service,
                                instances=instances['instances'],
                                git_sha=git_sha)
                            if cluster_map[cluster]['deployed'] == len(cluster_map[cluster]['instances']):
                                instance_csv = ", ".join(cluster_map[cluster]['instances'])
                                paasta_print("Deploy to %s complete! (instances: %s)" % (cluster, instance_csv))
                        bar.update(sum([v["deployed"] for v in cluster_map.values()]))
                    if all([cluster['deployed'] == len(cluster["instances"]) for cluster in cluster_map.values()]):
                        # Flush before leaving the loop so pending output is written out.
                        sys.stdout.flush()
                        break
                    else:
                        # Not done yet: wait 10 seconds before the next pass.
                        time.sleep(10)
                    sys.stdout.flush()
    except TimeoutError:
        # The adjacent literals form one string; .format() applies to all of it.
        line = "\n\nTimed out after {0} seconds, waiting for {2} in {1} to be deployed by PaaSTA. \n\n"\
               "This probably means the deploy hasn't suceeded. The new service might not be healthy or one "\
               "or more clusters could be having issues.\n\n"\
               "To debug: try running:\n\n"\
               "    paasta status -s {2} -vv\n"\
               "    paasta logs -s {2}\n\n"\
               "to determine the cause.\n\n"\
               "If the service is known to be slow to start you may wish to increase "\
               "the timeout on this step.".format(timeout, deploy_group, service)
        _log(
            service=service,
            component='deploy',
            line=line,
            level='event'
        )
        raise
    return True
예제 #41
0
def restart_marathon_job(service, instance, app_id, client, cluster):
    """Log an EmergencyRestart event and scale the marathon app to 0 instances.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param app_id: marathon app id handed to ``client.scale_app``
    :param client: object providing ``scale_app``
    :param cluster: cluster name, used for logging
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    message = "EmergencyRestart: Scaling %s down to 0 instances, then letting them scale back up" % name
    fields = dict(
        service=service,
        line=message,
        component='deploy',
        level='event',
        cluster=cluster,
        instance=instance,
    )
    _log(**fields)
    client.scale_app(app_id, instances=0, force=True)
예제 #42
0
def log_to_paasta(log_line):
    """Add the event to the standard PaaSTA logging backend.

    :param log_line: object exposing ``process_name``, ``hostname``,
        ``container_id``, ``service``, ``instance`` and ``cluster``
    """
    # Name the killed process only when a process name is available.
    if log_line.process_name:
        victim = 'a %s process' % log_line.process_name
    else:
        victim = 'a process'
    message = 'oom-killer killed %s on %s (container_id: %s).' % (
        victim, log_line.hostname, log_line.container_id,
    )
    _log(
        service=log_line.service,
        instance=log_line.instance,
        component='oom',
        cluster=log_line.cluster,
        level=DEFAULT_LOGLEVEL,
        line=message,
    )
예제 #43
0
def restart_marathon_job(service, instance, app_id, client, cluster):
    """Record an EmergencyRestart in the deploy log, then scale the app down to 0.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param app_id: marathon app id handed to ``client.scale_app``
    :param client: object providing ``scale_app``
    :param cluster: cluster name, used for logging
    """
    colored_job = PaastaColors.cyan(compose_job_id(service, instance))
    _log(
        line="EmergencyRestart: Scaling %s down to 0 instances, then letting them scale back up" % colored_job,
        service=service,
        instance=instance,
        cluster=cluster,
        component='deploy',
        level='event',
    )
    client.scale_app(app_id, instances=0, force=True)
예제 #44
0
def start_marathon_job(service, instance, app_id, normal_instance_count, client, cluster):
    """Log an EmergencyStart event and scale the app to ``normal_instance_count``.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param app_id: marathon app id handed to ``client.scale_app``
    :param normal_instance_count: instance count to scale up to
    :param client: object providing ``scale_app``
    :param cluster: cluster name, used for logging
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    message = "EmergencyStart: scaling %s up to %d instances" % (name, normal_instance_count)
    fields = {
        'service': service,
        'line': message,
        'component': 'deploy',
        'level': 'event',
        'cluster': cluster,
        'instance': instance,
    }
    _log(**fields)
    client.scale_app(app_id, instances=normal_instance_count, force=True)
예제 #45
0
def stop_marathon_job(service, instance, app_id, client, cluster):
    """Log an EmergencyStop event and scale the marathon app down to 0 instances.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param app_id: marathon app id handed to ``client.scale_app``
    :param client: object providing ``scale_app``
    :param cluster: cluster name, used for logging
    """
    colored_job = PaastaColors.cyan(compose_job_id(service, instance))
    message = "EmergencyStop: Scaling %s down to 0 instances" % colored_job
    _log(service=service, line=message, component='deploy', level='event', cluster=cluster, instance=instance)
    # TODO do we want to capture the return val of any client calls?
    client.scale_app(app_id, instances=0, force=True)
예제 #46
0
def scale_marathon_job(service, instance, app_id, delta, client, cluster):
    """Record an EmergencyScale in the deploy log and apply ``delta`` to the app.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param app_id: marathon app id handed to ``client.scale_app``
    :param delta: signed change in instance count; converted with ``int``
    :param client: object providing ``scale_app``
    :param cluster: cluster name, used for logging
    """
    colored_job = PaastaColors.cyan(compose_job_id(service, instance))
    # The sign is checked on the raw value first, before it is converted.
    direction = 'down' if delta < 0 else 'up'
    amount = int(delta)
    fields = dict(
        service=service,
        line="EmergencyScale: Scaling %s %s by %d instances" % (colored_job, direction, abs(amount)),
        component='deploy',
        level='event',
        cluster=cluster,
        instance=instance,
    )
    _log(**fields)
    client.scale_app(app_id, delta=amount, force=True)
예제 #47
0
def stop_marathon_job(service, instance, app_id, client, cluster):
    """Record an EmergencyStop in the deploy log, then scale the app to 0 instances.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param app_id: marathon app id handed to ``client.scale_app``
    :param client: object providing ``scale_app``
    :param cluster: cluster name, used for logging
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    fields = {
        'service': service,
        'line': "EmergencyStop: Scaling %s down to 0 instances" % name,
        'component': 'deploy',
        'level': 'event',
        'cluster': cluster,
        'instance': instance,
    }
    _log(**fields)
    # TODO do we want to capture the return val of any client calls?
    client.scale_app(app_id, instances=0, force=True)
예제 #48
0
def start_marathon_job(service, instance, app_id, normal_instance_count, client, cluster):
    """Record an EmergencyStart in the deploy log, then scale the app back up.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param app_id: marathon app id handed to ``client.scale_app``
    :param normal_instance_count: instance count to scale up to
    :param client: object providing ``scale_app``
    :param cluster: cluster name, used for logging
    """
    colored_job = PaastaColors.cyan(compose_job_id(service, instance))
    _log(
        service=service,
        line="EmergencyStart: scaling %s up to %d instances" % (colored_job, normal_instance_count),
        component='deploy',
        level='event',
        cluster=cluster,
        instance=instance,
    )
    client.scale_app(app_id, instances=normal_instance_count, force=True)
예제 #49
0
def log_event(service_config, desired_state):
    """Log who asked for an instance's state to change, and from which host.

    :param service_config: object providing ``get_service``, ``get_cluster``
        and ``get_instance``
    :param desired_state: the requested state, included verbatim in the message
    """
    user = utils.get_username()
    host = socket.getfqdn()
    message = "Issued request to change state of %s to '%s' by %s@%s" % (
        service_config.get_instance(), desired_state, user, host)
    fields = dict(
        service=service_config.get_service(),
        level='event',
        cluster=service_config.get_cluster(),
        instance=service_config.get_instance(),
        component='deploy',
        line=message,
    )
    utils._log(**fields)
예제 #50
0
파일: itest.py 프로젝트: somic/paasta
def paasta_itest(args):
    """Build and test a docker image by running ``make itest``.

    :param args: object exposing ``service``, ``soa_dir`` and ``commit``
    :returns: the return code of ``make itest``, or 1 when the run succeeded
        but ``check_docker_image`` reports the image missing
    """
    service = args.service
    soa_dir = args.soa_dir
    prefix = 'services-'
    if service and service.startswith(prefix):
        service = service[len(prefix):]
    validate_service_name(service, soa_dir=soa_dir)

    tag = build_docker_tag(service, args.commit)
    run_env = dict(os.environ.copy(), DOCKER_TAG=tag)

    _log(
        service=service,
        line='starting itest for %s.' % args.commit,
        component='build',
        level='event'
    )
    returncode, output = _run(
        "make itest",
        env=run_env,
        timeout=3600,
        log=True,
        component='build',
        service=service,
        loglevel='debug',
        stream=True,
    )
    if returncode == 0:
        loglines = ['itest passed for %s.' % args.commit]
        # A passing run must also have produced the image.
        if not check_docker_image(service, args.commit):
            loglines.append('ERROR: itest has not created %s' % tag)
            returncode = 1
    else:
        loglines = ['ERROR: itest failed for %s.' % args.commit]
        output = get_jenkins_build_output_url()
        if output:
            loglines.append('See output: %s' % output)
    for logline in loglines:
        _log(service=service, line=logline, component='build', level='event')
    return returncode
예제 #51
0
def stop_chronos_job(service, instance, client, cluster, existing_jobs, emergency=False):
    """Disable each existing job and delete its tasks, logging every kill.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param client: object providing ``update`` and ``delete_tasks``
    :param cluster: cluster name, used for logging
    :param existing_jobs: iterable of job dicts with a "name" key; each gets
        "disabled" set to True
    :param emergency: when true the log reason is a red "EmergencyStop",
        otherwise "Brutal bounce"
    """
    if emergency:
        log_reason = PaastaColors.red("EmergencyStop")
    else:
        log_reason = "Brutal bounce"
    for job in existing_jobs:
        colored_name = PaastaColors.cyan(job["name"])
        message = "%s: Killing all tasks for job %s" % (log_reason, colored_name)
        _log(service=service, line=message, component="deploy", level="event", cluster=cluster, instance=instance)
        job["disabled"] = True
        client.update(job)
        client.delete_tasks(job["name"])
예제 #52
0
def start_chronos_job(service, instance, job_id, client, cluster, job_config, emergency=False):
    """Send ``job_config`` to Chronos and run the job right away unless it is disabled.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param job_id: job id handed to ``client.run``
    :param client: object providing ``update`` and ``run``
    :param cluster: cluster name, used for logging
    :param job_config: job definition with a "disabled" key
    :param emergency: when true the log reason is a red "EmergencyStart",
        otherwise "Brutal bounce"
    """
    name = PaastaColors.cyan(job_id)
    if emergency:
        log_reason = PaastaColors.red("EmergencyStart")
    else:
        log_reason = "Brutal bounce"
    if job_config["disabled"]:
        log_immediate_run = ""
    else:
        log_immediate_run = " and running it immediately"
    message = "%s: Sending job %s to Chronos%s" % (log_reason, name, log_immediate_run)
    _log(service=service, line=message, component="deploy", level="event", cluster=cluster, instance=instance)
    client.update(job_config)
    # TODO fail or give some output/feedback to user that the job won't run immediately if disabled (PAASTA-1244)
    if not job_config["disabled"]:
        client.run(job_id)
def bounce_chronos_job(service, instance, cluster, job_to_update, client):
    """Update the given Chronos job, if any, and report completion.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param cluster: cluster name, used for logging
    :param job_to_update: job definition to push; nothing is done when falsy
    :param client: handed to ``chronos_tools.update_job``
    :returns: the tuple ``(0, "All chronos bouncing tasks finished.")``
    """
    outcome = (0, "All chronos bouncing tasks finished.")
    if not job_to_update:
        return outcome

    where = dict(service=service, instance=instance, component='deploy', cluster=cluster)
    _log(level='debug', line='Job to update: %s' % job_to_update, **where)
    chronos_tools.update_job(client=client, job=job_to_update)
    _log(level='event', line='Updated Chronos job: %s' % job_to_update['name'], **where)
    return outcome
예제 #54
0
 def log_bounce_action(line, level='debug'):
     """Log ``line`` to the 'deploy' component and return what ``_log`` returns.

     ``service``, ``cluster`` and ``instance`` are not parameters; they are
     resolved from the surrounding scope.

     :param line: message text
     :param level: level handed to ``_log`` (default 'debug')
     """
     fields = dict(
         service=service,
         line=line,
         component='deploy',
         level=level,
         cluster=cluster,
         instance=instance,
     )
     return _log(**fields)
예제 #55
0
 def log_deploy_error(errormsg, level='event'):
     """Log a deploy error to the 'deploy' component and return what ``_log`` returns.

     ``service``, ``cluster`` and ``instance`` are not parameters; they are
     resolved from the surrounding scope.

     :param errormsg: message text
     :param level: level handed to ``_log``; defaults to 'event'
     """
     return _log(
         service=service,
         line=errormsg,
         component='deploy',
         # Honour the caller's level; it used to be ignored in favour of a
         # hard-coded 'event'. The default keeps existing callers unchanged.
         level=level,
         cluster=cluster,
         instance=instance
     )
예제 #56
0
def paasta_wait_for_deployment(args):
    """Validate the arguments, then wrap ``wait_for_deployment``.

    :param args: object exposing ``verbose``, ``service``, ``git_url``,
        ``soa_dir``, ``deploy_group``, ``commit`` and ``timeout``; ``git_url``
        is filled in from ``get_git_url`` when it is None
    :returns: 0 when the deployment completed; 1 when validation failed, the
        wait was interrupted or timed out, or no instances were found
    """
    log.setLevel(level=logging.DEBUG if args.verbose else logging.INFO)

    service = args.service
    prefix = 'services-'
    if service and service.startswith(prefix):
        service = service[len(prefix):]

    if args.git_url is None:
        args.git_url = get_git_url(service=service, soa_dir=args.soa_dir)

    try:
        validate_service_name(service, soa_dir=args.soa_dir)
        validate_deploy_group(args.deploy_group, service, args.soa_dir)
        validate_git_sha(args.commit, args.git_url, args.deploy_group, service)
    except (GitShaError, DeployGroupError, NoSuchService) as validation_error:
        paasta_print(PaastaColors.red('{}'.format(validation_error)))
        return 1

    try:
        wait_for_deployment(
            service=service,
            deploy_group=args.deploy_group,
            git_sha=args.commit,
            soa_dir=args.soa_dir,
            timeout=args.timeout,
        )
        done_line = "Deployment of {0} for {1} complete".format(args.commit, args.deploy_group)
        _log(service=service, component='deploy', line=done_line, level='event')
    except (KeyboardInterrupt, TimeoutError):
        paasta_print("Waiting for deployment aborted.")
        return 1
    except NoInstancesFound:
        # Nothing is printed for this case; only the exit status reflects it.
        return 1

    return 0
예제 #57
0
def paasta_cook_image(args, service=None, soa_dir=None):
    """Build a docker image by running ``make cook-image``.

    :param args: object exposing ``service``; only read when ``service`` is falsy
    :param service: service name; a leading 'services-' is stripped
    :param soa_dir: handed to ``validate_service_name``
    :returns: 1 when the Makefile has no cook-image target, 2 when interrupted
        by the user, otherwise the return code of ``make cook-image``
    """
    service = service or args.service
    prefix = 'services-'
    if service and service.startswith(prefix):
        service = service[len(prefix):]
    validate_service_name(service, soa_dir)

    run_env = os.environ.copy()
    default_tag = 'paasta-cook-image-%s-%s' % (service, get_username())
    # A DOCKER_TAG already present in the environment wins over the default.
    run_env['DOCKER_TAG'] = run_env.get('DOCKER_TAG', default_tag)

    if not makefile_responds_to('cook-image'):
        sys.stderr.write('ERROR: local-run now requires a cook-image target to be present in the Makefile. See '
                         'http://paasta.readthedocs.io/en/latest/about/contract.html\n')
        return 1

    try:
        returncode, output = _run(
            'make cook-image',
            env=run_env,
            log=True,
            component='build',
            service=service,
            loglevel='debug'
        )
        if returncode != 0:
            failure_line = 'ERROR: make cook-image failed for %s.' % service
            _log(service=service, line=failure_line, component='build', level='event')
        return returncode
    except KeyboardInterrupt:
        sys.stderr.write('\nProcess interrupted by the user. Cancelling.\n')
        return 2
예제 #58
0
def paasta_push_to_registry(args):
    """Upload a docker image to a registry, then exit with the command's return code.

    :param args: object exposing ``service`` and ``commit``
    """
    service = args.service
    prefix = "services-"
    if service and service.startswith(prefix):
        service = service[len(prefix):]
    validate_service_name(service)

    cmd = build_command(service, args.commit)
    returncode, output = _run(cmd, timeout=3600, log=True, component="build", service=service, loglevel="debug")
    if returncode == 0:
        loglines = ["Successfully pushed image for %s to registry" % args.commit]
    else:
        loglines = ["ERROR: Failed to promote image for %s." % args.commit]
        output = get_jenkins_build_output_url()
        if output:
            loglines.append("See output: %s" % output)
    for logline in loglines:
        _log(service=service, line=logline, component="build", level="event")
    sys.exit(returncode)
예제 #59
0
def wait_for_deployment(service, deploy_group, git_sha, soa_dir, timeout):
    """Wait until every instance of ``service`` in ``deploy_group`` is deployed at ``git_sha``.

    Each cluster entry gets a 'deployed' value that is refreshed from
    ``instances_deployed`` until it equals the number of instances in that
    cluster. Between full passes the function sleeps for 10 seconds.

    :param service: service name
    :param deploy_group: deploy group whose instances are waited on
    :param git_sha: sha handed to ``instances_deployed``
    :param soa_dir: handed to ``get_cluster_instance_map_for_service``
    :param timeout: seconds handed to ``Timeout``
    :returns: True once all clusters are fully deployed
    :raises NoInstancesFound: when no instances are found for the deploy group
    :raises TimeoutError: re-raised after the per-cluster status and a
        debugging/rollback hint are written to the deploy log
    """
    cluster_map = get_cluster_instance_map_for_service(soa_dir, service, deploy_group)
    if not cluster_map:
        line = "Couldn't find any instances for service {0} in deploy group {1}".format(service, deploy_group)
        _log(
            service=service,
            component='deploy',
            line=line,
            level='event'
        )
        raise NoInstancesFound
    for cluster in cluster_map.values():
        cluster['deployed'] = 0
    try:
        with Timeout(seconds=timeout):
            total_instances = sum(len(v["instances"]) for v in cluster_map.values())
            with progressbar.ProgressBar(maxval=total_instances) as bar:
                while True:
                    for cluster, instances in cluster_map.items():
                        # Only clusters that are not yet complete are queried again.
                        if cluster_map[cluster]['deployed'] != len(cluster_map[cluster]['instances']):
                            cluster_map[cluster]['deployed'] = instances_deployed(
                                cluster=cluster,
                                service=service,
                                instances=instances['instances'],
                                git_sha=git_sha)
                            if cluster_map[cluster]['deployed'] == len(cluster_map[cluster]['instances']):
                                instance_csv = ", ".join(cluster_map[cluster]['instances'])
                                # Parenthesised single argument: prints the same text
                                # on Python 2 and is valid syntax on Python 3.
                                print("Deploy to %s complete! (instances: %s)" % (cluster, instance_csv))
                        bar.update(sum(v["deployed"] for v in cluster_map.values()))
                    if all(entry['deployed'] == len(entry["instances"]) for entry in cluster_map.values()):
                        break
                    else:
                        time.sleep(10)
    except TimeoutError:
        human_status = ["{0}: {1}".format(cluster, data['deployed']) for cluster, data in cluster_map.items()]
        line = "\nCurrent deployment status of {0} per cluster:\n".format(deploy_group) + "\n".join(human_status)
        _log(
            service=service,
            component='deploy',
            line=line,
            level='event'
        )
        # The adjacent literals form one string; .format() applies to all of it.
        line = "\n\nTimed out after {0} seconds, waiting for {1} in {2} to be deployed by PaaSTA. \n\n"\
               "This probably means the deploy hasn't suceeded. The new service might not be healthy or one "\
               "or more clusters could be having issues.\n\n"\
               "To debug: try running 'paasta status -s {2} -vv' or 'paasta logs -s {2}' to determine the cause.\n\n"\
               "{3} is still *marked* for deployment. To rollback, you can run: 'paasta rollback --service "\
               "{2} --deploy-group {1}'\n\n"\
               "If the service is known to be slow to start you may wish to increase "\
               "the timeout on this step.".format(timeout, deploy_group, service, git_sha)
        _log(
            service=service,
            component='deploy',
            line=line,
            level='event'
        )
        raise
    return True
예제 #60
0
def paasta_mark_for_deployment(args):
    """Mark a docker image for deployment, then exit with the command's return code.

    :param args: object exposing ``service``, ``git_url``, ``commit`` and
        ``clusterinstance``
    """
    service = args.service
    prefix = 'services-'
    if service and service.startswith(prefix):
        service = service[len(prefix):]
    validate_service_name(service)

    cmd = build_command(args.git_url, args.commit, args.clusterinstance)
    # Clusterinstance should be in cluster.instance format
    cluster, instance = args.clusterinstance.split('.')
    returncode, output = _run(cmd, timeout=30)

    where = dict(service=service, cluster=cluster, instance=instance)
    for logline in get_loglines(returncode=returncode, cmd=cmd, output=output, args=args):
        _log(line=logline, component='deploy', level='event', **where)
    sys.exit(returncode)