Ejemplo n.º 1
0
def main(args):
    """Emit one Sensu event per (service, instance) found in the Chronos job mapping.

    The mapping is built from the jobs configured under ``args.soa_dir``. For
    each key a message/status pair is computed, the monitoring overrides are
    composed for the current cluster, and the result is handed to
    ``send_event_to_sensu``.

    :param args: object exposing a ``soa_dir`` attribute
    """
    chronos_config = chronos_tools.load_chronos_config()
    chronos_client = chronos_tools.get_chronos_client(chronos_config)
    paasta_config = utils.load_system_paasta_config()

    # Only the jobs that appear in the configuration are considered.
    soa_dir = args.soa_dir
    jobs_in_config = chronos_tools.get_chronos_jobs_for_cluster(soa_dir=soa_dir)
    mapping = build_service_job_mapping(chronos_client, jobs_in_config)

    for key, pairs in mapping.items():
        service = key[0]
        instance = key[1]
        message, status = sensu_message_status_for_jobs(service, instance, pairs)
        overrides = compose_monitoring_overrides_for_service(
            cluster=paasta_config.get_cluster(),
            service=service,
            instance=instance,
            soa_dir=soa_dir,
        )
        send_event_to_sensu(
            service=service,
            instance=instance,
            monitoring_overrides=overrides,
            status_code=status,
            message=message,
            soa_dir=soa_dir,
        )
Ejemplo n.º 2
0
def main(args):
    """Walk the Chronos service/job mapping and send each entry's status to Sensu.

    :param args: object exposing a ``soa_dir`` attribute; it is used both to
        find the configured jobs and when composing/sending each event
    """
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    system_config = utils.load_system_paasta_config()

    # Restrict the check to jobs listed in the configuration.
    soa_dir = args.soa_dir
    listed_jobs = chronos_tools.get_chronos_jobs_for_cluster(soa_dir=soa_dir)

    for pair, states in build_service_job_mapping(client, listed_jobs).items():
        service = pair[0]
        instance = pair[1]
        output, status_code = sensu_message_status_for_jobs(service, instance, states)
        # Identity of the thing being reported on, shared by both calls below.
        target = dict(service=service, instance=instance, soa_dir=soa_dir)
        overrides = compose_monitoring_overrides_for_service(
            cluster=system_config.get_cluster(),
            **target
        )
        send_event_to_sensu(
            monitoring_overrides=overrides,
            status_code=status_code,
            message=output,
            **target
        )
Ejemplo n.º 3
0
def main():
    """Check each Chronos job in the service/job mapping and send an event for it.

    Entries whose job configuration raises ``utils.NoDeploymentsAvailable`` are
    logged and skipped; entries whose computed status is ``None`` produce no
    event. A ``chronos.ChronosAPIError`` raised while building or walking the
    mapping is logged as critical and ends the process with exit status 2.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    cluster = utils.load_system_paasta_config().get_cluster()

    jobs_in_config = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    try:
        mapping = build_service_job_mapping(client, jobs_in_config)

        for key, chronos_job in mapping.items():
            service = key[0]
            instance = key[1]
            try:
                job_config = load_chronos_job_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                )
            except utils.NoDeploymentsAvailable:
                log.info("Skipping %s because no deployments are available" % service)
                continue

            output, status = sensu_message_status_for_jobs(
                chronos_job_config=job_config,
                chronos_job=chronos_job,
                client=client,
            )
            if status is None:
                # Nothing to report for this job.
                continue
            send_event(job_config, status, output)
    except chronos.ChronosAPIError as err:
        log.error("CRITICAL: Unable to contact Chronos! Error: %s" % err)
        sys.exit(2)
Ejemplo n.º 4
0
def main():
    """Print one line per Chronos job configured for the requested cluster.

    Each line is the first and second element of a job pair returned by
    ``chronos_tools.get_chronos_jobs_for_cluster``, joined with
    ``chronos_tools.INTERNAL_SPACER``. The process then exits with status 0.
    """
    args = parse_args()
    jobs = chronos_tools.get_chronos_jobs_for_cluster(cluster=args.cluster, soa_dir=args.soa_dir)
    # TODO use compose_job_id instead of constructing string once INTERNAL_SPACER deprecated
    composed = ['%s%s%s' % (name, chronos_tools.INTERNAL_SPACER, job) for name, job in jobs]
    # Call form of print: with a single argument it behaves exactly like the
    # Python 2 print statement, and unlike the statement it also parses on Python 3.
    print('\n'.join(composed))
    sys.exit(0)
Ejemplo n.º 5
0
def main():
    """Write the configured Chronos jobs of a cluster to output, one per line.

    Every ``(name, job)`` pair is rendered as name, ``INTERNAL_SPACER`` and job
    concatenated; the lines are emitted through ``paasta_print`` and the
    process exits with status 0.
    """
    args = parse_args()
    job_pairs = chronos_tools.get_chronos_jobs_for_cluster(
        cluster=args.cluster,
        soa_dir=args.soa_dir,
    )
    # TODO use compose_job_id instead of constructing string once INTERNAL_SPACER deprecated
    lines = []
    for name, job in job_pairs:
        lines.append('%s%s%s' % (name, chronos_tools.INTERNAL_SPACER, job))
    paasta_print('\n'.join(lines))
    sys.exit(0)
Ejemplo n.º 6
0
def main():
    """Delete deployed Chronos jobs that are not expected, then report the outcome.

    The set of jobs to delete is computed by ``jobs_to_delete`` from the
    configured jobs and the decomposed ids of the deployed paasta jobs. Their
    tasks are cleaned up first, then the jobs themselves. A response whose last
    element is an ``Exception`` counts as a failure. The process exits with
    status 1 if any task or job cleanup failed.
    """
    args = parse_args()

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    # get_chronos_jobs_for_cluster returns (service, job)
    expected_service_jobs = chronos_tools.get_chronos_jobs_for_cluster(soa_dir=args.soa_dir)

    # filter jobs not related to paasta
    # and decompose into (service, instance, tag) tuples
    paasta_jobs = filter_paasta_jobs(deployed_job_names(client))
    running_service_jobs = [chronos_tools.decompose_job_id(job) for job in paasta_jobs]

    to_delete = jobs_to_delete(expected_service_jobs, running_service_jobs)

    # recompose the job ids again for deletion
    to_delete_job_ids = [chronos_tools.compose_job_id(*job) for job in to_delete]

    # Tasks are cleaned up (and their responses sorted) before the jobs are touched.
    task_successes = []
    task_failures = []
    for response in cleanup_tasks(client, to_delete_job_ids):
        if isinstance(response[-1], Exception):
            task_failures.append(response)
        else:
            task_successes.append(response)

    job_successes = []
    job_failures = []
    for response in cleanup_jobs(client, to_delete_job_ids):
        if isinstance(response[-1], Exception):
            job_failures.append(response)
        else:
            job_successes.append(response)

    # print(...) with a single argument behaves like the Python 2 print
    # statement and, unlike the statement, is also valid Python 3 syntax.
    if len(to_delete) == 0:
        print('No Chronos Jobs to remove')
        return

    # One (heading, responses) row per report section; empty sections are skipped.
    report = (
        ("Successfully Removed Tasks (if any were running) for:", task_successes),
        ("Failed to Delete Tasks for:", task_failures),
        ("Successfully Removed Jobs:", job_successes),
        ("Failed to Delete Jobs:", job_failures),
    )
    for heading, responses in report:
        if responses:
            print(format_list_output(heading, [response[0] for response in responses]))

    # if there are any failures, exit appropriately
    if job_failures or task_failures:
        sys.exit(1)
Ejemplo n.º 7
0
def main():
    """Remove Chronos jobs that are deployed but no longer expected, and print a summary.

    ``jobs_to_delete`` decides what goes, given the configured jobs and the
    decomposed ids of the deployed paasta jobs. Task cleanup runs before job
    cleanup. Any response ending in an ``Exception`` instance is treated as a
    failure, and any failure makes the process exit with status 1.
    """
    args = parse_args()

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    # get_chronos_jobs_for_cluster returns (service, job)
    expected_service_jobs = chronos_tools.get_chronos_jobs_for_cluster(soa_dir=args.soa_dir)

    # filter jobs not related to paasta
    # and decompose into (service, instance, tag) tuples
    paasta_jobs = filter_paasta_jobs(deployed_job_names(client))
    running_service_jobs = [chronos_tools.decompose_job_id(job) for job in paasta_jobs]

    to_delete = jobs_to_delete(expected_service_jobs, running_service_jobs)

    # recompose the job ids again for deletion
    to_delete_job_ids = [chronos_tools.compose_job_id(*job) for job in to_delete]

    task_responses = cleanup_tasks(client, to_delete_job_ids)
    task_successes = []
    task_failures = []
    for response in task_responses:
        failed = isinstance(response[-1], Exception)
        (task_failures if failed else task_successes).append(response)

    job_responses = cleanup_jobs(client, to_delete_job_ids)
    job_successes = []
    job_failures = []
    for response in job_responses:
        failed = isinstance(response[-1], Exception)
        (job_failures if failed else job_successes).append(response)

    # The single-argument call form of print produces the same output as the
    # Python 2 print statement while also being valid on Python 3.
    if len(to_delete) == 0:
        print('No Chronos Jobs to remove')
        return

    # Report sections in output order; a section with no responses prints nothing.
    sections = (
        ("Successfully Removed Tasks (if any were running) for:", task_successes),
        ("Failed to Delete Tasks for:", task_failures),
        ("Successfully Removed Jobs:", job_successes),
        ("Failed to Delete Jobs:", job_failures),
    )
    for heading, responses in sections:
        if responses:
            print(format_list_output(heading, [job[0] for job in responses]))

    # if there are any failures, exit appropriately
    if job_failures or task_failures:
        sys.exit(1)
Ejemplo n.º 8
0
def main():
    """Check every entry of the Chronos service/job mapping and send its status.

    An entry is skipped (with a cyan notice) when loading its job configuration
    raises ``utils.NoDeploymentsAvailable``, and no event is sent when the
    computed status is ``None``. A ``chronos.ChronosAPIError`` raised while
    building or walking the mapping is printed in red and ends the process
    with exit status 2.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    cluster = utils.load_system_paasta_config().get_cluster()

    jobs_in_config = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    try:
        mapping = build_service_job_mapping(client, jobs_in_config)

        for key, job_state_pairs in mapping.items():
            service = key[0]
            instance = key[1]
            try:
                job_config = load_chronos_job_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                )
            except utils.NoDeploymentsAvailable:
                notice = "Skipping %s because no deployments are available" % service
                paasta_print(utils.PaastaColors.cyan(notice))
                continue

            message, status = sensu_message_status_for_jobs(
                chronos_job_config=job_config,
                service=service,
                instance=instance,
                cluster=cluster,
                job_state_pairs=job_state_pairs,
            )
            if status is None:
                # No status means there is nothing to report for this entry.
                continue

            overrides = compose_monitoring_overrides_for_service(
                chronos_job_config=job_config,
                soa_dir=soa_dir,
            )
            send_event(
                service=service,
                instance=instance,
                monitoring_overrides=overrides,
                status_code=status,
                message=message,
                soa_dir=soa_dir,
            )
    except chronos.ChronosAPIError as err:
        failure = "CRITICAL: Unable to contact Chronos! Error: %s" % err
        paasta_print(utils.PaastaColors.red(failure))
        sys.exit(2)
Ejemplo n.º 9
0
def main():
    """Report the state of each configured Chronos job through ``send_event``.

    Per mapping entry: load the job configuration (skipping the entry with a
    notice on ``utils.NoDeploymentsAvailable``), compute a message and status,
    and send an event unless the status is ``None``. If a
    ``chronos.ChronosAPIError`` is raised while building or walking the mapping,
    a critical message is printed and the process exits with status 2.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    chronos_config = chronos_tools.load_chronos_config()
    chronos_client = chronos_tools.get_chronos_client(chronos_config)
    paasta_config = utils.load_system_paasta_config()
    cluster = paasta_config.get_cluster()

    listed_jobs = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    try:
        for pair, states in build_service_job_mapping(chronos_client, listed_jobs).items():
            service = pair[0]
            instance = pair[1]
            try:
                job_config = load_chronos_job_config(
                    service=service, instance=instance, cluster=cluster, soa_dir=soa_dir,
                )
            except utils.NoDeploymentsAvailable:
                paasta_print(utils.PaastaColors.cyan(
                    "Skipping %s because no deployments are available" % service
                ))
                continue

            output, status_code = sensu_message_status_for_jobs(
                chronos_job_config=job_config,
                service=service,
                instance=instance,
                cluster=cluster,
                job_state_pairs=states,
            )
            if status_code is not None:
                overrides = compose_monitoring_overrides_for_service(
                    chronos_job_config=job_config, soa_dir=soa_dir,
                )
                send_event(
                    service=service,
                    instance=instance,
                    monitoring_overrides=overrides,
                    status_code=status_code,
                    message=output,
                    soa_dir=soa_dir,
                )
    except chronos.ChronosAPIError as error:
        paasta_print(utils.PaastaColors.red(
            "CRITICAL: Unable to contact Chronos! Error: %s" % error
        ))
        sys.exit(2)
Ejemplo n.º 10
0
def main():
    """Send one event per entry of the Chronos service/job mapping.

    For every ``(service, instance)`` key the job configuration is loaded, a
    message/status pair is computed, monitoring overrides are composed and the
    event is sent. ``ServerNotFoundError``, ``chronos.ChronosAPIError`` and
    ``socket_error`` raised while building or walking the mapping are reported
    as critical, after which the process exits with status 2.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    cluster = utils.load_system_paasta_config().get_cluster()

    jobs_in_config = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    # Failures that are all reported as "cannot reach Chronos".
    unreachable = (ServerNotFoundError, chronos.ChronosAPIError, socket_error)
    try:
        mapping = build_service_job_mapping(client, jobs_in_config)

        for key, job_state_pairs in mapping.items():
            service = key[0]
            instance = key[1]
            job_config = load_chronos_job_config(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=soa_dir,
            )
            message, status = sensu_message_status_for_jobs(
                chronos_job_config=job_config,
                service=service,
                instance=instance,
                cluster=cluster,
                job_state_pairs=job_state_pairs,
            )
            overrides = compose_monitoring_overrides_for_service(
                chronos_job_config=job_config,
                soa_dir=soa_dir,
            )
            send_event(
                service=service,
                instance=instance,
                monitoring_overrides=overrides,
                status_code=status,
                message=message,
                soa_dir=soa_dir,
            )
    except unreachable as err:
        failure = "CRITICAL: Unable to contact Chronos! Error: %s" % err
        print(utils.PaastaColors.red(failure))
        sys.exit(2)
Ejemplo n.º 11
0
def main():
    """Send an event for every (service, instance) in the Chronos job mapping.

    The cluster comes from the system paasta configuration and the soa
    directory from the parsed arguments. Each entry gets its job configuration
    loaded, a message/status computed from its job/state pairs, and an event
    sent with the composed monitoring overrides. No exception is handled here.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    cluster = utils.load_system_paasta_config().get_cluster()

    jobs_in_config = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)
    mapping = build_service_job_mapping(client, jobs_in_config)

    for key, job_state_pairs in mapping.items():
        service = key[0]
        instance = key[1]
        job_config = load_chronos_job_config(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
        )
        message, status = sensu_message_status_for_jobs(
            chronos_job_config=job_config,
            service=service,
            instance=instance,
            cluster=cluster,
            job_state_pairs=job_state_pairs,
        )
        overrides = compose_monitoring_overrides_for_service(
            chronos_job_config=job_config,
            soa_dir=soa_dir,
        )
        send_event(
            service=service,
            instance=instance,
            monitoring_overrides=overrides,
            status_code=status,
            message=message,
            soa_dir=soa_dir,
        )
Ejemplo n.º 12
0
def main():
    """Compute and send the status of each configured Chronos job.

    Iterates the mapping returned by ``build_service_job_mapping``; for each
    key the job configuration is loaded, ``sensu_message_status_for_jobs``
    yields a message and status, and ``send_event`` is called with the
    composed monitoring overrides. Exceptions propagate to the caller.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    chronos_config = chronos_tools.load_chronos_config()
    chronos_client = chronos_tools.get_chronos_client(chronos_config)
    paasta_config = utils.load_system_paasta_config()
    cluster = paasta_config.get_cluster()

    listed_jobs = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    for pair, states in build_service_job_mapping(chronos_client, listed_jobs).items():
        service = pair[0]
        instance = pair[1]
        job_config = load_chronos_job_config(
            service=service, instance=instance, cluster=cluster, soa_dir=soa_dir,
        )
        output, status_code = sensu_message_status_for_jobs(
            chronos_job_config=job_config,
            service=service,
            instance=instance,
            cluster=cluster,
            job_state_pairs=states,
        )
        overrides = compose_monitoring_overrides_for_service(
            chronos_job_config=job_config, soa_dir=soa_dir,
        )
        send_event(
            service=service,
            instance=instance,
            monitoring_overrides=overrides,
            status_code=status_code,
            message=output,
            soa_dir=soa_dir,
        )
Ejemplo n.º 13
0
def main():
    """Delete deployed Chronos jobs that are neither expected nor valid temporary jobs.

    Jobs to delete are the deployed job names minus the composed ids of the
    configured jobs minus the temporary jobs that have not expired. Tasks are
    cleaned up before jobs. For each job removed successfully an OK event is
    sent, unless its id cannot be decomposed. A response whose last element is
    an ``Exception`` counts as a failure, and any failure makes the process
    exit with status 1.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    running_jobs = set(deployed_job_names(client))

    expected_service_jobs = {chronos_tools.compose_job_id(*job) for job in
                             chronos_tools.get_chronos_jobs_for_cluster(soa_dir=soa_dir)}

    all_tmp_jobs = set(filter_tmp_jobs(filter_paasta_jobs(running_jobs)))
    expired_tmp_jobs = set(filter_expired_tmp_jobs(client, all_tmp_jobs))
    valid_tmp_jobs = all_tmp_jobs - expired_tmp_jobs

    to_delete = running_jobs - expected_service_jobs - valid_tmp_jobs

    task_responses = cleanup_tasks(client, to_delete)
    task_successes = []
    task_failures = []
    for response in task_responses:
        if isinstance(response[-1], Exception):
            task_failures.append(response)
        else:
            task_successes.append(response)

    job_responses = cleanup_jobs(client, to_delete)
    job_successes = []
    job_failures = []
    for response in job_responses:
        if isinstance(response[-1], Exception):
            job_failures.append(response)
            continue
        job_successes.append(response)
        try:
            (service, instance) = chronos_tools.decompose_job_id(response[0])
            send_event(
                service=service,
                instance=instance,
                monitoring_overrides={},
                soa_dir=soa_dir,
                status_code=pysensu_yelp.Status.OK,
                message="This instance was removed and is no longer supposed to be scheduled.",
            )
        except InvalidJobNameError:
            # If we deleted some bogus job with a bogus jobid that could not be parsed,
            # Just move on, no need to send any kind of paasta event.
            pass

    # print(...) with one argument matches the Python 2 print statement's
    # output and, unlike the statement, is also valid Python 3 syntax.
    if not to_delete:
        print('No Chronos Jobs to remove')
        return

    # Report sections in output order; sections without responses are skipped.
    report = (
        ("Successfully Removed Tasks (if any were running) for:", task_successes),
        ("Failed to Delete Tasks for:", task_failures),
        ("Successfully Removed Jobs:", job_successes),
        ("Failed to Delete Jobs:", job_failures),
    )
    for heading, responses in report:
        if responses:
            print(format_list_output(heading, [job[0] for job in responses]))

    # if there are any failures, exit appropriately
    if job_failures or task_failures:
        sys.exit(1)
Ejemplo n.º 14
0
def main():
    """Clean up deployed Chronos jobs that are not expected and not valid temporary jobs.

    The deletion set is the deployed job names, minus the composed ids of the
    configured jobs, minus temporary jobs that have not expired. Task cleanup
    runs first, then job cleanup; every successfully removed job whose id can
    be decomposed gets an OK event through ``monitoring_tools.send_event``.
    Responses ending in an ``Exception`` are failures, and any failure ends the
    process with exit status 1 after the summary is printed.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    cluster = utils.load_system_paasta_config().get_cluster()

    running_jobs = set(deployed_job_names(client))

    expected_service_jobs = set()
    for job in chronos_tools.get_chronos_jobs_for_cluster(soa_dir=soa_dir):
        expected_service_jobs.add(chronos_tools.compose_job_id(*job))

    tmp_jobs = set(filter_tmp_jobs(filter_paasta_jobs(running_jobs)))
    expired = set(filter_expired_tmp_jobs(client, tmp_jobs, cluster=cluster, soa_dir=soa_dir))
    # Temporary jobs that have not expired are kept.
    to_delete = running_jobs - expected_service_jobs - (tmp_jobs - expired)

    task_responses = cleanup_tasks(client, to_delete)
    task_successes, task_failures = [], []
    for response in task_responses:
        bucket = task_failures if isinstance(response[-1], Exception) else task_successes
        bucket.append(response)

    job_responses = cleanup_jobs(client, to_delete)
    job_successes, job_failures = [], []
    for response in job_responses:
        if isinstance(response[-1], Exception):
            job_failures.append(response)
            continue
        job_successes.append(response)
        try:
            service, instance = chronos_tools.decompose_job_id(response[0])
            monitoring_tools.send_event(
                check_name=check_chronos_job_name(service, instance),
                service=service,
                overrides={},
                soa_dir=soa_dir,
                status=pysensu_yelp.Status.OK,
                output="This instance was removed and is no longer supposed to be scheduled.",
            )
        except InvalidJobNameError:
            # A removed job whose id cannot be parsed has no service/instance
            # to report on, so no event is sent for it.
            pass

    if not to_delete:
        paasta_print('No Chronos Jobs to remove')
        return

    # Summary sections in output order; a section with no responses is skipped.
    report = (
        ("Successfully Removed Tasks (if any were running) for:", task_successes),
        ("Failed to Delete Tasks for:", task_failures),
        ("Successfully Removed Jobs:", job_successes),
        ("Failed to Delete Jobs:", job_failures),
    )
    for heading, responses in report:
        if responses:
            paasta_print(format_list_output(heading, [entry[0] for entry in responses]))

    # Any failure is signalled through the exit status.
    if job_failures or task_failures:
        sys.exit(1)