def main(args):
    """Send one Sensu event per entry of the Chronos service/job mapping.

    Loads the Chronos and system PaaSTA configuration, fetches the jobs
    listed in the configs, builds the service/job mapping from them and,
    for every entry, computes a Sensu message/status pair and forwards it
    through ``send_event_to_sensu``.

    :param args: parsed arguments; only ``args.soa_dir`` is read here
    """
    chronos_config = chronos_tools.load_chronos_config()
    chronos_client = chronos_tools.get_chronos_client(chronos_config)
    paasta_config = utils.load_system_paasta_config()

    # get those jobs listed in configs
    jobs_in_configs = chronos_tools.get_chronos_jobs_for_cluster(soa_dir=args.soa_dir)
    mapping = build_service_job_mapping(chronos_client, jobs_in_configs)

    for key, job_state_pairs in mapping.items():
        # Each key is indexable; element 0 is the service, element 1 the instance.
        service = key[0]
        instance = key[1]

        message, status = sensu_message_status_for_jobs(service, instance, job_state_pairs)

        overrides = compose_monitoring_overrides_for_service(
            cluster=paasta_config.get_cluster(),
            service=service,
            instance=instance,
            soa_dir=args.soa_dir,
        )

        send_event_to_sensu(
            service=service,
            instance=instance,
            monitoring_overrides=overrides,
            status_code=status,
            message=message,
            soa_dir=args.soa_dir,
        )
def main(args):
    """Report the status of each mapped Chronos service instance to Sensu.

    :param args: parsed command-line arguments; ``args.soa_dir`` is the only
        attribute this function reads
    """
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    system_paasta_config = utils.load_system_paasta_config()

    # get those jobs listed in configs
    jobs_from_configs = chronos_tools.get_chronos_jobs_for_cluster(
        soa_dir=args.soa_dir,
    )
    jobs_by_service_instance = build_service_job_mapping(client, jobs_from_configs)

    for service_instance, job_state_pairs in jobs_by_service_instance.items():
        service = service_instance[0]
        instance = service_instance[1]

        output, status = sensu_message_status_for_jobs(
            service,
            instance,
            job_state_pairs,
        )

        # Monitoring overrides are looked up per instance for the current cluster.
        overrides = compose_monitoring_overrides_for_service(
            cluster=system_paasta_config.get_cluster(),
            service=service,
            instance=instance,
            soa_dir=args.soa_dir,
        )

        event = {
            'service': service,
            'instance': instance,
            'monitoring_overrides': overrides,
            'status_code': status,
            'message': output,
            'soa_dir': args.soa_dir,
        }
        send_event_to_sensu(**event)
def main():
    """Check each mapped Chronos job and send an event when a status exists.

    Instances for which ``load_chronos_job_config`` raises
    ``utils.NoDeploymentsAvailable`` are logged and skipped. No event is sent
    when the computed status is ``None``. A ``chronos.ChronosAPIError`` raised
    while building or walking the mapping is logged and the process exits
    with status 2.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    chronos_config = chronos_tools.load_chronos_config()
    chronos_client = chronos_tools.get_chronos_client(chronos_config)
    cluster = utils.load_system_paasta_config().get_cluster()
    jobs_in_configs = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    try:
        mapping = build_service_job_mapping(chronos_client, jobs_in_configs)
        for key, chronos_job in mapping.items():
            service = key[0]
            instance = key[1]

            try:
                job_config = load_chronos_job_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                )
            except utils.NoDeploymentsAvailable:
                log.info("Skipping %s because no deployments are available" % service)
                continue

            message, status = sensu_message_status_for_jobs(
                chronos_job_config=job_config,
                chronos_job=chronos_job,
                client=chronos_client,
            )
            if status is None:
                # Nothing to report for this instance.
                continue
            send_event(job_config, status, message)
    except chronos.ChronosAPIError as e:
        log.error("CRITICAL: Unable to contact Chronos! Error: %s" % e)
        sys.exit(2)
def main():
    """Print one composed job id per Chronos job returned for the cluster.

    Reads ``cluster`` and ``soa_dir`` from the parsed command-line arguments,
    joins each ``(name, job)`` pair with ``chronos_tools.INTERNAL_SPACER``,
    prints the results one per line, and exits with status 0.
    """
    args = parse_args()
    jobs = chronos_tools.get_chronos_jobs_for_cluster(cluster=args.cluster, soa_dir=args.soa_dir)
    # TODO use compose_job_id instead of constructing string once INTERNAL_SPACER deprecated
    composed = ['%s%s%s' % (name, chronos_tools.INTERNAL_SPACER, job) for name, job in jobs]
    # A parenthesised single-argument print produces the same output as the
    # Python 2 print statement and is also valid Python 3.
    print('\n'.join(composed))
    sys.exit(0)
def main():
    """List the Chronos jobs returned for a cluster, one composed id per line.

    ``cluster`` and ``soa_dir`` come from the parsed command-line arguments.
    The output goes through ``paasta_print`` and the process then exits with
    status 0.
    """
    args = parse_args()
    service_job_pairs = chronos_tools.get_chronos_jobs_for_cluster(
        cluster=args.cluster,
        soa_dir=args.soa_dir,
    )

    # TODO use compose_job_id instead of constructing string once INTERNAL_SPACER deprecated
    lines = []
    for name, job in service_job_pairs:
        lines.append('%s%s%s' % (name, chronos_tools.INTERNAL_SPACER, job))

    paasta_print('\n'.join(lines))
    sys.exit(0)
def main():
    """Remove running PaaSTA Chronos jobs that are not expected, with their tasks.

    Computes the jobs to delete from the expected (configured) jobs and the
    PaaSTA jobs currently deployed, asks ``cleanup_tasks`` and then
    ``cleanup_jobs`` to remove them, prints a summary of successes and
    failures, and exits with status 1 if any task or job removal failed.
    """
    def _split_by_outcome(responses):
        # Single pass over the responses: an entry whose last element is an
        # Exception is a failure, anything else is a success.
        successes = []
        failures = []
        for response in responses:
            if isinstance(response[-1], Exception):
                failures.append(response)
            else:
                successes.append(response)
        return successes, failures

    args = parse_args()

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    # get_chronos_jobs_for_cluster returns (service, job)
    expected_service_jobs = chronos_tools.get_chronos_jobs_for_cluster(soa_dir=args.soa_dir)

    # filter jobs not related to paasta
    # and decompose into (service, instance, tag) tuples
    paasta_jobs = filter_paasta_jobs(deployed_job_names(client))
    running_service_jobs = [chronos_tools.decompose_job_id(job) for job in paasta_jobs]

    to_delete = jobs_to_delete(expected_service_jobs, running_service_jobs)

    # recompose the job ids again for deletion
    to_delete_job_ids = [chronos_tools.compose_job_id(*job) for job in to_delete]

    # Tasks are cleaned up (and their responses consumed) before jobs are removed.
    task_successes, task_failures = _split_by_outcome(cleanup_tasks(client, to_delete_job_ids))
    job_successes, job_failures = _split_by_outcome(cleanup_jobs(client, to_delete_job_ids))

    # Parenthesised single-argument print behaves identically on Python 2 and
    # is also valid Python 3.
    if len(to_delete) == 0:
        print('No Chronos Jobs to remove')
    else:
        if task_successes:
            print(format_list_output(
                "Successfully Removed Tasks (if any were running) for:",
                [job[0] for job in task_successes],
            ))

        # if there are any failures, print and exit appropriately
        if task_failures:
            print(format_list_output("Failed to Delete Tasks for:", [job[0] for job in task_failures]))

        if job_successes:
            print(format_list_output("Successfully Removed Jobs:", [job[0] for job in job_successes]))

        # if there are any failures, print and exit appropriately
        if job_failures:
            print(format_list_output("Failed to Delete Jobs:", [job[0] for job in job_failures]))

        if job_failures or task_failures:
            sys.exit(1)
def main():
    """Check every mapped Chronos job and report its status via ``send_event``.

    Instances for which ``load_chronos_job_config`` raises
    ``utils.NoDeploymentsAvailable`` are announced and skipped, and no event
    is sent when the computed status is ``None``. A ``chronos.ChronosAPIError``
    raised while building or walking the mapping is printed and the process
    exits with status 2.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    chronos_config = chronos_tools.load_chronos_config()
    chronos_client = chronos_tools.get_chronos_client(chronos_config)
    cluster = utils.load_system_paasta_config().get_cluster()
    jobs_in_configs = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    try:
        mapping = build_service_job_mapping(chronos_client, jobs_in_configs)
        for key, job_state_pairs in mapping.items():
            service = key[0]
            instance = key[1]

            try:
                job_config = load_chronos_job_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                )
            except utils.NoDeploymentsAvailable:
                skip_note = "Skipping %s because no deployments are available" % service
                paasta_print(utils.PaastaColors.cyan(skip_note))
                continue

            message, status = sensu_message_status_for_jobs(
                chronos_job_config=job_config,
                service=service,
                instance=instance,
                cluster=cluster,
                job_state_pairs=job_state_pairs,
            )
            if status is None:
                # No status means there is nothing to report for this instance.
                continue

            overrides = compose_monitoring_overrides_for_service(
                chronos_job_config=job_config,
                soa_dir=soa_dir,
            )
            send_event(
                service=service,
                instance=instance,
                monitoring_overrides=overrides,
                status_code=status,
                message=message,
                soa_dir=soa_dir,
            )
    except chronos.ChronosAPIError as e:
        failure_note = "CRITICAL: Unable to contact Chronos! Error: %s" % e
        paasta_print(utils.PaastaColors.red(failure_note))
        sys.exit(2)
def main():
    """Walk the Chronos service/job mapping and emit a monitoring event per instance.

    An instance is skipped (with a printed note) when loading its job config
    raises ``utils.NoDeploymentsAvailable``. An event is only sent when
    ``sensu_message_status_for_jobs`` yields a status that is not ``None``.
    On ``chronos.ChronosAPIError`` a critical message is printed and the
    process exits with status 2.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    system_paasta_config = utils.load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    configured = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    try:
        jobs_by_service_instance = build_service_job_mapping(client, configured)
        for service_instance, job_state_pairs in jobs_by_service_instance.items():
            service = service_instance[0]
            instance = service_instance[1]

            try:
                chronos_job_config = load_chronos_job_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                )
            except utils.NoDeploymentsAvailable:
                paasta_print(
                    utils.PaastaColors.cyan(
                        "Skipping %s because no deployments are available" % service
                    )
                )
                continue

            output, status = sensu_message_status_for_jobs(
                chronos_job_config=chronos_job_config,
                service=service,
                instance=instance,
                cluster=cluster,
                job_state_pairs=job_state_pairs,
            )

            if status is not None:
                event = {
                    'service': service,
                    'instance': instance,
                    'monitoring_overrides': compose_monitoring_overrides_for_service(
                        chronos_job_config=chronos_job_config,
                        soa_dir=soa_dir,
                    ),
                    'status_code': status,
                    'message': output,
                    'soa_dir': soa_dir,
                }
                send_event(**event)
    except chronos.ChronosAPIError as e:
        paasta_print(
            utils.PaastaColors.red(
                "CRITICAL: Unable to contact Chronos! Error: %s" % e
            )
        )
        sys.exit(2)
def main():
    """Send a monitoring event for every entry of the Chronos service/job mapping.

    If ``ServerNotFoundError``, ``chronos.ChronosAPIError`` or ``socket_error``
    is raised while building or walking the mapping, a critical message is
    printed and the process exits with status 2.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    chronos_config = chronos_tools.load_chronos_config()
    chronos_client = chronos_tools.get_chronos_client(chronos_config)
    cluster = utils.load_system_paasta_config().get_cluster()
    jobs_in_configs = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    # Error types treated as "cannot reach Chronos".
    unreachable_errors = (ServerNotFoundError, chronos.ChronosAPIError, socket_error)

    try:
        mapping = build_service_job_mapping(chronos_client, jobs_in_configs)
        for key, job_state_pairs in mapping.items():
            service = key[0]
            instance = key[1]

            job_config = load_chronos_job_config(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=soa_dir,
            )
            message, status = sensu_message_status_for_jobs(
                chronos_job_config=job_config,
                service=service,
                instance=instance,
                cluster=cluster,
                job_state_pairs=job_state_pairs,
            )
            overrides = compose_monitoring_overrides_for_service(
                chronos_job_config=job_config,
                soa_dir=soa_dir,
            )
            send_event(
                service=service,
                instance=instance,
                monitoring_overrides=overrides,
                status_code=status,
                message=message,
                soa_dir=soa_dir,
            )
    except unreachable_errors as e:
        print(utils.PaastaColors.red("CRITICAL: Unable to contact Chronos! Error: %s" % e))
        sys.exit(2)
def main():
    """Send a monitoring event for every entry of the Chronos service/job mapping.

    For each mapped (service, instance) the job config is loaded, a
    message/status pair is computed from the job/state pairs, monitoring
    overrides are composed, and everything is passed to ``send_event``.
    No exceptions are handled here.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    chronos_config = chronos_tools.load_chronos_config()
    chronos_client = chronos_tools.get_chronos_client(chronos_config)
    cluster = utils.load_system_paasta_config().get_cluster()

    jobs_in_configs = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)
    mapping = build_service_job_mapping(chronos_client, jobs_in_configs)

    for key, job_state_pairs in mapping.items():
        service = key[0]
        instance = key[1]

        job_config = load_chronos_job_config(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
        )
        message, status = sensu_message_status_for_jobs(
            chronos_job_config=job_config,
            service=service,
            instance=instance,
            cluster=cluster,
            job_state_pairs=job_state_pairs,
        )
        overrides = compose_monitoring_overrides_for_service(
            chronos_job_config=job_config,
            soa_dir=soa_dir,
        )
        send_event(
            service=service,
            instance=instance,
            monitoring_overrides=overrides,
            status_code=status,
            message=message,
            soa_dir=soa_dir,
        )
def main():
    """Report the status of each mapped Chronos service instance via ``send_event``.

    Every entry of the service/job mapping results in exactly one
    ``send_event`` call; errors raised by any helper propagate to the caller.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    system_paasta_config = utils.load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    configured = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)
    jobs_by_service_instance = build_service_job_mapping(client, configured)

    for service_instance, job_state_pairs in jobs_by_service_instance.items():
        service = service_instance[0]
        instance = service_instance[1]

        chronos_job_config = load_chronos_job_config(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
        )

        output, status = sensu_message_status_for_jobs(
            chronos_job_config=chronos_job_config,
            service=service,
            instance=instance,
            cluster=cluster,
            job_state_pairs=job_state_pairs,
        )

        # Overrides are composed before the event payload is assembled.
        overrides = compose_monitoring_overrides_for_service(
            chronos_job_config=chronos_job_config,
            soa_dir=soa_dir,
        )

        event = {
            'service': service,
            'instance': instance,
            'monitoring_overrides': overrides,
            'status_code': status,
            'message': output,
            'soa_dir': soa_dir,
        }
        send_event(**event)
def main():
    """Delete deployed Chronos jobs that are neither expected nor valid temporary jobs.

    The set to delete is: deployed job names, minus the composed ids of the
    configured jobs, minus temporary PaaSTA jobs that have not expired. Tasks
    are cleaned up first, then the jobs themselves. For each job removed
    successfully an OK event is sent through ``send_event``. A summary is
    printed and the process exits with status 1 if any task or job removal
    failed.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    running_jobs = set(deployed_job_names(client))

    expected_service_jobs = {
        chronos_tools.compose_job_id(*job)
        for job in chronos_tools.get_chronos_jobs_for_cluster(soa_dir=args.soa_dir)
    }

    all_tmp_jobs = set(filter_tmp_jobs(filter_paasta_jobs(running_jobs)))
    expired_tmp_jobs = set(filter_expired_tmp_jobs(client, all_tmp_jobs))
    valid_tmp_jobs = all_tmp_jobs - expired_tmp_jobs

    to_delete = running_jobs - expected_service_jobs - valid_tmp_jobs

    # A response whose last element is an Exception marks a failed removal.
    task_responses = cleanup_tasks(client, to_delete)
    task_successes = []
    task_failures = []
    for response in task_responses:
        if isinstance(response[-1], Exception):
            task_failures.append(response)
        else:
            task_successes.append(response)

    job_responses = cleanup_jobs(client, to_delete)
    job_successes = []
    job_failures = []
    for response in job_responses:
        if isinstance(response[-1], Exception):
            job_failures.append(response)
        else:
            job_successes.append(response)
            try:
                (service, instance) = chronos_tools.decompose_job_id(response[0])
                send_event(
                    service=service,
                    instance=instance,
                    monitoring_overrides={},
                    soa_dir=soa_dir,
                    status_code=pysensu_yelp.Status.OK,
                    message="This instance was removed and is no longer supposed to be scheduled.",
                )
            except InvalidJobNameError:
                # If we deleted some bogus job with a bogus jobid that could not be parsed,
                # Just move on, no need to send any kind of paasta event.
                pass

    # Parenthesised single-argument print behaves identically on Python 2 and
    # is also valid Python 3.
    if not to_delete:
        print('No Chronos Jobs to remove')
    else:
        if task_successes:
            print(format_list_output(
                "Successfully Removed Tasks (if any were running) for:",
                [job[0] for job in task_successes],
            ))

        # if there are any failures, print and exit appropriately
        if task_failures:
            print(format_list_output("Failed to Delete Tasks for:", [job[0] for job in task_failures]))

        if job_successes:
            print(format_list_output("Successfully Removed Jobs:", [job[0] for job in job_successes]))

        # if there are any failures, print and exit appropriately
        if job_failures:
            print(format_list_output("Failed to Delete Jobs:", [job[0] for job in job_failures]))

        if job_failures or task_failures:
            sys.exit(1)
def main():
    """Clean up deployed Chronos jobs that are neither expected nor valid temporary jobs.

    Jobs to delete are the deployed job names minus the composed ids of the
    configured jobs minus the temporary PaaSTA jobs that have not expired.
    Tasks are removed before jobs. Each successfully removed job triggers an
    OK event through ``monitoring_tools.send_event``. A summary is printed
    with ``paasta_print`` and the process exits with status 1 when any task
    or job removal failed.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    cluster = utils.load_system_paasta_config().get_cluster()

    deployed = set(deployed_job_names(client))
    expected = {
        chronos_tools.compose_job_id(*job)
        for job in chronos_tools.get_chronos_jobs_for_cluster(soa_dir=args.soa_dir)
    }

    tmp_jobs = set(filter_tmp_jobs(filter_paasta_jobs(deployed)))
    expired_tmp = set(filter_expired_tmp_jobs(client, tmp_jobs, cluster=cluster, soa_dir=soa_dir))
    live_tmp = tmp_jobs - expired_tmp

    to_delete = deployed - expected - live_tmp

    # A response whose last element is an Exception marks a failed removal.
    removed_tasks = []
    failed_tasks = []
    for outcome in cleanup_tasks(client, to_delete):
        bucket = failed_tasks if isinstance(outcome[-1], Exception) else removed_tasks
        bucket.append(outcome)

    removed_jobs = []
    failed_jobs = []
    for outcome in cleanup_jobs(client, to_delete):
        if isinstance(outcome[-1], Exception):
            failed_jobs.append(outcome)
            continue

        removed_jobs.append(outcome)
        try:
            service, instance = chronos_tools.decompose_job_id(outcome[0])
            monitoring_tools.send_event(
                check_name=check_chronos_job_name(service, instance),
                service=service,
                overrides={},
                soa_dir=soa_dir,
                status=pysensu_yelp.Status.OK,
                output="This instance was removed and is no longer supposed to be scheduled.",
            )
        except InvalidJobNameError:
            # If we deleted some bogus job with a bogus jobid that could not be parsed,
            # Just move on, no need to send any kind of paasta event.
            pass

    if not to_delete:
        paasta_print('No Chronos Jobs to remove')
        return

    if removed_tasks:
        paasta_print(format_list_output(
            "Successfully Removed Tasks (if any were running) for:",
            [entry[0] for entry in removed_tasks],
        ))

    # if there are any failures, print and exit appropriately
    if failed_tasks:
        paasta_print(format_list_output("Failed to Delete Tasks for:", [entry[0] for entry in failed_tasks]))

    if removed_jobs:
        paasta_print(format_list_output("Successfully Removed Jobs:", [entry[0] for entry in removed_jobs]))

    # if there are any failures, print and exit appropriately
    if failed_jobs:
        paasta_print(format_list_output("Failed to Delete Jobs:", [entry[0] for entry in failed_jobs]))

    if failed_jobs or failed_tasks:
        sys.exit(1)