def bounce_chronos_job(
    service,
    instance,
    cluster,
    jobs_to_disable,
    jobs_to_delete,
    job_to_create,
    client,
):
    """Run one Chronos bounce: disable jobs, delete jobs, then create a job.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param cluster: cluster name, forwarded to ``_log``
    :param jobs_to_disable: iterable of jobs passed to ``chronos_tools.disable_job``
    :param jobs_to_delete: iterable of jobs passed to ``chronos_tools.delete_job``
    :param job_to_create: mapping with a ``'name'`` key passed to
        ``chronos_tools.create_job``; a falsy value skips creation
    :param client: client object forwarded to every ``chronos_tools`` call
    :returns: the tuple ``(0, "All chronos bouncing tasks finished.")``
    """
    # Keyword arguments shared by both _log calls below.
    deploy_log = dict(service=service, instance=instance, component='deploy', cluster=cluster)

    if jobs_to_disable or jobs_to_delete or job_to_create:
        summary = "Chronos bouncing. Jobs to disable: %s, jobs to delete: %s, job_to_create: %s" % (
            jobs_to_disable, jobs_to_delete, job_to_create,
        )
        _log(level='debug', line=summary, **deploy_log)
    else:
        job_id = chronos_tools.compose_job_id(service, instance)
        log.debug("Not doing any chronos bounce action for %s" % job_id)

    # Order matters: disable first, then delete, and only then create.
    for stale_job in jobs_to_disable:
        chronos_tools.disable_job(client=client, job=stale_job)

    for doomed_job in jobs_to_delete:
        chronos_tools.delete_job(client=client, job=doomed_job)

    if job_to_create:
        chronos_tools.create_job(client=client, job=job_to_create)
        created_msg = 'Created new Chronos job: %s' % job_to_create['name']
        _log(level='event', line=created_msg, **deploy_log)

    return (0, "All chronos bouncing tasks finished.")
def launch_jobs(context, num_jobs, state, service, job):
    """Create ``num_jobs`` disabled test jobs in Chronos and record their names.

    Every job definition gets the name ``compose_job_id(service, job)`` and is
    submitted through ``context.chronos_client.add``.

    :param context: object exposing ``chronos_client``; receives the resulting
        ``configured_job_names`` or ``unconfigured_job_names`` attribute
    :param num_jobs: number of jobs to create (anything ``int()`` accepts)
    :param state: ``"configured"`` or ``"unconfigured"``; any other value
        records nothing on ``context``
    :param service: service part of the job id
    :param job: job/instance part of the job id
    :raises Exception: re-raises whatever ``client.add`` raised, after
        printing the offending job definition
    """
    client = context.chronos_client
    job_definitions = [
        {
            'async': False,
            'command': 'echo 1',
            'epsilon': 'PT15M',
            'name': compose_job_id(service, job),
            'owner': 'paasta',
            'disabled': True,
            'schedule': 'R/2014-01-01T00:00:00Z/PT60M',
        }
        for _ in range(int(num_jobs))
    ]

    # The loop variable is deliberately not called ``job`` so it does not
    # rebind the ``job`` parameter.
    for definition in job_definitions:
        try:
            # Single-argument print(...) behaves identically on Python 2 and 3.
            print('attempting to create job %s' % definition['name'])
            client.add(definition)
        except Exception:
            print('Error creating test job: %s' % json.dumps(definition))
            raise

    job_names = [definition['name'] for definition in job_definitions]

    # A 'configured' job is one which has had the appropriate yelp-soa
    # configs put into place.
    # An 'unconfigured' job represents a job which may at one stage have been
    # a configured chronos job, but no longer has the corresponding
    # configuration in place.
    # 'unconfigured' jobs are the target for cleanup_chronos_jobs.
    if state == "configured":
        context.configured_job_names = job_names
    elif state == "unconfigured":
        context.unconfigured_job_names = job_names
def launch_jobs(context, num_jobs, state, service, job):
    """Submit ``num_jobs`` disabled test jobs to Chronos and remember their names.

    Each definition is named ``compose_job_id(service, job)`` and added through
    ``context.chronos_client``. Depending on ``state`` the list of names is
    stored on ``context.configured_job_names`` (``"configured"``) or
    ``context.unconfigured_job_names`` (``"unconfigured"``); other values of
    ``state`` store nothing. A failure in ``add`` is reported and re-raised.
    """
    chronos = context.chronos_client

    definitions = []
    for _ in range(int(num_jobs)):
        definitions.append({
            'async': False,
            'command': 'echo 1',
            'epsilon': 'PT15M',
            'name': compose_job_id(service, job),
            'owner': 'paasta',
            'disabled': True,
            'schedule': 'R/2014-01-01T00:00:00Z/PT60M',
        })

    for definition in definitions:
        try:
            paasta_print('attempting to create job %s' % definition['name'])
            chronos.add(definition)
        except Exception:
            paasta_print('Error creating test job: %s' % json.dumps(definition))
            raise

    created_names = [definition['name'] for definition in definitions]

    # 'configured': the yelp-soa configs for the job have been put in place.
    # 'unconfigured': the job may once have been a configured chronos job but
    # its configuration is no longer in place; these are the jobs that
    # cleanup_chronos_jobs targets.
    if state == "configured":
        context.configured_job_names = created_names
    elif state == "unconfigured":
        context.unconfigured_job_names = created_names
def launch_jobs(context, num_jobs, state, service, job):
    """Add ``num_jobs`` disabled test jobs to Chronos and record their names.

    All definitions share the name ``compose_job_id(service, job)`` and are
    sent through ``context.chronos_client.add``. When ``state`` is
    ``"configured"`` the names land on ``context.configured_job_names``; when
    it is ``"unconfigured"`` they land on ``context.unconfigured_job_names``;
    any other value leaves ``context`` untouched. Errors from ``add`` are
    printed together with the job definition and then re-raised.
    """

    def build_definition():
        # Evaluated once per requested job, like the original comprehension.
        return {
            "async": False,
            "command": "echo 1",
            "epsilon": "PT15M",
            "name": compose_job_id(service, job),
            "owner": "paasta",
            "disabled": True,
            "schedule": "R/2014-01-01T00:00:00Z/PT60M",
        }

    chronos = context.chronos_client
    pending = [build_definition() for _ in range(int(num_jobs))]

    for spec in pending:
        try:
            paasta_print("attempting to create job %s" % spec["name"])
            chronos.add(spec)
        except Exception:
            paasta_print("Error creating test job: %s" % json.dumps(spec))
            raise

    # 'configured' jobs have their yelp-soa configs in place; 'unconfigured'
    # jobs may once have been configured but no longer have that
    # configuration, and are what cleanup_chronos_jobs removes.
    target_attribute = {
        "configured": "configured_job_names",
        "unconfigured": "unconfigured_job_names",
    }.get(state)
    if target_attribute is not None:
        setattr(context, target_attribute, [spec["name"] for spec in pending])
def bounce_chronos_job(service, instance, cluster, jobs_to_disable, jobs_to_delete, job_to_create, client):
    """Disable, delete and create Chronos jobs for one service instance.

    The three steps always run in that order. A summary is sent through
    ``_log`` at level ``'debug'`` when there is anything to do, otherwise a
    debug message goes to the module logger. Creating a job also emits an
    ``'event'``-level ``_log`` line naming the new job.

    :returns: ``(0, "All chronos bouncing tasks finished.")``
    """

    def emit(level, line):
        # Every _log call from this function uses the same identifying fields.
        _log(service=service, instance=instance, component='deploy',
             cluster=cluster, level=level, line=line)

    has_work = any([jobs_to_disable, jobs_to_delete, job_to_create])
    if not has_work:
        log.debug(
            "Not doing any chronos bounce action for %s"
            % chronos_tools.compose_job_id(service, instance)
        )
    else:
        emit(
            'debug',
            "Chronos bouncing. Jobs to disable: %s, jobs to delete: %s, job_to_create: %s"
            % (jobs_to_disable, jobs_to_delete, job_to_create),
        )

    for to_disable in jobs_to_disable:
        chronos_tools.disable_job(client=client, job=to_disable)

    for to_delete in jobs_to_delete:
        chronos_tools.delete_job(client=client, job=to_delete)

    if job_to_create:
        chronos_tools.create_job(client=client, job=job_to_create)
        emit('event', 'Created new Chronos job: %s' % job_to_create['name'])

    return (0, "All chronos bouncing tasks finished.")
def main():
    """Remove Chronos jobs (and their tasks) that are deployed but not expected.

    The set of jobs to delete is computed by ``jobs_to_delete`` from the jobs
    expected for this cluster and the paasta jobs currently deployed. Tasks
    are cleaned up first, then the jobs themselves. Each cleanup response is a
    sequence whose first element is the job id and whose last element is an
    ``Exception`` instance when the operation failed.

    Prints a summary and exits with status 1 if any task or job cleanup failed.
    """
    args = parse_args()
    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    # get_chronos_jobs_for_cluster returns (service, job)
    expected_service_jobs = chronos_tools.get_chronos_jobs_for_cluster(soa_dir=args.soa_dir)

    # filter jobs not related to paasta
    # and decompose into (service, instance, tag) tuples
    paasta_jobs = filter_paasta_jobs(deployed_job_names(client))
    running_service_jobs = [chronos_tools.decompose_job_id(job) for job in paasta_jobs]

    to_delete = jobs_to_delete(expected_service_jobs, running_service_jobs)

    # recompose the job ids again for deletion
    to_delete_job_ids = [chronos_tools.compose_job_id(*job) for job in to_delete]

    task_responses = cleanup_tasks(client, to_delete_job_ids)
    task_successes = []
    task_failures = []
    for response in task_responses:
        if isinstance(response[-1], Exception):
            task_failures.append(response)
        else:
            task_successes.append(response)

    job_responses = cleanup_jobs(client, to_delete_job_ids)
    job_successes = []
    job_failures = []
    for response in job_responses:
        if isinstance(response[-1], Exception):
            job_failures.append(response)
        else:
            job_successes.append(response)

    # Single-argument print(...) is valid and equivalent on Python 2 and 3,
    # unlike the bare ``print x`` statement.
    if not to_delete:
        print('No Chronos Jobs to remove')
    else:
        if task_successes:
            print(format_list_output(
                "Successfully Removed Tasks (if any were running) for:",
                [job[0] for job in task_successes],
            ))

        if task_failures:
            print(format_list_output("Failed to Delete Tasks for:", [job[0] for job in task_failures]))

        if job_successes:
            print(format_list_output("Successfully Removed Jobs:", [job[0] for job in job_successes]))

        if job_failures:
            print(format_list_output("Failed to Delete Jobs:", [job[0] for job in job_failures]))

        # Any failure, for tasks or jobs, makes the whole run fail.
        if job_failures or task_failures:
            sys.exit(1)
def chronos_log_line_passes_filter(line, levels, service, components, clusters):
    """Return True if a JSON-formatted log line mentions a Chronos job of ``service``.

    The line passes when its ``message`` field contains the job id prefix
    ``chronos_tools.compose_job_id(service, '')``.

    :param line: raw log line, expected to be a JSON object
    :param levels: unused here; kept for a uniform filter signature
    :param service: service whose job id prefix is searched for
    :param components: unused here
    :param clusters: unused here
    :returns: False for lines that are not valid JSON or not a JSON object,
        otherwise whether the job id prefix occurs in ``message``
    """
    try:
        parsed_line = json.loads(line)
    except ValueError:
        log.debug('Trouble parsing line as json. Skipping. Line: %r' % line)
        return False

    # Valid JSON that is not an object (e.g. ``[1]``, ``null``, ``"text"``)
    # has no ``message`` field; skip it instead of failing on ``.get``.
    if not isinstance(parsed_line, dict):
        return False

    return chronos_tools.compose_job_id(service, '') in parsed_line.get('message', '')
def old_jobs_leftover(context, job_count):
    """Add ``job_count`` old versions of the fake service's job to Chronos.

    A deep copy of ``fake_service_config`` is renamed on every iteration to
    ``compose_job_id(fake_service_name, fake_instance_name, "git<n>", "config")``
    and submitted through ``context.chronos_client.add``. The same dict object
    is reused for every call.

    :param context: object exposing ``chronos_client``
    :param job_count: number of jobs to add (anything ``int()`` accepts)
    """
    old_job = copy.deepcopy(fake_service_config)
    # ``range`` instead of ``xrange``: the latter does not exist on Python 3.
    for n in range(int(job_count)):
        old_job["name"] = chronos_tools.compose_job_id(
            service=fake_service_name,
            instance=fake_instance_name,
            git_hash="git%d" % n,
            config_hash="config",
        )
        context.chronos_client.add(old_job)
def chronos_log_line_passes_filter(
    line, levels, service, components, clusters, instances, start_time=None, end_time=None
):
    """Return True if a JSON-formatted log line should be shown for ``service``.

    A line passes when its ``timestamp`` is accepted by
    ``check_timestamp_in_range(timestamp, start_time, end_time)`` and its
    ``message`` contains the job id prefix
    ``chronos_tools.compose_job_id(service, "")``.

    :param line: raw log line, expected to be a JSON object
    :param levels: unused here; kept for a uniform filter signature
    :param service: service whose job id prefix is searched for
    :param components: unused here
    :param clusters: unused here
    :param instances: unused here
    :param start_time: lower bound forwarded to ``check_timestamp_in_range``
    :param end_time: upper bound forwarded to ``check_timestamp_in_range``
    :returns: False for lines that are not valid JSON or not a JSON object,
        or whose timestamp is out of range; otherwise the result of the
        message match
    """
    try:
        parsed_line = json.loads(line)
    except ValueError:
        log.debug("Trouble parsing line as json. Skipping. Line: %r" % line)
        return False

    # Valid JSON that is not an object (e.g. ``[1]``, ``null``) carries neither
    # ``timestamp`` nor ``message``; skip it instead of failing on ``.get``.
    if not isinstance(parsed_line, dict):
        return False

    timestamp = isodate.parse_datetime(parsed_line.get("timestamp"))
    if not check_timestamp_in_range(timestamp, start_time, end_time):
        return False
    return chronos_tools.compose_job_id(service, "") in parsed_line.get("message", "")
def chronos_log_line_passes_filter(
    line, levels, service, components, clusters, instances, start_time=None, end_time=None,
):
    """Return True if a JSON-formatted log line should be shown for ``service``.

    The line must have a ``timestamp`` accepted by
    ``check_timestamp_in_range(timestamp, start_time, end_time)`` and a
    ``message`` containing ``chronos_tools.compose_job_id(service, '')``.

    :param line: raw log line, expected to be a JSON object
    :param levels: unused here; kept for a uniform filter signature
    :param service: service whose job id prefix is searched for
    :param components: unused here
    :param clusters: unused here
    :param instances: unused here
    :param start_time: lower bound forwarded to ``check_timestamp_in_range``
    :param end_time: upper bound forwarded to ``check_timestamp_in_range``
    :returns: False for lines that are not valid JSON or not a JSON object,
        or whose timestamp is out of range; otherwise the result of the
        message match
    """
    try:
        parsed_line = json.loads(line)
    except ValueError:
        log.debug('Trouble parsing line as json. Skipping. Line: %r' % line)
        return False

    # A JSON array, string, number or null has no fields to inspect; treat it
    # like any other unusable line rather than raising AttributeError below.
    if not isinstance(parsed_line, dict):
        return False

    timestamp = isodate.parse_datetime(parsed_line.get('timestamp'))
    if not check_timestamp_in_range(timestamp, start_time, end_time):
        return False
    return chronos_tools.compose_job_id(service, '') in parsed_line.get('message', '')
def main():
    """Remove Chronos jobs that are running but neither expected nor valid temporary jobs.

    Jobs to delete are ``running - expected - valid temporary jobs``, where
    valid temporary jobs are the paasta temporary jobs that
    ``filter_expired_tmp_jobs`` did not report as expired. Tasks are cleaned up
    first, then the jobs. Each cleanup response is a sequence whose first
    element is the job id and whose last element is an ``Exception`` instance
    when the operation failed.

    For every successfully removed job whose id can be decomposed, an OK event
    is sent. Prints a summary and exits with status 1 if any cleanup failed.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    running_jobs = set(deployed_job_names(client))

    expected_service_jobs = {
        chronos_tools.compose_job_id(*job)
        for job in chronos_tools.get_chronos_jobs_for_cluster(soa_dir=soa_dir)
    }

    all_tmp_jobs = set(filter_tmp_jobs(filter_paasta_jobs(running_jobs)))
    expired_tmp_jobs = set(filter_expired_tmp_jobs(client, all_tmp_jobs))
    valid_tmp_jobs = all_tmp_jobs - expired_tmp_jobs

    to_delete = running_jobs - expected_service_jobs - valid_tmp_jobs

    task_responses = cleanup_tasks(client, to_delete)
    task_successes = []
    task_failures = []
    for response in task_responses:
        if isinstance(response[-1], Exception):
            task_failures.append(response)
        else:
            task_successes.append(response)

    job_responses = cleanup_jobs(client, to_delete)
    job_successes = []
    job_failures = []
    for response in job_responses:
        if isinstance(response[-1], Exception):
            job_failures.append(response)
        else:
            job_successes.append(response)
            try:
                (service, instance) = chronos_tools.decompose_job_id(response[0])
                send_event(
                    service=service,
                    instance=instance,
                    monitoring_overrides={},
                    soa_dir=soa_dir,
                    status_code=pysensu_yelp.Status.OK,
                    message="This instance was removed and is no longer supposed to be scheduled.",
                )
            except InvalidJobNameError:
                # If we deleted some bogus job with a bogus jobid that could not be parsed,
                # just move on, no need to send any kind of paasta event.
                pass

    # Single-argument print(...) is valid and equivalent on Python 2 and 3,
    # unlike the bare ``print x`` statement.
    if not to_delete:
        print('No Chronos Jobs to remove')
    else:
        if task_successes:
            print(format_list_output(
                "Successfully Removed Tasks (if any were running) for:",
                [job[0] for job in task_successes],
            ))

        if task_failures:
            print(format_list_output("Failed to Delete Tasks for:", [job[0] for job in task_failures]))

        if job_successes:
            print(format_list_output("Successfully Removed Jobs:", [job[0] for job in job_successes]))

        if job_failures:
            print(format_list_output("Failed to Delete Jobs:", [job[0] for job in job_failures]))

        # Any failure, for tasks or jobs, makes the whole run fail.
        if job_failures or task_failures:
            sys.exit(1)
def main():
    """Clone a Chronos job (optionally with its related jobs) for a given execution date.

    Steps, as performed below:

    1. Resolve the cluster and split ``args.service_instance`` into service
       and instance.
    2. Look up the related job configs; abort with ``NoDeploymentsAvailable``
       when there are none.
    3. Build the complete Chronos config for each job to run (only the
       requested one unless ``args.run_all_related_jobs`` is set).
    4. For each job, in topological order when running all related jobs,
       restore the un-rendered command, clone the job, and re-render the
       command for ``args.execution_date``.
    5. Add all clones to Chronos. Nothing is added until every clone has been
       built.

    :raises NoDeploymentsAvailable: no related jobs, or a job has no deployment
    :raises NoDockerImageError: re-raised from ``create_complete_config``
    :raises NoConfigurationForServiceError: re-raised from ``create_complete_config``
    :raises chronos_tools.InvalidParentError: propagated from ``create_complete_config``
    """
    args = parse_args()

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()

    service, instance = chronos_tools.decompose_job_id(args.service_instance)

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    related_jobs = chronos_tools.get_related_jobs_configs(cluster, service, instance, soa_dir=args.soa_dir)
    if not related_jobs:
        error_msg = "No deployment found for {} in cluster {}. Has Jenkins run for it?".format(
            args.service_instance, cluster,
        )
        paasta_print(error_msg)
        raise NoDeploymentsAvailable

    if not args.run_all_related_jobs:
        # Strip the configuration of the related services; that information
        # will not be used by the rest of the flow.
        related_jobs = {
            (service, instance): related_jobs[(service, instance)],
        }

    complete_job_configs = {}
    for (srv, inst) in related_jobs:
        try:
            complete_job_configs[(srv, inst)] = chronos_tools.create_complete_config(
                service=srv,
                job_name=inst,
                soa_dir=args.soa_dir,
            )
        except (NoDeploymentsAvailable, NoDockerImageError):
            error_msg = "No deployment found for {} in cluster {}. Has Jenkins run for it?".format(
                chronos_tools.compose_job_id(srv, inst), cluster,
            )
            paasta_print(error_msg)
            # Bare ``raise`` keeps the original traceback intact.
            raise
        except NoConfigurationForServiceError as e:
            error_msg = (
                "Could not read chronos configuration file for {} in cluster {}\nError was: {}"
                .format(
                    chronos_tools.compose_job_id(srv, inst), cluster, str(e),
                )
            )
            paasta_print(error_msg)
            raise
        # chronos_tools.InvalidParentError is intentionally not caught here:
        # it propagates to the caller unchanged.

    if not args.run_all_related_jobs:
        sorted_jobs = [(service, instance)]
    else:
        sorted_jobs = chronos_tools.topological_sort_related_jobs(
            cluster, service, instance, soa_dir=args.soa_dir,
        )

    timestamp = datetime.datetime.utcnow().isoformat()
    # The same for every job, so parse it once; a malformed date also fails
    # before any per-job work is done.
    execution_date = datetime.datetime.strptime(args.execution_date, "%Y-%m-%dT%H:%M:%S")

    chronos_to_add = []
    # Distinct loop names so the requested service/instance are not rebound.
    for (job_service, job_instance) in sorted_jobs:
        # complete_job_config is a formatted version of the job,
        # so the command is formatted in the context of 'now';
        # replace it with the 'original' cmd so it can be re-rendered.
        chronos_job_config = chronos_tools.load_chronos_job_config(
            service=job_service,
            instance=job_instance,
            cluster=cluster,
            soa_dir=args.soa_dir,
        )
        original_command = chronos_job_config.get_cmd()
        complete_job_config = complete_job_configs[(job_service, job_instance)]
        complete_job_config['command'] = original_command
        clone = clone_job(
            chronos_job=complete_job_config,
            timestamp=timestamp,
            force_disabled=args.force_disabled,
        )
        # modify the command to run commands for a given date
        clone = modify_command_for_date(
            chronos_job=clone,
            date=execution_date,
            verbose=args.verbose,
        )

        if not args.run_all_related_jobs and chronos_tools.get_job_type(clone) == chronos_tools.JobType.Dependent:
            # If the job is a dependent job and we want to re-run only the specific instance,
            # remove the parents and update the schedule to start the job as soon as possible.
            clone = set_default_schedule(remove_parents(clone))

        chronos_to_add.append(clone)

    for job_to_add in chronos_to_add:
        client.add(job_to_add)
def main():
    """Clean up Chronos jobs that should no longer be running in this cluster.

    The jobs to remove are the deployed jobs minus the jobs expected for the
    cluster minus the temporary paasta jobs that have not expired. Tasks are
    removed before jobs. A cleanup response counts as a failure when its last
    element is an ``Exception`` instance; its first element is the job id.

    Each successfully removed job with a parseable id triggers an OK
    monitoring event. A summary is printed, and the process exits with status
    1 when any task or job cleanup failed.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    chronos_config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(chronos_config)

    cluster = utils.load_system_paasta_config().get_cluster()

    running_jobs = set(deployed_job_names(client))

    expected_service_jobs = set()
    for expected in chronos_tools.get_chronos_jobs_for_cluster(soa_dir=soa_dir):
        expected_service_jobs.add(chronos_tools.compose_job_id(*expected))

    all_tmp_jobs = set(filter_tmp_jobs(filter_paasta_jobs(running_jobs)))
    expired_tmp_jobs = set(filter_expired_tmp_jobs(client, all_tmp_jobs, cluster=cluster, soa_dir=soa_dir))
    valid_tmp_jobs = all_tmp_jobs - expired_tmp_jobs

    to_delete = running_jobs - expected_service_jobs - valid_tmp_jobs

    task_successes, task_failures = [], []
    for task_response in cleanup_tasks(client, to_delete):
        failed = isinstance(task_response[-1], Exception)
        (task_failures if failed else task_successes).append(task_response)

    job_successes, job_failures = [], []
    for job_response in cleanup_jobs(client, to_delete):
        if isinstance(job_response[-1], Exception):
            job_failures.append(job_response)
            continue

        job_successes.append(job_response)
        try:
            (service, instance) = chronos_tools.decompose_job_id(job_response[0])
            monitoring_tools.send_event(
                check_name=check_chronos_job_name(service, instance),
                service=service,
                overrides={},
                soa_dir=soa_dir,
                status=pysensu_yelp.Status.OK,
                output="This instance was removed and is no longer supposed to be scheduled.",
            )
        except InvalidJobNameError:
            # A deleted job whose id cannot be parsed gets no paasta event.
            pass

    def first_fields(responses):
        # The job id is the first element of every response.
        return [entry[0] for entry in responses]

    if not to_delete:
        paasta_print('No Chronos Jobs to remove')
    else:
        if task_successes:
            paasta_print(format_list_output(
                "Successfully Removed Tasks (if any were running) for:",
                first_fields(task_successes),
            ))

        if task_failures:
            paasta_print(format_list_output("Failed to Delete Tasks for:", first_fields(task_failures)))

        if job_successes:
            paasta_print(format_list_output("Successfully Removed Jobs:", first_fields(job_successes)))

        if job_failures:
            paasta_print(format_list_output("Failed to Delete Jobs:", first_fields(job_failures)))

        # Any failure, for tasks or jobs, makes the whole run fail.
        if job_failures or task_failures:
            sys.exit(1)