Ejemplo n.º 1
0
def filter_expired_tmp_jobs(client, job_names):
    """
    Pick out the temporary jobs that are old enough to be cleaned up.

    A job name is reported when a temporary job found for its
    service/instance:

        - has run at least once (success and failure both count), and
        - last ran more than 24 hours ago.

    :param client: Chronos client handed to ``chronos_tools`` for the lookup
    :param job_names: iterable of job ids to examine
    :returns: list of job names; a name is appended once for every matching
              temporary job, so it may appear more than once
    """
    max_age = datetime.timedelta(days=1)
    stale = []
    for job_name in job_names:
        service, instance = chronos_tools.decompose_job_id(job_name)
        candidates = chronos_tools.get_temporary_jobs_for_service_instance(
            client=client, service=service, instance=instance,
        )
        for candidate in candidates:
            finished_at, state = chronos_tools.get_status_last_run(candidate)
            if state == chronos_tools.LastRunState.NotRun:
                # Never ran, so there is no last-run time to compare against.
                continue
            now = datetime.datetime.now(dateutil.tz.tzutc())
            if now - dateutil.parser.parse(finished_at) > max_age:
                stale.append(job_name)
    return stale
Ejemplo n.º 2
0
def last_run_state_for_jobs(jobs):
    """
    Pair every job with the state of its most recent run.

    ``chronos_tools.get_status_last_run`` yields a ``(time, state)`` pair;
    only the state (the last element) is kept.

    :param jobs: iterable of Chronos jobs
    :returns: list of ``(job, last_run_state)`` tuples, in input order
    """
    paired = []
    for chronos_job in jobs:
        state = chronos_tools.get_status_last_run(chronos_job)[-1]
        paired.append((chronos_job, state))
    return paired
Ejemplo n.º 3
0
def _format_last_result(job):
    """
    Build the display pair describing a job's most recent run.

    :param job: job passed to ``chronos_tools.get_status_last_run``
    :returns: tuple of (prettified status, prettified time); the time is the
              literal ``"never"`` when the last-run state is ``NotRun``
    """
    last_time, last_state = chronos_tools.get_status_last_run(job)
    never_ran = last_state is chronos_tools.LastRunState.NotRun
    when = "never" if never_ran else _prettify_time(last_time)
    return _prettify_status(last_state), when
Ejemplo n.º 4
0
def filter_expired_tmp_jobs(client, job_names):
    """
    Select the temporary jobs that are due for removal.

    For each job id, the temporary jobs of its service/instance are looked
    up. The job id is collected whenever one of them:

        - has a last-run state other than ``NotRun`` (the outcome of the run
          does not matter), and
        - last ran more than one day ago.

    :param client: Chronos client used to look up temporary jobs
    :param job_names: iterable of job ids to check
    :returns: list of job ids, one entry per expired temporary job found
    """
    one_day = datetime.timedelta(days=1)
    not_run = chronos_tools.LastRunState.NotRun
    to_remove = []
    for job_name in job_names:
        service, instance = chronos_tools.decompose_job_id(job_name)
        tmp_jobs = chronos_tools.get_temporary_jobs_for_service_instance(
            client=client,
            service=service,
            instance=instance,
        )
        for tmp_job in tmp_jobs:
            run_time, run_state = chronos_tools.get_status_last_run(tmp_job)
            if run_state != not_run:
                utc_now = datetime.datetime.now(dateutil.tz.tzutc())
                elapsed = utc_now - dateutil.parser.parse(run_time)
                if elapsed > one_day:
                    to_remove.append(job_name)
    return to_remove
Ejemplo n.º 5
0
def last_run_state_for_jobs(jobs):
    """
    Build a list of ``(job, LastRunState)`` pairs from a list of jobs.

    The state is the final element of the ``(time, state)`` pair returned by
    ``chronos_tools.get_status_last_run``; the time is discarded.

    :param jobs: iterable of Chronos jobs
    :returns: list of ``(job, state)`` tuples in the same order as ``jobs``
    """
    result = []
    for chronos_job in jobs:
        last_run = chronos_tools.get_status_last_run(chronos_job)
        result.append((chronos_job, last_run[-1]))
    return result
Ejemplo n.º 6
0
def filter_expired_tmp_jobs(client, job_names, cluster, soa_dir):
    """
    Given a list of temporary jobs, find those ready to be removed.

    A job name is reported when a temporary job found for its
    service/instance:

        - has run (irrespective of whether it was a success or failure), and
        - last ran longer ago than its retention period, which is the larger
          of 24 hours and the job's configured schedule interval.

    The schedule interval comes from the job's configuration for the given
    ``cluster`` / ``soa_dir``. If that configuration cannot be found, or it
    has no interval, only the 24 hour minimum applies.

    :param client: Chronos client used to look up temporary jobs
    :param job_names: iterable of job ids to check
    :param cluster: cluster name used to load the job configuration
    :param soa_dir: configuration directory used to load the job configuration
    :returns: list of job names; a name is appended once per expired
              temporary job, so it may appear more than once
    """
    minimum_retention = datetime.timedelta(days=1)
    expired = []
    for job_name in job_names:
        service, instance = chronos_tools.decompose_job_id(job_name)
        temporary_jobs = chronos_tools.get_temporary_jobs_for_service_instance(
            client=client, service=service, instance=instance
        )
        # The retention period depends only on (service, instance, cluster,
        # soa_dir), not on the individual temporary job, so resolve it lazily
        # and at most once per job name instead of once per temporary job.
        retention = None
        for job in temporary_jobs:
            last_run_time, last_run_state = chronos_tools.get_status_last_run(job)
            if last_run_state == chronos_tools.LastRunState.NotRun:
                # Never ran: nothing to expire, and no need to load the config.
                continue
            if retention is None:
                try:
                    chronos_job_config = chronos_tools.load_chronos_job_config(
                        service=service,
                        instance=instance,
                        cluster=cluster,
                        soa_dir=soa_dir,
                    )
                    interval = (
                        chronos_job_config.get_schedule_interval_in_seconds() or 0
                    )
                except NoConfigurationForServiceError:
                    # If we can't get the job's config, default to cleanup
                    # after 1 day (an interval of 0 lets the minimum win).
                    interval = 0
                retention = max(
                    datetime.timedelta(seconds=interval), minimum_retention
                )
            age = datetime.datetime.now(
                dateutil.tz.tzutc()
            ) - dateutil.parser.parse(last_run_time)
            if age > retention:
                expired.append(job_name)
    return expired
Ejemplo n.º 7
0
def _format_last_result(job):
    """
    Return the prettified status and time of a job's last run.

    :param job: job handed to ``chronos_tools.get_status_last_run``
    :returns: ``(status, time)`` tuple, where time is ``"never"`` if the
              last-run state is ``NotRun``
    """
    run_time, run_state = chronos_tools.get_status_last_run(job)
    has_run = run_state is not chronos_tools.LastRunState.NotRun
    shown_time = _prettify_time(run_time) if has_run else "never"
    return _prettify_status(run_state), shown_time
Ejemplo n.º 8
0
def chronos_instance_status(instance_status, service, instance, verbose):
    """
    Assemble a status dictionary for one Chronos service instance.

    :param instance_status: not used by this function
    :param service: service name
    :param instance: instance name
    :param verbose: when truthy, also report under ``status.mesos_state``
                    whether any running task matches the job name
    :returns: dict with the keys ``desired_state``, ``schedule_type``,
              ``status``, ``command`` and ``last_status``, plus ``schedule``
              for scheduled jobs or ``parents`` for dependent jobs
    """
    chronos_config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(chronos_config)
    job_config = chronos_tools.load_chronos_job_config(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
    )

    cstatus = {'desired_state': job_config.get_desired_state()}

    # Scheduling details: what is reported depends on the job type.
    job_type = chronos_tools.get_job_type(job_config.config_dict)
    if job_type == chronos_tools.JobType.Scheduled:
        schedule = job_config.get_schedule()
        epsilon = job_config.get_epsilon()
        time_zone = job_config.get_schedule_time_zone()
        if time_zone is None or time_zone == 'null':
            # A missing time zone is reported as UTC.
            time_zone = 'UTC'
        cstatus['schedule'] = {
            'schedule': schedule,
            'epsilon': epsilon,
            'time_zone': time_zone,
        }
        cstatus['schedule_type'] = 'schedule'
    elif job_type == chronos_tools.JobType.Dependent:
        cstatus['parents'] = job_config.get_parents()
        cstatus['schedule_type'] = 'parents'
    else:
        cstatus['schedule_type'] = 'unknown'

    # Current state of the job.
    state = {}
    if verbose:
        running_tasks = a_sync.block(get_cached_list_of_running_tasks_from_frameworks)
        matching_tasks = select_tasks_by_id(running_tasks, job_config.get_job_name())
        state['mesos_state'] = 'running' if len(matching_tasks) else 'not_running'
    if job_config.get_disabled():
        state['disabled_state'] = 'not_scheduled'
    else:
        state['disabled_state'] = 'scheduled'
    state['chronos_state'] = chronos_tools.get_chronos_status_for_job(client, service, instance)
    cstatus['status'] = state

    cstatus['command'] = job_config.get_cmd()

    # Outcome of the most recent run.
    # NOTE(review): the last-run info is read from the job's config dict
    # rather than from a job fetched through `client` - confirm this is intended.
    last_time, last_state = chronos_tools.get_status_last_run(job_config.config_dict)
    if last_state == chronos_tools.LastRunState.Success:
        result = 'success'
    elif last_state == chronos_tools.LastRunState.Fail:
        result = 'fail'
    else:
        # Not run, or an unrecognised state: either way there is no run time.
        result = 'not_run' if last_state == chronos_tools.LastRunState.NotRun else ''
        last_time = 'never'
    cstatus['last_status'] = {'result': result, 'time': last_time}

    return cstatus
Ejemplo n.º 9
0
def sensu_message_status_for_jobs(chronos_job_config, chronos_job, client):
    """
    Work out the Sensu status and message that describe a Chronos job.

    :param chronos_job_config: an instance of ChronosJobConfig
    :param chronos_job: the job as found in Chronos; falsy when it is absent
    :param client: configured Chronos client
    :returns: tuple of (output message, sensu status)
    """
    # Job is not in Chronos at all.
    if not chronos_job:
        if chronos_job_config.get_disabled():
            status = pysensu_yelp.Status.OK
            message = "Job {}{}{} is disabled - ignoring status.".format(
                chronos_job_config.service,
                utils.SPACER,
                chronos_job_config.instance,
            )
            return message, status
        status = pysensu_yelp.Status.WARNING
        message = (
            "Warning: %s%s%s isn't in chronos at all, "
            "which means it may not be deployed yet"
        ) % (
            chronos_job_config.service,
            utils.SPACER,
            chronos_job_config.instance,
        )
        return message, status

    # Disabled jobs are ignored, unless they are temporary jobs.
    if chronos_job.get("disabled") and not chronos_tools.is_temporary_job(chronos_job):
        status = pysensu_yelp.Status.OK
        message = "Job {}{}{} is disabled - ignoring status.".format(
            chronos_job_config.service,
            utils.SPACER,
            chronos_job_config.instance,
        )
        return message, status

    last_run_time, state = chronos_tools.get_status_last_run(chronos_job)
    interval_in_seconds = chronos_job_config.get_schedule_interval_in_seconds()

    if job_is_stuck(last_run_time, interval_in_seconds, client, chronos_job["name"]):
        status = pysensu_yelp.Status.CRITICAL
        message = message_for_stuck_job(
            service=chronos_job_config.service,
            instance=chronos_job_config.instance,
            cluster=chronos_job_config.cluster,
            last_run_iso_time=last_run_time,
            interval_in_seconds=interval_in_seconds,
            schedule=chronos_job_config.get_schedule(),
            schedule_timezone=chronos_job_config.get_schedule_time_zone(),
        )
        return message, status

    # Otherwise the status follows from the state of the last run.
    status = sensu_event_for_last_run_state(state)
    message = message_for_status(status, chronos_job_config)
    return message, status
Ejemplo n.º 10
0
def sensu_message_status_for_jobs(chronos_job_config, service, instance,
                                  cluster, chronos_job):
    """
    Work out the Sensu status and message that describe a Chronos job.

    :param chronos_job_config: job configuration; queried for its disabled
                               flag, schedule interval, schedule and time zone
    :param service: service name used in the messages
    :param instance: instance name used in the messages
    :param cluster: cluster name used in the messages
    :param chronos_job: the job as found in Chronos; falsy when it is absent
    :returns: tuple of (output message, sensu status)
    """
    # Job is not in Chronos at all.
    if not chronos_job:
        if chronos_job_config.get_disabled():
            status = pysensu_yelp.Status.OK
            message = "Job %s%s%s is disabled - ignoring status." % (
                service, utils.SPACER, instance,
            )
            return message, status
        status = pysensu_yelp.Status.WARNING
        message = (
            "Warning: %s%s%s isn't in chronos at all, "
            "which means it may not be deployed yet"
        ) % (service, utils.SPACER, instance)
        return message, status

    # Disabled jobs are ignored, unless they are temporary jobs.
    if chronos_job.get('disabled') and not chronos_tools.is_temporary_job(chronos_job):
        status = pysensu_yelp.Status.OK
        message = "Job %s%s%s is disabled - ignoring status." % (
            service, utils.SPACER, instance,
        )
        return message, status

    last_run_time, state = chronos_tools.get_status_last_run(chronos_job)
    interval_in_seconds = chronos_job_config.get_schedule_interval_in_seconds()

    if job_is_stuck(last_run_time, interval_in_seconds):
        status = pysensu_yelp.Status.CRITICAL
        message = message_for_stuck_job(
            service=service,
            instance=instance,
            cluster=cluster,
            last_run_iso_time=last_run_time,
            interval_in_seconds=interval_in_seconds,
            schedule=chronos_job_config.get_schedule(),
            schedule_timezone=chronos_job_config.get_schedule_time_zone(),
        )
        return message, status

    # Otherwise the status follows from the state of the last run.
    status = sensu_event_for_last_run_state(state)
    message = message_for_status(status, service, instance, cluster)
    return message, status