Exemple #1
0
def bounce_chronos_job(
    service,
    instance,
    cluster,
    jobs_to_disable,
    jobs_to_delete,
    job_to_create,
    client
):
    """Run one round of Chronos bounce actions for a service instance.

    Every job in ``jobs_to_disable`` is disabled, every job in
    ``jobs_to_delete`` is deleted and, when ``job_to_create`` is truthy, it is
    created. What is about to happen is logged first.

    :param service: service name, used for logging
    :param instance: instance name, used for logging
    :param cluster: cluster name, used for logging
    :param jobs_to_disable: iterable of jobs handed to ``chronos_tools.disable_job``
    :param jobs_to_delete: iterable of jobs handed to ``chronos_tools.delete_job``
    :param job_to_create: job definition (indexed by ``'name'``) or a falsy value
    :param client: client object passed through to the ``chronos_tools`` helpers
    :returns: the tuple ``(0, "All chronos bouncing tasks finished.")``
    """
    nothing_to_do = not any((jobs_to_disable, jobs_to_delete, job_to_create))
    if nothing_to_do:
        job_id = chronos_tools.compose_job_id(service, instance)
        log.debug("Not doing any chronos bounce action for %s" % job_id)
    else:
        summary = "Chronos bouncing. Jobs to disable: %s, jobs to delete: %s, job_to_create: %s" % (
            jobs_to_disable, jobs_to_delete, job_to_create)
        _log(
            service=service,
            instance=instance,
            component='deploy',
            cluster=cluster,
            level='debug',
            line=summary,
        )

    for stale_job in jobs_to_disable:
        chronos_tools.disable_job(client=client, job=stale_job)

    for doomed_job in jobs_to_delete:
        chronos_tools.delete_job(client=client, job=doomed_job)

    if job_to_create:
        chronos_tools.create_job(client=client, job=job_to_create)
        created_message = 'Created new Chronos job: %s' % job_to_create['name']
        _log(
            service=service,
            instance=instance,
            component='deploy',
            cluster=cluster,
            level='event',
            line=created_message,
        )

    return (0, "All chronos bouncing tasks finished.")
def launch_jobs(context, num_jobs, state, service, job):
    """Add ``num_jobs`` disabled test jobs to Chronos and record their names.

    Every job definition is identical: it is named
    ``compose_job_id(service, job)``, runs ``echo 1`` and is created disabled.

    :param context: object providing ``chronos_client``; receives the
        ``configured_job_names`` or ``unconfigured_job_names`` attribute
    :param num_jobs: number of jobs to create (anything ``int()`` accepts)
    :param state: ``"configured"`` or ``"unconfigured"``; any other value
        means no names are recorded on ``context``
    :param service: service name used to compose the job name
    :param job: job/instance name used to compose the job name
    :raises Exception: whatever ``client.add`` raises, after reporting it
    """
    client = context.chronos_client
    job_definitions = [{
        'async': False,
        'command': 'echo 1',
        'epsilon': 'PT15M',
        'name': compose_job_id(service, job),
        'owner': 'paasta',
        'disabled': True,
        'schedule': 'R/2014-01-01T00:00:00Z/PT60M',
    } for _ in range(0, int(num_jobs))]
    for definition in job_definitions:
        try:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('attempting to create job %s' % definition['name'])
            client.add(definition)
        except Exception:
            print('Error creating test job: %s' % json.dumps(definition))
            raise

    # A 'configured' job is one whose yelp-soa configs are in place.
    # An 'unconfigured' job may once have been a configured chronos job but
    # no longer has the corresponding configuration in place; 'unconfigured'
    # jobs are the target for cleanup_chronos_jobs.
    if state == "configured":
        context.configured_job_names = [definition['name'] for definition in job_definitions]
    elif state == "unconfigured":
        context.unconfigured_job_names = [definition['name'] for definition in job_definitions]
def launch_jobs(context, num_jobs, state, service, job):
    """Create ``num_jobs`` disabled test jobs in Chronos and remember their names.

    All definitions are identical and share the name
    ``compose_job_id(service, job)``. The names are stored on
    ``context.configured_job_names`` when ``state`` is ``"configured"`` and on
    ``context.unconfigured_job_names`` when it is ``"unconfigured"``; any
    other ``state`` stores nothing. A failure in ``client.add`` is reported
    and then re-raised.
    """
    chronos = context.chronos_client

    definitions = []
    for _ in range(int(num_jobs)):
        definitions.append({
            'async': False,
            'command': 'echo 1',
            'epsilon': 'PT15M',
            'name': compose_job_id(service, job),
            'owner': 'paasta',
            'disabled': True,
            'schedule': 'R/2014-01-01T00:00:00Z/PT60M',
        })

    for definition in definitions:
        try:
            paasta_print('attempting to create job %s' % definition['name'])
            chronos.add(definition)
        except Exception:
            paasta_print('Error creating test job: %s' % json.dumps(definition))
            raise

    # 'configured' jobs have their yelp-soa configs in place.
    # 'unconfigured' jobs may once have been configured chronos jobs but no
    # longer have the corresponding configuration; they are what
    # cleanup_chronos_jobs is meant to remove.
    created_names = [definition['name'] for definition in definitions]
    if state == "configured":
        context.configured_job_names = created_names
    elif state == "unconfigured":
        context.unconfigured_job_names = created_names
Exemple #4
0
def launch_jobs(context, num_jobs, state, service, job):
    """Register ``num_jobs`` disabled test jobs with Chronos.

    Each job is named ``compose_job_id(service, job)``. After creation the
    job names are attached to ``context`` as ``configured_job_names`` or
    ``unconfigured_job_names`` depending on ``state``; an unrecognised
    ``state`` attaches nothing. Errors from ``client.add`` are reported and
    re-raised.
    """
    chronos_client = context.chronos_client
    how_many = int(num_jobs)
    test_jobs = [
        {
            "async": False,
            "command": "echo 1",
            "epsilon": "PT15M",
            "name": compose_job_id(service, job),
            "owner": "paasta",
            "disabled": True,
            "schedule": "R/2014-01-01T00:00:00Z/PT60M",
        }
        for _ in range(how_many)
    ]

    for test_job in test_jobs:
        try:
            paasta_print("attempting to create job %s" % test_job["name"])
            chronos_client.add(test_job)
        except Exception:
            paasta_print("Error creating test job: %s" % json.dumps(test_job))
            raise

    # A 'configured' job has its yelp-soa configs in place. An 'unconfigured'
    # job may have been a configured chronos job at some stage but no longer
    # has the corresponding configuration; those are the jobs that
    # cleanup_chronos_jobs targets.
    target_attribute = None
    if state == "configured":
        target_attribute = "configured_job_names"
    elif state == "unconfigured":
        target_attribute = "unconfigured_job_names"

    if target_attribute is not None:
        setattr(context, target_attribute, [test_job["name"] for test_job in test_jobs])
Exemple #5
0
def bounce_chronos_job(service, instance, cluster, jobs_to_disable,
                       jobs_to_delete, job_to_create, client):
    """Disable, delete and create Chronos jobs as requested, logging each step.

    A debug line describing the planned actions is logged first (or a note
    that there is nothing to do). Jobs in ``jobs_to_disable`` are disabled,
    jobs in ``jobs_to_delete`` are deleted, and a truthy ``job_to_create`` is
    created and announced with an 'event'-level log line.

    :returns: always ``(0, "All chronos bouncing tasks finished.")``
    """
    # Keyword arguments shared by both _log calls below.
    log_target = dict(
        service=service,
        instance=instance,
        component='deploy',
        cluster=cluster,
    )

    has_work = any([jobs_to_disable, jobs_to_delete, job_to_create])
    if has_work:
        plan = "Chronos bouncing. Jobs to disable: %s, jobs to delete: %s, job_to_create: %s" % (
            jobs_to_disable, jobs_to_delete, job_to_create)
        _log(level='debug', line=plan, **log_target)
    else:
        composed_id = chronos_tools.compose_job_id(service, instance)
        log.debug("Not doing any chronos bounce action for %s" % composed_id)

    for to_disable in jobs_to_disable:
        chronos_tools.disable_job(client=client, job=to_disable)
    for to_delete in jobs_to_delete:
        chronos_tools.delete_job(client=client, job=to_delete)

    if job_to_create:
        chronos_tools.create_job(client=client, job=job_to_create)
        announcement = 'Created new Chronos job: %s' % job_to_create['name']
        _log(level='event', line=announcement, **log_target)

    return (0, "All chronos bouncing tasks finished.")
def main():
    """Delete Chronos jobs (and their tasks) that are running but not expected.

    Compares the jobs returned by ``chronos_tools.get_chronos_jobs_for_cluster``
    with the paasta jobs currently deployed, asks ``jobs_to_delete`` which ones
    should go, then calls ``cleanup_tasks`` and ``cleanup_jobs`` for them and
    prints a summary.

    Exits with status 1 when any task or job response ends in an
    ``Exception`` instance.
    """
    args = parse_args()

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    # get_chronos_jobs_for_cluster returns (service, job)
    expected_service_jobs = chronos_tools.get_chronos_jobs_for_cluster(soa_dir=args.soa_dir)

    # Drop jobs not related to paasta and decompose the remaining job ids
    # so they can be compared with the expected jobs.
    paasta_jobs = filter_paasta_jobs(deployed_job_names(client))
    running_service_jobs = [chronos_tools.decompose_job_id(job) for job in paasta_jobs]

    to_delete = jobs_to_delete(expected_service_jobs, running_service_jobs)

    # Recompose the job ids again for deletion.
    to_delete_job_ids = [chronos_tools.compose_job_id(*job) for job in to_delete]

    # A response whose last element is an Exception counts as a failure.
    task_responses = cleanup_tasks(client, to_delete_job_ids)
    task_successes = []
    task_failures = []
    for response in task_responses:
        if isinstance(response[-1], Exception):
            task_failures.append(response)
        else:
            task_successes.append(response)

    job_responses = cleanup_jobs(client, to_delete_job_ids)
    job_successes = []
    job_failures = []
    for response in job_responses:
        if isinstance(response[-1], Exception):
            job_failures.append(response)
        else:
            job_successes.append(response)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if len(to_delete) == 0:
        print('No Chronos Jobs to remove')
    else:
        if task_successes:
            print(format_list_output("Successfully Removed Tasks (if any were running) for:",
                                     [job[0] for job in task_successes]))

        if task_failures:
            print(format_list_output("Failed to Delete Tasks for:", [job[0] for job in task_failures]))

        if job_successes:
            print(format_list_output("Successfully Removed Jobs:", [job[0] for job in job_successes]))

        if job_failures:
            print(format_list_output("Failed to Delete Jobs:", [job[0] for job in job_failures]))

        # Any failure makes the whole run exit non-zero.
        if job_failures or task_failures:
            sys.exit(1)
Exemple #7
0
def main():
    """Remove Chronos jobs that are deployed but no longer expected.

    The expected jobs come from ``chronos_tools.get_chronos_jobs_for_cluster``;
    the deployed ones from ``deployed_job_names`` filtered through
    ``filter_paasta_jobs``. ``jobs_to_delete`` picks the jobs to remove, whose
    tasks and then job definitions are cleaned up. A summary of successes and
    failures is printed.

    Exits with status 1 when any cleanup response ends in an ``Exception``
    instance.
    """
    args = parse_args()

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    # get_chronos_jobs_for_cluster returns (service, job)
    expected_service_jobs = chronos_tools.get_chronos_jobs_for_cluster(soa_dir=args.soa_dir)

    # Keep only paasta-related jobs and decompose their ids for comparison.
    paasta_jobs = filter_paasta_jobs(deployed_job_names(client))
    running_service_jobs = [chronos_tools.decompose_job_id(job) for job in paasta_jobs]

    to_delete = jobs_to_delete(expected_service_jobs, running_service_jobs)

    # Recompose the job ids again for deletion.
    to_delete_job_ids = [chronos_tools.compose_job_id(*job) for job in to_delete]

    # The last element of each response tells success from failure:
    # an Exception instance means the cleanup failed.
    task_responses = cleanup_tasks(client, to_delete_job_ids)
    task_successes = []
    task_failures = []
    for response in task_responses:
        if isinstance(response[-1], Exception):
            task_failures.append(response)
        else:
            task_successes.append(response)

    job_responses = cleanup_jobs(client, to_delete_job_ids)
    job_successes = []
    job_failures = []
    for response in job_responses:
        if isinstance(response[-1], Exception):
            job_failures.append(response)
        else:
            job_successes.append(response)

    # print(...) with one argument is valid and equivalent on Python 2 and 3.
    if len(to_delete) == 0:
        print('No Chronos Jobs to remove')
    else:
        if task_successes:
            print(format_list_output("Successfully Removed Tasks (if any were running) for:",
                                     [job[0] for job in task_successes]))

        if task_failures:
            print(format_list_output("Failed to Delete Tasks for:", [job[0] for job in task_failures]))

        if job_successes:
            print(format_list_output("Successfully Removed Jobs:", [job[0] for job in job_successes]))

        if job_failures:
            print(format_list_output("Failed to Delete Jobs:", [job[0] for job in job_failures]))

        # Signal failure to the caller if anything could not be deleted.
        if job_failures or task_failures:
            sys.exit(1)
Exemple #8
0
def chronos_log_line_passes_filter(line, levels, service, components, clusters):
    """Decide whether a JSON-formatted Chronos log line mentions ``service``.

    The line passes when its ``message`` field contains the string returned
    by ``chronos_tools.compose_job_id(service, '')``.

    :param line: raw log line, expected to be a JSON object
    :param levels: not used by this filter
    :param service: service name to look for
    :param components: not used by this filter
    :param clusters: not used by this filter
    :returns: True if the message mentions the service; False if it does not,
        if the line is not valid JSON, or if it is not a JSON object
    """
    try:
        parsed_line = json.loads(line)
    except ValueError:
        log.debug('Trouble parsing line as json. Skipping. Line: %r' % line)
        return False
    if not isinstance(parsed_line, dict):
        # Valid JSON that is not an object (number, list, string, null) has
        # no 'message' field, so it cannot match.
        return False
    return chronos_tools.compose_job_id(service, '') in parsed_line.get('message', '')
def old_jobs_leftover(context, job_count):
    """Add ``job_count`` old copies of the fake service job to Chronos.

    Each copy is named via ``chronos_tools.compose_job_id`` with a distinct
    ``git_hash`` (``git0``, ``git1``, ...) and the fixed ``config_hash``
    ``"config"``, then added through ``context.chronos_client``.

    :param context: object providing ``chronos_client``
    :param job_count: number of jobs to add (anything ``int()`` accepts)
    """
    old_job = copy.deepcopy(fake_service_config)
    # range() works on both Python 2 and 3; xrange() exists only on Python 2.
    for n in range(0, int(job_count)):
        old_job["name"] = chronos_tools.compose_job_id(
            service=fake_service_name,
            instance=fake_instance_name,
            git_hash="git%d" % n,
            config_hash="config",
        )
        context.chronos_client.add(old_job)
Exemple #10
0
def chronos_log_line_passes_filter(line, levels, service, components,
                                   clusters):
    """Return True when a JSON-formatted Chronos log line mentions ``service``.

    A line matches if its ``message`` field contains
    ``chronos_tools.compose_job_id(service, '')``. Lines that are not valid
    JSON, or whose JSON value is not an object, never match.

    ``levels``, ``components`` and ``clusters`` are accepted but not used.
    """
    try:
        parsed_line = json.loads(line)
    except ValueError:
        log.debug('Trouble parsing line as json. Skipping. Line: %r' % line)
        return False
    if not isinstance(parsed_line, dict):
        # A bare JSON number/list/string/null has no .get(); treat it as a
        # non-matching line instead of raising AttributeError.
        return False
    return chronos_tools.compose_job_id(service,
                                        '') in parsed_line.get('message', '')
Exemple #11
0
def chronos_log_line_passes_filter(
    line, levels, service, components, clusters, instances, start_time=None, end_time=None
):
    """Decide whether a JSON-formatted Chronos log line should be displayed.

    The line passes when its ``timestamp`` is accepted by
    ``check_timestamp_in_range(timestamp, start_time, end_time)`` and its
    ``message`` contains ``chronos_tools.compose_job_id(service, "")``.

    :param line: raw log line, expected to be a JSON object
    :param service: service name to look for in the message
    :param start_time: passed through to ``check_timestamp_in_range``
    :param end_time: passed through to ``check_timestamp_in_range``
    :returns: True if the line passes; False otherwise, including when the
        line is not valid JSON or is not a JSON object

    ``levels``, ``components``, ``clusters`` and ``instances`` are accepted
    but not used by this filter.
    """
    try:
        parsed_line = json.loads(line)
    except ValueError:
        log.debug("Trouble parsing line as json. Skipping. Line: %r" % line)
        return False
    if not isinstance(parsed_line, dict):
        # Valid JSON that is not an object carries neither a timestamp nor a
        # message; reject it instead of raising AttributeError on .get().
        return False

    timestamp = isodate.parse_datetime(parsed_line.get("timestamp"))
    if not check_timestamp_in_range(timestamp, start_time, end_time):
        return False
    return chronos_tools.compose_job_id(service, "") in parsed_line.get("message", "")
Exemple #12
0
def chronos_log_line_passes_filter(
    line,
    levels,
    service,
    components,
    clusters,
    instances,
    start_time=None,
    end_time=None,
):
    """Return True if a JSON-formatted Chronos log line should be displayed.

    Two conditions must hold: the line's ``timestamp`` is accepted by
    ``check_timestamp_in_range`` for ``start_time``/``end_time``, and its
    ``message`` contains ``chronos_tools.compose_job_id(service, '')``.
    Lines that are not valid JSON, or that parse to something other than a
    JSON object, return False.

    ``levels``, ``components``, ``clusters`` and ``instances`` are accepted
    but not used by this filter.
    """
    try:
        parsed_line = json.loads(line)
    except ValueError:
        log.debug('Trouble parsing line as json. Skipping. Line: %r' % line)
        return False
    if not isinstance(parsed_line, dict):
        # Non-object JSON (number, list, string, null) has no fields to
        # inspect; calling .get() on it would raise AttributeError.
        return False

    timestamp = isodate.parse_datetime(parsed_line.get('timestamp'))
    if not check_timestamp_in_range(timestamp, start_time, end_time):
        return False
    return chronos_tools.compose_job_id(service, '') in parsed_line.get('message', '')
def main():
    """Delete running Chronos jobs that are neither expected nor valid temporary jobs.

    Everything in ``deployed_job_names(client)`` that is not an expected job
    for this cluster and not a still-valid temporary job is removed: first
    its tasks via ``cleanup_tasks``, then the job via ``cleanup_jobs``. For
    each successfully removed job whose id can be decomposed, an OK-status
    event is sent with ``send_event``. A summary is printed at the end.

    Exits with status 1 when any cleanup response ends in an ``Exception``
    instance.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    running_jobs = set(deployed_job_names(client))

    expected_service_jobs = {
        chronos_tools.compose_job_id(*job)
        for job in chronos_tools.get_chronos_jobs_for_cluster(soa_dir=soa_dir)
    }

    # Temporary jobs are kept until filter_expired_tmp_jobs reports them expired.
    all_tmp_jobs = set(filter_tmp_jobs(filter_paasta_jobs(running_jobs)))
    expired_tmp_jobs = set(filter_expired_tmp_jobs(client, all_tmp_jobs))
    valid_tmp_jobs = all_tmp_jobs - expired_tmp_jobs

    to_delete = running_jobs - expected_service_jobs - valid_tmp_jobs

    # A response whose last element is an Exception counts as a failure.
    task_responses = cleanup_tasks(client, to_delete)
    task_successes = []
    task_failures = []
    for response in task_responses:
        if isinstance(response[-1], Exception):
            task_failures.append(response)
        else:
            task_successes.append(response)

    job_responses = cleanup_jobs(client, to_delete)
    job_successes = []
    job_failures = []
    for response in job_responses:
        if isinstance(response[-1], Exception):
            job_failures.append(response)
        else:
            job_successes.append(response)
            try:
                (service, instance) = chronos_tools.decompose_job_id(response[0])
                send_event(
                    service=service,
                    instance=instance,
                    monitoring_overrides={},
                    soa_dir=soa_dir,
                    status_code=pysensu_yelp.Status.OK,
                    message="This instance was removed and is no longer supposed to be scheduled.",
                )
            except InvalidJobNameError:
                # If we deleted some bogus job with a bogus jobid that could not be parsed,
                # just move on, no need to send any kind of paasta event.
                pass

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if not to_delete:
        print('No Chronos Jobs to remove')
    else:
        if task_successes:
            print(format_list_output("Successfully Removed Tasks (if any were running) for:",
                                     [job[0] for job in task_successes]))

        if task_failures:
            print(format_list_output("Failed to Delete Tasks for:", [job[0] for job in task_failures]))

        if job_successes:
            print(format_list_output("Successfully Removed Jobs:", [job[0] for job in job_successes]))

        if job_failures:
            print(format_list_output("Failed to Delete Jobs:", [job[0] for job in job_failures]))

        # Any failure makes the whole run exit non-zero.
        if job_failures or task_failures:
            sys.exit(1)
Exemple #14
0
def main():
    """Clone a Chronos job (optionally with its related jobs) for a given execution date.

    Steps:

    1. Resolve ``args.service_instance`` into ``(service, instance)`` and load
       the related job configs for the current cluster.
    2. Build the complete Chronos config for every selected job, printing a
       human-readable message before re-raising any configuration error.
    3. For each job (in topological order when ``args.run_all_related_jobs``
       is set), clone it with a shared timestamp and re-render its command
       for ``args.execution_date``.
    4. Add all clones to Chronos.

    :raises NoDeploymentsAvailable: when no related job config is found, or
        re-raised from ``create_complete_config``
    :raises NoDockerImageError: re-raised from ``create_complete_config``
    :raises NoConfigurationForServiceError: re-raised from ``create_complete_config``
    """
    args = parse_args()

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()

    service, instance = chronos_tools.decompose_job_id(args.service_instance)

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)

    related_jobs = chronos_tools.get_related_jobs_configs(cluster,
                                                          service,
                                                          instance,
                                                          soa_dir=args.soa_dir)
    if not related_jobs:
        error_msg = "No deployment found for {} in cluster {}. Has Jenkins run for it?".format(
            args.service_instance,
            cluster,
        )
        paasta_print(error_msg)
        raise NoDeploymentsAvailable

    if not args.run_all_related_jobs:
        # Strip the configuration of the related services: it will not be
        # used by the rest of the flow.
        related_jobs = {
            (service, instance): related_jobs[(service, instance)],
        }

    complete_job_configs = {}
    for (srv, inst) in related_jobs:
        try:
            complete_job_configs[(srv, inst)] = chronos_tools.create_complete_config(
                service=srv,
                job_name=inst,
                soa_dir=args.soa_dir,
            )
        except (NoDeploymentsAvailable, NoDockerImageError):
            error_msg = "No deployment found for {} in cluster {}. Has Jenkins run for it?".format(
                chronos_tools.compose_job_id(srv, inst),
                cluster,
            )
            paasta_print(error_msg)
            # Bare raise keeps the original traceback intact.
            raise
        except NoConfigurationForServiceError as e:
            error_msg = (
                "Could not read chronos configuration file for {} in cluster {}\nError was: {}"
                .format(
                    chronos_tools.compose_job_id(srv, inst),
                    cluster,
                    str(e),
                ))
            paasta_print(error_msg)
            raise
        # Any other exception (e.g. chronos_tools.InvalidParentError)
        # propagates unchanged.

    if not args.run_all_related_jobs:
        sorted_jobs = [(service, instance)]
    else:
        sorted_jobs = chronos_tools.topological_sort_related_jobs(
            cluster, service, instance, soa_dir=args.soa_dir)

    # One timestamp is shared by every clone created in this run.
    timestamp = datetime.datetime.utcnow().isoformat()

    chronos_to_add = []
    for (job_service, job_instance) in sorted_jobs:
        # complete_job_config is a formatted version of the job, so its
        # command is formatted in the context of 'now'; replace it with the
        # 'original' cmd so it can be re-rendered.
        chronos_job_config = chronos_tools.load_chronos_job_config(
            service=job_service,
            instance=job_instance,
            cluster=cluster,
            soa_dir=args.soa_dir,
        )
        original_command = chronos_job_config.get_cmd()
        complete_job_config = complete_job_configs[(job_service, job_instance)]
        complete_job_config['command'] = original_command
        clone = clone_job(
            chronos_job=complete_job_config,
            timestamp=timestamp,
            force_disabled=args.force_disabled,
        )
        # Modify the command to run commands for the requested date.
        clone = modify_command_for_date(
            chronos_job=clone,
            date=datetime.datetime.strptime(args.execution_date,
                                            "%Y-%m-%dT%H:%M:%S"),
            verbose=args.verbose,
        )

        if not args.run_all_related_jobs and chronos_tools.get_job_type(
                clone) == chronos_tools.JobType.Dependent:
            # If the job is a dependent job and we want to re-run only the
            # specific instance, remove the parents and update the schedule
            # to start the job as soon as possible.
            clone = set_default_schedule(remove_parents(clone))

        chronos_to_add.append(clone)

    for job_to_add in chronos_to_add:
        client.add(job_to_add)
Exemple #15
0
def main():
    """Clean up Chronos jobs that should no longer be running in this cluster.

    A running job is deleted unless it is an expected job for the cluster or
    a temporary job that ``filter_expired_tmp_jobs`` does not report as
    expired. Tasks are cleaned up first, then the jobs themselves; each
    successfully removed job with a parseable id gets an OK-status monitoring
    event. A summary of the outcome is printed.

    Exits with status 1 when any cleanup response ends in an ``Exception``
    instance.
    """
    args = parse_args()
    soa_dir = args.soa_dir

    chronos_config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(chronos_config)

    cluster = utils.load_system_paasta_config().get_cluster()

    running_jobs = set(deployed_job_names(client))

    expected_service_jobs = set()
    for job_parts in chronos_tools.get_chronos_jobs_for_cluster(soa_dir=args.soa_dir):
        expected_service_jobs.add(chronos_tools.compose_job_id(*job_parts))

    all_tmp_jobs = set(filter_tmp_jobs(filter_paasta_jobs(running_jobs)))
    expired_tmp_jobs = set(filter_expired_tmp_jobs(client, all_tmp_jobs, cluster=cluster, soa_dir=soa_dir))
    valid_tmp_jobs = all_tmp_jobs - expired_tmp_jobs

    to_delete = running_jobs - expected_service_jobs - valid_tmp_jobs

    # A response whose last element is an Exception is a failure.
    task_successes, task_failures = [], []
    for task_response in cleanup_tasks(client, to_delete):
        task_failed = isinstance(task_response[-1], Exception)
        (task_failures if task_failed else task_successes).append(task_response)

    job_successes, job_failures = [], []
    for job_response in cleanup_jobs(client, to_delete):
        if isinstance(job_response[-1], Exception):
            job_failures.append(job_response)
            continue

        job_successes.append(job_response)
        try:
            (service, instance) = chronos_tools.decompose_job_id(job_response[0])
            monitoring_tools.send_event(
                check_name=check_chronos_job_name(service, instance),
                service=service,
                overrides={},
                soa_dir=soa_dir,
                status=pysensu_yelp.Status.OK,
                output="This instance was removed and is no longer supposed to be scheduled.",
            )
        except InvalidJobNameError:
            # A bogus job id that cannot be parsed was deleted; there is no
            # need to send any kind of paasta event for it.
            pass

    if not to_delete:
        paasta_print('No Chronos Jobs to remove')
        return

    # Report each non-empty category, in this fixed order.
    report_sections = (
        ("Successfully Removed Tasks (if any were running) for:", task_successes),
        ("Failed to Delete Tasks for:", task_failures),
        ("Successfully Removed Jobs:", job_successes),
        ("Failed to Delete Jobs:", job_failures),
    )
    for heading, responses in report_sections:
        if responses:
            paasta_print(format_list_output(heading, [response[0] for response in responses]))

    # Any failure makes the whole run exit non-zero.
    if job_failures or task_failures:
        sys.exit(1)