Exemple #1
0
def test_old_and_new_ways_load_the_same_chronos_configs(
    mock_chronos_tools_read_extra_service_information,
    mock_read_extra_service_information,
    mock_chronos_tools_load_deployments_json,
    mock_load_deployments_json,
):
    """The configs yielded by ``instance_configs`` for ``ChronosJobConfig``
    must equal the ones built by calling ``load_chronos_job_config`` directly.

    The four ``mock_*`` arguments are presumably injected by patch decorators
    that sit outside this block.
    """
    # Every mock gets its own freshly built fixture value.
    for service_info_mock in (
        mock_read_extra_service_information,
        mock_chronos_tools_read_extra_service_information,
    ):
        service_info_mock.return_value = chronos_cluster_config()
    for deployments_mock in (
        mock_load_deployments_json,
        mock_chronos_tools_load_deployments_json,
    ):
        deployments_mock.return_value = deployment_json()

    service_under_test = create_test_service()

    # Reference result: load each job config directly.
    loaded_directly = [
        load_chronos_job_config(
            service=TEST_SERVICE_NAME,
            instance=job_name,
            cluster=TEST_CLUSTER_NAME,
            load_deployments=True,
            soa_dir=TEST_SOA_DIR,
        )
        for job_name in ("example_chronos_job", "example_child_job")
    ]
    loaded_via_service = list(
        service_under_test.instance_configs(TEST_CLUSTER_NAME, ChronosJobConfig)
    )
    assert loaded_via_service == loaded_directly
def get_instance_config_for_service(soa_dir, service):
    """Yield the config of every marathon and chronos instance of a service.

    For each cluster returned by ``list_clusters`` all marathon instances are
    yielded first, followed by all chronos instances.

    :param soa_dir: The service directory to read the configuration from
    :param service: The service name to load instance configs for
    :returns: A generator of the objects returned by
        ``load_marathon_service_config`` and ``load_chronos_job_config``
    """
    for cluster in list_clusters(
        service=service,
        soa_dir=soa_dir,
    ):
        # soa_dir is passed to the instance listing as well, so instances are
        # discovered in the same directory the clusters and configs come from
        # instead of the listing function's default directory.
        for _, instance in get_service_instance_list(
            service=service,
            cluster=cluster,
            instance_type='marathon',
            soa_dir=soa_dir,
        ):
            yield load_marathon_service_config(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=soa_dir,
            )
        for _, instance in get_service_instance_list(
            service=service,
            cluster=cluster,
            instance_type='chronos',
            soa_dir=soa_dir,
        ):
            yield load_chronos_job_config(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=soa_dir,
            )
Exemple #3
0
def validate_chronos(service_path):
    """Validate every chronos instance configured for a service.

    Each chronos instance in each cluster of the service is loaded with
    ``load_chronos_job_config`` and checked through its ``validate()`` method.
    One result line per instance is printed.

    :param service_path: Path handed to ``path_to_soa_dir_service`` to obtain
        the soa_dir and the service name
    :returns: 0 if every instance passed validation, 1 otherwise
    """
    soa_dir, service = path_to_soa_dir_service(service_path)
    instance_type = 'chronos'

    returncode = 0
    for cluster in list_clusters(service, soa_dir, instance_type):
        for instance in list_all_instances_for_service(
                service=service,
                clusters=[cluster],
                instance_type=instance_type,
                soa_dir=soa_dir):
            # NOTE(review): the fourth positional argument (False) is
            # presumably ``load_deployments`` -- confirm against the
            # signature of load_chronos_job_config.
            cjc = load_chronos_job_config(service, instance, cluster, False,
                                          soa_dir)
            checks_passed, check_msgs = cjc.validate()

            # Remove duplicate check_msgs while keeping first-seen order, so
            # the printed report is the same on every run.
            unique_check_msgs = []
            seen_msgs = set()
            for msg in check_msgs:
                if msg not in seen_msgs:
                    seen_msgs.add(msg)
                    unique_check_msgs.append(msg)

            # print(...) with a single argument behaves the same as the old
            # print statement on Python 2 and is valid on Python 3.
            if not checks_passed:
                print(invalid_chronos_instance(cluster, instance,
                                               "\n  ".join(unique_check_msgs)))
                returncode = 1
            else:
                print(valid_chronos_instance(cluster, instance))
    return returncode
Exemple #4
0
def create_chronos_job_config_object_from_configs(context, instance, service):
    """Load the chronos job config for ``service``/``instance`` and keep it on
    the context as ``context.chronos_job_config_obj``.

    The cluster and soa_dir are read from ``context.cluster`` and
    ``context.soa_dir``.
    """
    job_config = chronos_tools.load_chronos_job_config(
        service=service,
        instance=instance,
        cluster=context.cluster,
        soa_dir=context.soa_dir,
    )
    context.chronos_job_config_obj = job_config
Exemple #5
0
def get_instance_config_for_service(soa_dir, service):
    """Yield the config of every marathon and chronos instance of a service.

    For each cluster returned by ``list_clusters`` all marathon instances are
    yielded first, followed by all chronos instances.

    :param soa_dir: The service directory to read the configuration from
    :param service: The service name to load instance configs for
    :returns: A generator of the objects returned by
        ``load_marathon_service_config`` and ``load_chronos_job_config``
    """
    for cluster in list_clusters(
            service=service,
            soa_dir=soa_dir,
    ):
        # soa_dir is passed to the instance listing as well, so instances are
        # discovered in the same directory the clusters and configs come from
        # instead of the listing function's default directory.
        for _, instance in get_service_instance_list(
                service=service,
                cluster=cluster,
                instance_type='marathon',
                soa_dir=soa_dir,
        ):
            yield load_marathon_service_config(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=soa_dir,
            )
        for _, instance in get_service_instance_list(
                service=service,
                cluster=cluster,
                instance_type='chronos',
                soa_dir=soa_dir,
        ):
            yield load_chronos_job_config(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=soa_dir,
            )
def send_event(service, instance, soa_dir, status, output):
    """Emit a sensu event (through ``monitoring_tools.send_event``) describing
    the outcome of setting up one chronos job.

    :param service: The service name the event is about
    :param instance: The instance of the service the event is about
    :param soa_dir: The service directory to read monitoring information from
    :param status: The status to emit for this event
    :param output: The output to emit for this event
    """
    cluster = load_system_paasta_config().get_cluster()
    job_config = chronos_tools.load_chronos_job_config(
        service=service,
        instance=instance,
        cluster=cluster,
        soa_dir=soa_dir,
    )
    overrides = job_config.get_monitoring()

    # Sensu has to know how often this check fires; ``check_every`` mirrors
    # the 10s frequency of the cron job.
    overrides['check_every'] = '10s'
    # Failures here are mostly transient and tend to resolve themselves, so
    # ``alert_after`` delays alerting to keep the noise down.
    overrides['alert_after'] = '10m'

    job_id = compose_job_id(service, instance)
    monitoring_tools.send_event(
        service=service,
        check_name='setup_chronos_job.%s' % job_id,
        overrides=overrides,
        status=status,
        output=output,
        soa_dir=soa_dir,
    )
Exemple #7
0
def filter_expired_tmp_jobs(client, job_names, cluster, soa_dir):
    """
    Pick, from a list of temporary job names, those that are ready to be
    removed. A temporary job counts as expired when both hold:

        - it has run at least once (success or failure does not matter)
        - its last run is older than the larger of one day and the job's
          configured schedule interval

    A name is appended once for every expired temporary job found for it.
    """
    expired = []
    for job_name in job_names:
        service, instance = chronos_tools.decompose_job_id(job_name)
        tmp_jobs = chronos_tools.get_temporary_jobs_for_service_instance(
            client=client, service=service, instance=instance
        )
        for tmp_job in tmp_jobs:
            last_run_time, last_run_state = chronos_tools.get_status_last_run(
                tmp_job
            )
            try:
                job_config = chronos_tools.load_chronos_job_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                )
                interval = job_config.get_schedule_interval_in_seconds() or 0
            except NoConfigurationForServiceError:
                # Without a config the one-day minimum below applies.
                interval = 0

            if last_run_state == chronos_tools.LastRunState.NotRun:
                # Never ran, so it cannot have completed.
                continue

            age = datetime.datetime.now(
                dateutil.tz.tzutc()
            ) - dateutil.parser.parse(last_run_time)
            retention = max(
                datetime.timedelta(seconds=interval),
                datetime.timedelta(days=1),
            )
            if age > retention:
                expired.append(job_name)
    return expired
Exemple #8
0
def send_event(service, instance, soa_dir, status, output):
    """Report the result of a chronos job setup to sensu by delegating to
    ``monitoring_tools.send_event``.

    :param service: The service name the event is about
    :param instance: The instance of the service the event is about
    :param soa_dir: The service directory to read monitoring information from
    :param status: The status to emit for this event
    :param output: The output to emit for this event
    """
    current_cluster = load_system_paasta_config().get_cluster()
    overrides = chronos_tools.load_chronos_job_config(
        service=service,
        instance=instance,
        cluster=current_cluster,
        soa_dir=soa_dir,
    ).get_monitoring()

    # ``check_every`` tells sensu how often to expect this check; it matches
    # the 10s frequency of the cron job.
    overrides['check_every'] = '10s'
    # Most failures are transient and get fixed eventually; ``alert_after``
    # suppresses the extra noise.
    overrides['alert_after'] = '10m'

    event_check_name = 'setup_chronos_job.%s' % compose_job_id(service, instance)
    monitoring_tools.send_event(
        service=service,
        check_name=event_check_name,
        overrides=overrides,
        status=status,
        output=output,
        soa_dir=soa_dir,
    )
Exemple #9
0
def main(args):
    """Send one sensu event per configured chronos job of this cluster.

    :param args: Parsed arguments; only ``args.soa_dir`` is read here
    """
    soa_dir = args.soa_dir
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    cluster = utils.load_system_paasta_config().get_cluster()

    # Only jobs that are listed in the configs are considered.
    configured_jobs = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)
    jobs_by_service_instance = build_service_job_mapping(client, configured_jobs)

    for key, job_state_pairs in jobs_by_service_instance.items():
        service = key[0]
        instance = key[1]
        job_config = load_chronos_job_config(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
        )
        message, status_code = sensu_message_status_for_jobs(
            job_config, service, instance, job_state_pairs)
        overrides = compose_monitoring_overrides_for_service(
            chronos_job_config=job_config,
            soa_dir=soa_dir
        )
        send_event(
            service=service,
            instance=instance,
            monitoring_overrides=overrides,
            status_code=status_code,
            message=message,
            soa_dir=soa_dir,
        )
def main():
    """Check every configured chronos job of this cluster and send a sensu
    event for each job that yields a status.

    Jobs whose config raises ``utils.NoDeploymentsAvailable`` are skipped.
    A ``chronos.ChronosAPIError`` is logged and ends the process with exit
    code 2.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    cluster = utils.load_system_paasta_config().get_cluster()

    configured_jobs = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    try:
        jobs_by_service_instance = build_service_job_mapping(client, configured_jobs)

        for key, chronos_job in jobs_by_service_instance.items():
            service = key[0]
            instance = key[1]
            try:
                job_config = load_chronos_job_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                )
            except utils.NoDeploymentsAvailable:
                log.info("Skipping %s because no deployments are available" % service)
                continue

            message, status_code = sensu_message_status_for_jobs(
                chronos_job_config=job_config,
                chronos_job=chronos_job,
                client=client,
            )
            if status_code is None:
                # Nothing to report for this job.
                continue
            send_event(job_config, status_code, message)
    except chronos.ChronosAPIError as error:
        log.error("CRITICAL: Unable to contact Chronos! Error: %s" % error)
        sys.exit(2)
Exemple #11
0
def chronos_instance_status(instance_status, service, instance, verbose):
    """Build a status dictionary for one chronos job.

    The result holds the keys ``desired_state``, ``schedule_type``,
    ``status``, ``command`` and ``last_status``, plus ``schedule`` for
    scheduled jobs or ``parents`` for dependent jobs.

    :param instance_status: Not used by this function
    :param service: The service name of the job
    :param instance: The instance name of the job
    :param verbose: When truthy, ``status['mesos_state']`` is filled in too
    :returns: The status dictionary described above
    """
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    job_config = chronos_tools.load_chronos_job_config(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
    )
    cstatus = {'desired_state': job_config.get_desired_state()}

    # How the job gets triggered: by its own schedule or by parent jobs.
    job_type = chronos_tools.get_job_type(job_config.config_dict)
    if job_type == chronos_tools.JobType.Scheduled:
        schedule = job_config.get_schedule()
        epsilon = job_config.get_epsilon()
        time_zone = job_config.get_schedule_time_zone()
        if time_zone is None or time_zone == 'null':
            time_zone = 'UTC'
        cstatus['schedule'] = {
            'schedule': schedule,
            'epsilon': epsilon,
            'time_zone': time_zone,
        }
        schedule_type = 'schedule'
    elif job_type == chronos_tools.JobType.Dependent:
        cstatus['parents'] = job_config.get_parents()
        schedule_type = 'parents'
    else:
        schedule_type = 'unknown'
    cstatus['schedule_type'] = schedule_type

    state = {}
    cstatus['status'] = state
    if verbose:
        matching_tasks = select_tasks_by_id(
            a_sync.block(get_cached_list_of_running_tasks_from_frameworks),
            job_config.get_job_name(),
        )
        running_task_count = len(matching_tasks)
        state['mesos_state'] = 'running' if running_task_count else 'not_running'
    state['disabled_state'] = 'not_scheduled' if job_config.get_disabled() else 'scheduled'
    state['chronos_state'] = chronos_tools.get_chronos_status_for_job(client, service, instance)
    cstatus['command'] = job_config.get_cmd()

    # Translate the last-run state into its string label; anything
    # unrecognised becomes the empty string.
    last_time, last_state = chronos_tools.get_status_last_run(job_config.config_dict)
    result = ''
    for known_state, label in (
        (chronos_tools.LastRunState.Success, 'success'),
        (chronos_tools.LastRunState.Fail, 'fail'),
        (chronos_tools.LastRunState.NotRun, 'not_run'),
    ):
        if last_state == known_state:
            result = label
            break
    if result in ('not_run', ''):
        last_time = 'never'
    cstatus['last_status'] = {'result': result, 'time': last_time}

    return cstatus
Exemple #12
0
def validate_chronos(service_path):
    """Check that any chronos configurations are valid

    Besides each job config's own ``validate()`` result, every parent that
    passes ``check_parent_format`` must not be the job itself and must be a
    known chronos job of the same cluster. One result line per instance is
    printed with ``paasta_print``.

    :param service_path: Path handed to ``path_to_soa_dir_service`` to obtain
        the soa_dir and the service name
    :returns: True if every instance is valid, False otherwise (also False,
        before any instance is checked, when the service name starts with
        ``TMP_JOB_IDENTIFIER``)
    """
    soa_dir, service = path_to_soa_dir_service(service_path)
    instance_type = 'chronos'
    chronos_spacer = paasta_tools.chronos_tools.INTERNAL_SPACER

    returncode = True

    if service.startswith(TMP_JOB_IDENTIFIER):
        paasta_print((
            "Services using scheduled tasks cannot be named %s, as it clashes with the "
            "identifier used for temporary jobs" % TMP_JOB_IDENTIFIER))
        return False
    for cluster in list_clusters(service, soa_dir, instance_type):
        services_in_cluster = get_services_for_cluster(cluster=cluster,
                                                       instance_type='chronos',
                                                       soa_dir=soa_dir)
        valid_services = {
            f"{name}{chronos_spacer}{instance}"
            for name, instance in services_in_cluster
        }
        for instance in list_all_instances_for_service(
                service=service,
                clusters=[cluster],
                instance_type=instance_type,
                soa_dir=soa_dir,
        ):
            # NOTE(review): the fourth positional argument (False) is
            # presumably ``load_deployments`` -- confirm against the
            # signature of load_chronos_job_config.
            cjc = load_chronos_job_config(service, instance, cluster, False,
                                          soa_dir)
            parents = cjc.get_parents() or []
            checks_passed, check_msgs = cjc.validate()

            # Same for every parent, so it is built once per instance.
            own_job_id = f"{service}{chronos_spacer}{instance}"
            for parent in parents:
                if not check_parent_format(parent):
                    continue
                if parent == own_job_id:
                    checks_passed = False
                    check_msgs.append("Job %s cannot depend on itself" %
                                      parent)
                elif parent not in valid_services:
                    checks_passed = False
                    check_msgs.append("Parent job %s could not be found" %
                                      parent)

            # Remove duplicate check_msgs while keeping first-seen order;
            # going through set() made the report order vary between runs.
            unique_check_msgs = list(dict.fromkeys(check_msgs))

            if not checks_passed:
                paasta_print(
                    invalid_chronos_instance(cluster, instance,
                                             "\n  ".join(unique_check_msgs)))
                returncode = False
            else:
                paasta_print(valid_chronos_instance(cluster, instance))
    return returncode
Exemple #13
0
def compose_monitoring_overrides_for_service(cluster, service, instance,
                                             soa_dir):
    """ Build the monitoring overrides of one chronos job.

    Starts from the job config's ``get_monitoring()`` result and sets
    ``alert_after``, ``check_every`` and ``runbook`` on it.
    """
    job_config = chronos_tools.load_chronos_job_config(
        service=service,
        instance=instance,
        cluster=cluster,
        soa_dir=soa_dir,
    )
    overrides = job_config.get_monitoring()
    overrides['alert_after'] = '2m'
    overrides['check_every'] = '1m'
    # The runbook lookup receives the overrides with the two values above
    # already applied.
    runbook = monitoring_tools.get_runbook(overrides, service, soa_dir=soa_dir)
    overrides['runbook'] = runbook
    return overrides
def compose_monitoring_overrides_for_service(cluster, service, instance, soa_dir):
    """ Return the monitoring overrides of a chronos job.

    The job config's ``get_monitoring()`` result is extended with
    ``alert_after``, ``check_every`` and ``runbook``.
    """
    overrides = chronos_tools.load_chronos_job_config(
        service=service, instance=instance, cluster=cluster, soa_dir=soa_dir,
    ).get_monitoring()
    overrides['alert_after'] = '2m'
    overrides['check_every'] = '1m'
    # Resolved last, so the lookup sees the two values set above.
    overrides['runbook'] = monitoring_tools.get_runbook(
        overrides,
        service,
        soa_dir=soa_dir,
    )
    return overrides
Exemple #15
0
def main():
    """Check every configured chronos job of this cluster and send a sensu
    event for each job that yields a status.

    Jobs whose config raises ``utils.NoDeploymentsAvailable`` are skipped
    with a message. A ``chronos.ChronosAPIError`` is reported and ends the
    process with exit code 2.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    client = chronos_tools.get_chronos_client(
        chronos_tools.load_chronos_config())
    cluster = utils.load_system_paasta_config().get_cluster()

    configured_jobs = chronos_tools.get_chronos_jobs_for_cluster(
        cluster, soa_dir=soa_dir)

    try:
        jobs_by_service_instance = build_service_job_mapping(
            client, configured_jobs)

        for key, job_state_pairs in jobs_by_service_instance.items():
            service = key[0]
            instance = key[1]
            try:
                job_config = load_chronos_job_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                )
            except utils.NoDeploymentsAvailable:
                skip_notice = "Skipping %s because no deployments are available" % service
                paasta_print(utils.PaastaColors.cyan(skip_notice))
                continue

            message, status_code = sensu_message_status_for_jobs(
                chronos_job_config=job_config,
                service=service,
                instance=instance,
                cluster=cluster,
                job_state_pairs=job_state_pairs)
            if status_code is None:
                # Nothing to report for this job.
                continue

            overrides = compose_monitoring_overrides_for_service(
                chronos_job_config=job_config, soa_dir=soa_dir)
            send_event(
                service=service,
                instance=instance,
                monitoring_overrides=overrides,
                status_code=status_code,
                message=message,
                soa_dir=soa_dir,
            )
    except chronos.ChronosAPIError as error:
        failure_notice = "CRITICAL: Unable to contact Chronos! Error: %s" % error
        paasta_print(utils.PaastaColors.red(failure_notice))
        sys.exit(2)
Exemple #16
0
def main():
    """Clone a chronos job for a given execution date and add it to chronos.

    The job is taken from ``args.service_instance`` and its complete config
    is built with ``chronos_tools.create_complete_config``. The clone is
    created by ``clone_job`` for ``args.execution_date`` (parsed as
    ``%Y-%m-%dT%H:%M:%S``) and submitted with ``client.add``.

    :raises NoDeploymentsAvailable, NoDockerImageError: re-raised after
        printing a hint when no deployment exists for the job
    :raises chronos_tools.UnknownChronosJobError: re-raised after printing
        the error when the chronos configuration cannot be read
    :raises chronos_tools.InvalidParentError: propagated unchanged
    """
    args = parse_args()

    cluster = load_system_paasta_config().get_cluster()

    service, instance = chronos_tools.decompose_job_id(args.service_instance)

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)
    system_paasta_config = load_system_paasta_config()

    chronos_job_config = chronos_tools.load_chronos_job_config(
        service,
        instance,
        system_paasta_config.get_cluster(),
        soa_dir=args.soa_dir)

    # print(...) with a single argument behaves the same as the old print
    # statement on Python 2 and is valid on Python 3. The bare ``raise``
    # re-raises the active exception with its original traceback.
    try:
        complete_job_config = chronos_tools.create_complete_config(
            service=service,
            job_name=instance,
            soa_dir=args.soa_dir,
        )
    except (NoDeploymentsAvailable, NoDockerImageError):
        error_msg = "No deployment found for %s in cluster %s. Has Jenkins run for it?" % (
            args.service_instance, cluster)
        print(error_msg)
        raise
    except chronos_tools.UnknownChronosJobError as e:
        error_msg = (
            "Could not read chronos configuration file for %s in cluster %s\n"
            % (args.service_instance, cluster) + "Error was: %s" % str(e))
        print(error_msg)
        raise

    # complete_job_config is a formatted version
    # of the job, so the command is formatted in the context
    # of 'now'
    # replace it with the 'original' cmd so it can be
    # re rendered
    original_command = chronos_job_config.get_cmd()
    complete_job_config['command'] = original_command
    clone = clone_job(
        complete_job_config,
        datetime.datetime.strptime(args.execution_date, "%Y-%m-%dT%H:%M:%S"))
    client.add(clone)
Exemple #17
0
def get_instance_configs_for_service(service, soa_dir, type_filter=None):
    """Yield the instance configs of a service, cluster by cluster.

    Within each cluster the order is marathon, chronos, adhoc. Every config
    is loaded with ``load_deployments=False``.

    :param service: The service name to load instance configs for
    :param soa_dir: The service directory to read the configuration from
    :param type_filter: Container of instance type names to include; ``None``
        includes marathon, chronos and adhoc
    :returns: A generator of the loaded config objects
    """
    # Instance types in the order they are yielded, with their loaders.
    loaders = (
        ('marathon', load_marathon_service_config),
        ('chronos', load_chronos_job_config),
        ('adhoc', load_adhoc_job_config),
    )
    wanted_types = (
        ['marathon', 'chronos', 'adhoc'] if type_filter is None else type_filter
    )
    for cluster in list_clusters(service=service, soa_dir=soa_dir):
        for instance_type, load_config in loaders:
            if instance_type not in wanted_types:
                continue
            instance_list = get_service_instance_list(
                service=service,
                cluster=cluster,
                instance_type=instance_type,
                soa_dir=soa_dir,
            )
            for _, instance in instance_list:
                yield load_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                    load_deployments=False,
                )
Exemple #18
0
def get_chronos_steps(service, soa_dir):
    """Collect the deploy group of every chronos instance of a service.

    The result is a flat list shaped like the steps of deploy.yaml; it only
    exists so the two can be compared 1-1 for linting.
    """
    return [
        load_chronos_job_config(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
            load_deployments=False,
        ).get_deploy_group()
        for cluster in list_clusters(service, soa_dir)
        for _, instance in get_service_instance_list(
            service=service,
            cluster=cluster,
            instance_type="chronos",
            soa_dir=soa_dir,
        )
    ]
Exemple #19
0
def main():
    """Check every configured chronos job of this cluster and send a sensu
    event for each job that yields a status.

    Jobs whose config raises ``utils.NoDeploymentsAvailable`` are skipped
    with a message. A ``chronos.ChronosAPIError`` is reported and ends the
    process with exit code 2.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    cluster = utils.load_system_paasta_config().get_cluster()

    configured_jobs = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    try:
        jobs_by_service_instance = build_service_job_mapping(client, configured_jobs)

        for key, job_state_pairs in jobs_by_service_instance.items():
            service = key[0]
            instance = key[1]
            try:
                job_config = load_chronos_job_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                )
            except utils.NoDeploymentsAvailable:
                skip_notice = "Skipping %s because no deployments are available" % service
                paasta_print(utils.PaastaColors.cyan(skip_notice))
                continue

            message, status_code = sensu_message_status_for_jobs(
                chronos_job_config=job_config,
                service=service,
                instance=instance,
                cluster=cluster,
                job_state_pairs=job_state_pairs
            )
            if status_code is None:
                # Nothing to report for this job.
                continue

            overrides = compose_monitoring_overrides_for_service(
                chronos_job_config=job_config,
                soa_dir=soa_dir
            )
            send_event(
                service=service,
                instance=instance,
                monitoring_overrides=overrides,
                status_code=status_code,
                message=message,
                soa_dir=soa_dir,
            )
    except chronos.ChronosAPIError as error:
        failure_notice = "CRITICAL: Unable to contact Chronos! Error: %s" % error
        paasta_print(utils.PaastaColors.red(failure_notice))
        sys.exit(2)
Exemple #20
0
def validate_chronos(service_path):
    """Check that any chronos configurations are valid

    Besides each job config's own ``validate()`` result, every parent that
    passes ``check_parent_format`` must not be the job itself and must be a
    known chronos job of the same cluster. One result line per instance is
    printed.

    :param service_path: Path handed to ``path_to_soa_dir_service`` to obtain
        the soa_dir and the service name
    :returns: True if every instance is valid, False otherwise
    """
    soa_dir, service = path_to_soa_dir_service(service_path)
    instance_type = 'chronos'
    chronos_spacer = paasta_tools.chronos_tools.INTERNAL_SPACER

    returncode = True
    for cluster in list_clusters(service, soa_dir, instance_type):
        services_in_cluster = get_services_for_cluster(cluster=cluster,
                                                       instance_type='chronos',
                                                       soa_dir=soa_dir)
        valid_services = set(
            "%s%s%s" % (name, chronos_spacer, instance_name)
            for name, instance_name in services_in_cluster
        )
        for instance in list_all_instances_for_service(
                service=service,
                clusters=[cluster],
                instance_type=instance_type,
                soa_dir=soa_dir):
            # NOTE(review): the fourth positional argument (False) is
            # presumably ``load_deployments`` -- confirm against the
            # signature of load_chronos_job_config.
            cjc = load_chronos_job_config(service, instance, cluster, False,
                                          soa_dir)
            parents = cjc.get_parents() or []
            checks_passed, check_msgs = cjc.validate()

            # Same for every parent, so it is built once per instance.
            own_job_id = "%s%s%s" % (service, chronos_spacer, instance)
            for parent in parents:
                if not check_parent_format(parent):
                    continue
                if parent == own_job_id:
                    checks_passed = False
                    check_msgs.append("Job %s cannot depend on itself" %
                                      parent)
                elif parent not in valid_services:
                    checks_passed = False
                    check_msgs.append("Parent job %s could not be found" %
                                      parent)

            # Remove duplicate check_msgs while keeping first-seen order, so
            # the printed report is the same on every run.
            unique_check_msgs = []
            seen_msgs = set()
            for msg in check_msgs:
                if msg not in seen_msgs:
                    seen_msgs.add(msg)
                    unique_check_msgs.append(msg)

            # print(...) with a single argument behaves the same as the old
            # print statement on Python 2 and is valid on Python 3.
            if not checks_passed:
                print(invalid_chronos_instance(cluster, instance,
                                               "\n  ".join(unique_check_msgs)))
                returncode = False
            else:
                print(valid_chronos_instance(cluster, instance))
    return returncode
Exemple #21
0
def main():
    """Clone a chronos job for a given execution date and add it to chronos.

    The job is taken from ``args.service_instance`` and its complete config
    is built with ``chronos_tools.create_complete_config``. The clone is
    created by ``clone_job`` for ``args.execution_date`` (parsed as
    ``%Y-%m-%dT%H:%M:%S``) and submitted with ``client.add``.

    :raises NoDeploymentsAvailable, NoDockerImageError: re-raised after
        printing a hint when no deployment exists for the job
    :raises chronos_tools.UnknownChronosJobError: re-raised after printing
        the error when the chronos configuration cannot be read
    :raises chronos_tools.InvalidParentError: propagated unchanged
    """
    args = parse_args()

    cluster = load_system_paasta_config().get_cluster()

    service, instance = chronos_tools.decompose_job_id(args.service_instance)

    config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(config)
    system_paasta_config = load_system_paasta_config()

    chronos_job_config = chronos_tools.load_chronos_job_config(
        service, instance, system_paasta_config.get_cluster(), soa_dir=args.soa_dir)

    # print(...) with a single argument behaves the same as the old print
    # statement on Python 2 and is valid on Python 3. The bare ``raise``
    # re-raises the active exception with its original traceback.
    try:
        complete_job_config = chronos_tools.create_complete_config(
            service=service,
            job_name=instance,
            soa_dir=args.soa_dir,
        )
    except (NoDeploymentsAvailable, NoDockerImageError):
        error_msg = "No deployment found for %s in cluster %s. Has Jenkins run for it?" % (
            args.service_instance, cluster)
        print(error_msg)
        raise
    except chronos_tools.UnknownChronosJobError as e:
        error_msg = (
            "Could not read chronos configuration file for %s in cluster %s\n" % (args.service_instance, cluster) +
            "Error was: %s" % str(e))
        print(error_msg)
        raise

    # complete_job_config is a formatted version
    # of the job, so the command is formatted in the context
    # of 'now'
    # replace it with the 'original' cmd so it can be
    # re rendered
    original_command = chronos_job_config.get_cmd()
    complete_job_config['command'] = original_command
    clone = clone_job(complete_job_config, datetime.datetime.strptime(args.execution_date, "%Y-%m-%dT%H:%M:%S"))
    client.add(clone)
Exemple #22
0
def get_instance_configs_for_service(service, soa_dir):
    """Yield the config of every marathon, chronos and adhoc instance of a
    service, cluster by cluster and in that type order.

    Every config is loaded with ``load_deployments=False``.

    :param service: The service name to load instance configs for
    :param soa_dir: The service directory to read the configuration from
    :returns: A generator of the loaded config objects
    """
    # Instance types in the order they are yielded, with their loaders.
    loaders = (
        ('marathon', load_marathon_service_config),
        ('chronos', load_chronos_job_config),
        ('adhoc', load_adhoc_job_config),
    )
    for cluster in list_clusters(service=service, soa_dir=soa_dir):
        for instance_type, load_config in loaders:
            instance_list = get_service_instance_list(
                service=service,
                cluster=cluster,
                instance_type=instance_type,
                soa_dir=soa_dir,
            )
            for _, instance in instance_list:
                yield load_config(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    soa_dir=soa_dir,
                    load_deployments=False,
                )
Exemple #23
0
def get_instance_configs_for_service(service, soa_dir):
    """Generate instance configs for ``service`` across all of its clusters.

    Clusters come from ``list_clusters``. Within one cluster the marathon
    configs are produced first, followed by chronos and then adhoc. All
    loaders are called with ``load_deployments=False``.

    :param service: service name
    :param soa_dir: forwarded as ``soa_dir`` to every helper that is called
    """
    def _configs_of_type(cluster, instance_type, load_config):
        # One loaded config per (ignored, instance) pair of this type.
        for _, name in get_service_instance_list(
            service=service,
            cluster=cluster,
            instance_type=instance_type,
            soa_dir=soa_dir,
        ):
            yield load_config(
                service=service,
                instance=name,
                cluster=cluster,
                soa_dir=soa_dir,
                load_deployments=False,
            )

    for cluster in list_clusters(service=service, soa_dir=soa_dir):
        for config in _configs_of_type(cluster, 'marathon', load_marathon_service_config):
            yield config
        for config in _configs_of_type(cluster, 'chronos', load_chronos_job_config):
            yield config
        for config in _configs_of_type(cluster, 'adhoc', load_adhoc_job_config):
            yield config
def main():
    """Send one monitoring event per configured chronos job of this cluster.

    The jobs configured for the local cluster are grouped by
    (service, instance) via ``build_service_job_mapping``. For each group a
    status and message are computed and handed to ``send_event`` together
    with the job's monitoring overrides.

    Exits with status 2 if Chronos cannot be reached while doing so.
    """
    args = parse_args()
    soa_dir = args.soa_dir
    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())
    cluster = utils.load_system_paasta_config().get_cluster()

    configured_jobs = chronos_tools.get_chronos_jobs_for_cluster(cluster, soa_dir=soa_dir)

    try:
        jobs_by_service_instance = build_service_job_mapping(client, configured_jobs)

        for key, job_state_pairs in jobs_by_service_instance.items():
            service = key[0]
            instance = key[1]
            job_config = load_chronos_job_config(
                service=service,
                instance=instance,
                cluster=cluster,
                soa_dir=soa_dir,
            )
            message, status_code = sensu_message_status_for_jobs(
                chronos_job_config=job_config,
                service=service,
                instance=instance,
                cluster=cluster,
                job_state_pairs=job_state_pairs
            )
            overrides = compose_monitoring_overrides_for_service(
                chronos_job_config=job_config,
                soa_dir=soa_dir
            )
            send_event(
                service=service,
                instance=instance,
                monitoring_overrides=overrides,
                status_code=status_code,
                message=message,
                soa_dir=soa_dir,
            )
    except (ServerNotFoundError, chronos.ChronosAPIError, socket_error) as err:
        failure = "CRITICAL: Unable to contact Chronos! Error: %s" % err
        print(utils.PaastaColors.red(failure))
        sys.exit(2)
Exemple #25
0
def validate_chronos(service_path):
    """Check that any chronos configurations are valid

    Prints one line per chronos instance: either the "valid" line, or the
    "invalid" line carrying the de-duplicated validation messages.

    :param service_path: path handed to ``path_to_soa_dir_service`` to obtain
        the soa dir and the service name
    :returns: True if every instance validated; False if any instance failed
        or if the service name starts with ``TMP_JOB_IDENTIFIER``
    """
    soa_dir, service = path_to_soa_dir_service(service_path)
    instance_type = 'chronos'
    chronos_spacer = paasta_tools.chronos_tools.INTERNAL_SPACER

    returncode = True

    if service.startswith(TMP_JOB_IDENTIFIER):
        print("Services using scheduled tasks cannot be named %s, as it clashes with the"
              " identifier used for temporary jobs" % TMP_JOB_IDENTIFIER)
        return False
    for cluster in list_clusters(service, soa_dir, instance_type):
        services_in_cluster = get_services_for_cluster(cluster=cluster, instance_type='chronos', soa_dir=soa_dir)
        # Job ids ("<service><spacer><instance>") that may be used as a parent.
        valid_services = set(
            "%s%s%s" % (name, chronos_spacer, job_instance)
            for name, job_instance in services_in_cluster
        )
        for instance in list_all_instances_for_service(
                service=service, clusters=[cluster], instance_type=instance_type,
                soa_dir=soa_dir):
            cjc = load_chronos_job_config(service, instance, cluster, False, soa_dir)
            parents = cjc.get_parents() or []
            checks_passed, check_msgs = cjc.validate()

            for parent in parents:
                # Badly formatted parents are not examined any further here.
                if not check_parent_format(parent):
                    continue
                if "%s%s%s" % (service, chronos_spacer, instance) == parent:
                    checks_passed = False
                    check_msgs.append("Job %s cannot depend on itself" % parent)
                elif parent not in valid_services:
                    checks_passed = False
                    check_msgs.append("Parent job %s could not be found" % parent)

            # Remove duplicate check_msgs
            unique_check_msgs = list(set(check_msgs))

            # print(...) with a single argument behaves identically on
            # Python 2 and Python 3; the bare print statement is py2-only.
            if not checks_passed:
                print(invalid_chronos_instance(cluster, instance, "\n  ".join(unique_check_msgs)))
                returncode = False
            else:
                print(valid_chronos_instance(cluster, instance))
    return returncode
Exemple #26
0
def validate_chronos(service_path):
    """Validate every chronos instance of the service found at ``service_path``.

    Prints one line per instance: the "valid" line, or the "invalid" line
    with the de-duplicated messages returned by the job config's
    ``validate()``.

    :param service_path: path handed to ``path_to_soa_dir_service`` to obtain
        the soa dir and the service name
    :returns: 0 if every instance validated, 1 if at least one did not
    """
    soa_dir, service = path_to_soa_dir_service(service_path)
    instance_type = 'chronos'

    returncode = 0
    for cluster in list_clusters(service, soa_dir, instance_type):
        for instance in list_all_instances_for_service(
                service=service, clusters=[cluster], instance_type=instance_type,
                soa_dir=soa_dir):
            cjc = load_chronos_job_config(service, instance, cluster, False, soa_dir)
            checks_passed, check_msgs = cjc.validate()

            # Remove duplicate check_msgs
            unique_check_msgs = list(set(check_msgs))

            # print(...) with a single argument behaves identically on
            # Python 2 and Python 3; the bare print statement is py2-only.
            if not checks_passed:
                print(invalid_chronos_instance(cluster, instance, "\n  ".join(unique_check_msgs)))
                returncode = 1
            else:
                print(valid_chronos_instance(cluster, instance))
    return returncode
Exemple #27
0
def get_chronos_steps(service, soa_dir):
    """Return the deploy group of every chronos instance of ``service``.

    The result has one entry per chronos instance per cluster, in discovery
    order, so it can be compared 1-1 with the steps listed in deploy.yaml
    when linting.

    :param service: service name
    :param soa_dir: forwarded to the cluster/instance lookups and the loader
    :returns: list of ``get_deploy_group()`` values
    """
    return [
        load_chronos_job_config(
            service=service,
            instance=instance_name,
            cluster=cluster_name,
            soa_dir=soa_dir,
            load_deployments=False,
        ).get_deploy_group()
        for cluster_name in list_clusters(service, soa_dir)
        for _, instance_name in get_service_instance_list(
            service=service,
            cluster=cluster_name,
            instance_type='chronos',
            soa_dir=soa_dir,
        )
    ]
Exemple #28
0
def validate_chronos(service_path):
    """Check that any chronos configurations are valid

    Prints one line per chronos instance: either the "valid" line, or the
    "invalid" line carrying the de-duplicated validation messages.

    :param service_path: path handed to ``path_to_soa_dir_service`` to obtain
        the soa dir and the service name
    :returns: True if every instance validated, False if any instance failed
    """
    soa_dir, service = path_to_soa_dir_service(service_path)
    instance_type = "chronos"
    chronos_spacer = paasta_tools.chronos_tools.INTERNAL_SPACER

    returncode = True
    for cluster in list_clusters(service, soa_dir, instance_type):
        services_in_cluster = get_services_for_cluster(cluster=cluster, instance_type="chronos", soa_dir=soa_dir)
        # Job ids ("<service><spacer><instance>") that may be used as a parent.
        valid_services = set(
            "%s%s%s" % (name, chronos_spacer, job_instance)
            for name, job_instance in services_in_cluster
        )
        for instance in list_all_instances_for_service(
            service=service, clusters=[cluster], instance_type=instance_type, soa_dir=soa_dir
        ):
            cjc = load_chronos_job_config(service, instance, cluster, False, soa_dir)
            parents = cjc.get_parents() or []
            checks_passed, check_msgs = cjc.validate()

            for parent in parents:
                # Badly formatted parents are not examined any further here.
                if not check_parent_format(parent):
                    continue
                if "%s%s%s" % (service, chronos_spacer, instance) == parent:
                    checks_passed = False
                    check_msgs.append("Job %s cannot depend on itself" % parent)
                elif parent not in valid_services:
                    checks_passed = False
                    check_msgs.append("Parent job %s could not be found" % parent)

            # Remove duplicate check_msgs
            unique_check_msgs = list(set(check_msgs))

            # print(...) with a single argument behaves identically on
            # Python 2 and Python 3; the bare print statement is py2-only.
            if not checks_passed:
                print(invalid_chronos_instance(cluster, instance, "\n  ".join(unique_check_msgs)))
                returncode = False
            else:
                print(valid_chronos_instance(cluster, instance))
    return returncode
Exemple #29
0
def main():
    """Set up one chronos job and report the outcome through ``send_event``.

    The job is named by ``args.service_instance``. Exits 1 if that name
    cannot be decomposed. In every other case the process exits 0, because
    failures are reported as CRITICAL events rather than through the exit
    status.
    """
    configure_log()
    args = parse_args()
    soa_dir = args.soa_dir
    log.setLevel(logging.DEBUG if args.verbose else logging.WARNING)
    try:
        service, instance, _, __ = decompose_job_id(args.service_instance)
    except InvalidJobNameError:
        bad_name_msg = (
            "Invalid service instance '%s' specified. Format is service%sinstance."
            % (args.service_instance, SPACER))
        log.error(bad_name_msg)
        sys.exit(1)

    chronos_config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(chronos_config)
    cluster = load_system_paasta_config().get_cluster()

    try:
        chronos_job_config = chronos_tools.load_chronos_job_config(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
        )
    except (NoDeploymentsAvailable, NoDockerImageError):
        error_msg = "No deployment found for %s in cluster %s. Has Jenkins run for it?" % (
            args.service_instance, cluster)
        # This event is sent with instance=None, before the error is logged.
        send_event(
            service=service,
            instance=None,
            soa_dir=soa_dir,
            status=pysensu_yelp.Status.CRITICAL,
            output=error_msg,
        )
        log.error(error_msg)
        # exit 0 because the event was sent to the right team and this is not an issue with Paasta itself
        sys.exit(0)
    except chronos_tools.InvalidChronosConfigError as e:
        headline = "Could not read chronos configuration file for %s in cluster %s\n" % (
            args.service_instance, cluster)
        error_msg = headline + "Error was: %s" % str(e)
        log.error(error_msg)
        send_event(
            service=service,
            instance=instance,
            soa_dir=soa_dir,
            status=pysensu_yelp.Status.CRITICAL,
            output=error_msg,
        )
        # exit 0 because the event was sent to the right team and this is not an issue with Paasta itself
        sys.exit(0)

    complete_job_config = chronos_tools.create_complete_config(
        service=service,
        job_name=instance,
        soa_dir=soa_dir,
    )
    return_code, message = setup_job(
        service=service,
        instance=instance,
        cluster=cluster,
        chronos_job_config=chronos_job_config,
        complete_job_config=complete_job_config,
        client=client,
    )
    # A truthy status from setup_job is reported as CRITICAL.
    if return_code:
        sensu_status = pysensu_yelp.Status.CRITICAL
    else:
        sensu_status = pysensu_yelp.Status.OK
    send_event(
        service=service,
        instance=instance,
        soa_dir=soa_dir,
        status=sensu_status,
        output=message,
    )
    # We exit 0 because the script finished ok and the event was sent to the right team.
    sys.exit(0)
Exemple #30
0
def main():
    """Deploy the chronos job named by ``args.service_instance``.

    The result is always published with ``send_event``. The only non-zero
    exit (1) is for a job name that ``decompose_job_id`` rejects. Missing
    deployments and invalid chronos configs are reported as CRITICAL events
    and still exit 0.
    """
    configure_log()
    args = parse_args()
    soa_dir = args.soa_dir
    if not args.verbose:
        log.setLevel(logging.WARNING)
    else:
        log.setLevel(logging.DEBUG)
    try:
        service, instance, _, __ = decompose_job_id(args.service_instance)
    except InvalidJobNameError:
        log.error("Invalid service instance '%s' specified. Format is service%sinstance."
                  % (args.service_instance, SPACER))
        sys.exit(1)

    loaded_chronos_config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(loaded_chronos_config)
    system_config = load_system_paasta_config()
    cluster = system_config.get_cluster()

    try:
        job_config = chronos_tools.load_chronos_job_config(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
        )
    except (NoDeploymentsAvailable, NoDockerImageError):
        no_deploy_msg = "No deployment found for %s in cluster %s. Has Jenkins run for it?" % (
            args.service_instance, cluster)
        # Event first (with instance=None), log line second.
        send_event(
            service=service,
            instance=None,
            soa_dir=soa_dir,
            status=pysensu_yelp.Status.CRITICAL,
            output=no_deploy_msg,
        )
        log.error(no_deploy_msg)
        # exit 0 because the event was sent to the right team and this is not an issue with Paasta itself
        sys.exit(0)
    except chronos_tools.InvalidChronosConfigError as e:
        where = "Could not read chronos configuration file for %s in cluster %s\n" % (args.service_instance, cluster)
        why = "Error was: %s" % str(e)
        bad_config_msg = where + why
        log.error(bad_config_msg)
        send_event(
            service=service,
            instance=instance,
            soa_dir=soa_dir,
            status=pysensu_yelp.Status.CRITICAL,
            output=bad_config_msg,
        )
        # exit 0 because the event was sent to the right team and this is not an issue with Paasta itself
        sys.exit(0)

    full_job_config = chronos_tools.create_complete_config(
        service=service,
        job_name=instance,
        soa_dir=soa_dir,
    )
    setup_status, setup_output = setup_job(
        service=service,
        instance=instance,
        cluster=cluster,
        chronos_job_config=job_config,
        complete_job_config=full_job_config,
        client=client,
    )
    # Any truthy setup status maps to CRITICAL, a falsy one to OK.
    sensu_status = pysensu_yelp.Status.OK if not setup_status else pysensu_yelp.Status.CRITICAL
    send_event(
        service=service,
        instance=instance,
        soa_dir=soa_dir,
        status=sensu_status,
        output=setup_output,
    )
    # We exit 0 because the script finished ok and the event was sent to the right team.
    sys.exit(0)
Exemple #31
0
def paasta_rerun(args):
    """Reruns a Chronos job.

    For every requested cluster (``args.clusters``, comma separated, or all
    deployed clusters when it is None) a warning is printed and the cluster
    is skipped if it is unknown, not deployed to, or does not have
    ``args.instance`` deployed. Otherwise the rerun is executed on the
    remote master and the result is printed.

    :param args: argparse.Namespace obj created from sys.args by cli
    """
    system_paasta_config = load_system_paasta_config()
    soa_dir = args.soa_dir
    service = figure_out_service_name(
        args, soa_dir)  # exit with an error if the service doesn't exist
    if args.execution_date:
        execution_date = args.execution_date
    else:
        execution_date = None

    all_clusters = list_clusters(soa_dir=soa_dir)
    actual_deployments = get_actual_deployments(
        service, soa_dir)  # cluster.instance: sha
    # Defaults for a service with no deployments at all; without them the
    # code below fails with UnboundLocalError instead of printing warnings.
    deployed_clusters = []
    deployed_cluster_instance = {}
    if actual_deployments:
        deploy_pipeline = list(get_planned_deployments(
            service, soa_dir))  # cluster.instance
        deployed_clusters = list_deployed_clusters(deploy_pipeline,
                                                   actual_deployments)
        deployed_cluster_instance = _get_cluster_instance(
            actual_deployments.keys())

    if args.clusters is not None:
        clusters = args.clusters.split(",")
    else:
        clusters = deployed_clusters

    # print(...) with a single argument behaves identically on Python 2 and
    # Python 3; the bare print statement is py2-only.
    for cluster in clusters:
        print("cluster: %s" % cluster)

        if cluster not in all_clusters:
            print("  Warning: \"%s\" does not look like a valid cluster." % cluster)
            continue
        if cluster not in deployed_clusters:
            print("  Warning: service \"%s\" has not been deployed to \"%s\" yet." % (
                service, cluster))
            continue
        if not deployed_cluster_instance[cluster].get(args.instance, False):
            print(
                "  Warning: instance \"%s\" is either invalid "
                "or has not been deployed to \"%s\" yet." %
                (args.instance, cluster))
            continue

        try:
            chronos_job_config = chronos_tools.load_chronos_job_config(
                service,
                args.instance,
                cluster,
                load_deployments=False,
                soa_dir=soa_dir)
            if chronos_tools.uses_time_variables(
                    chronos_job_config) and execution_date is None:
                print(
                    "  Warning: \"%s\" uses time variables interpolation, "
                    "please supply a `--execution_date` argument." %
                    args.instance)
                continue
        except chronos_tools.UnknownChronosJobError as e:
            print("  Warning: %s" % e.message)
            continue
        # Once defaulted, the same execution date is reused for later clusters.
        if execution_date is None:
            execution_date = _get_default_execution_date()

        rc, output = execute_chronos_rerun_on_remote_master(
            service=service,
            instancename=args.instance,
            cluster=cluster,
            verbose=args.verbose,
            execution_date=execution_date.strftime(
                chronos_tools.EXECUTION_DATE_FORMAT),
            system_paasta_config=system_paasta_config,
        )
        if rc == 0:
            print(PaastaColors.green('  successfully created job'))
        else:
            print(PaastaColors.red('  error'))
            print(output)
Exemple #32
0
def perform_command(command, service, instance, cluster, verbose, soa_dir):
    """Performs a start/stop/restart/status on an instance
    :param command: String of start, stop, restart, status or scale
    :param service: service name
    :param instance: instance name, like "main" or "canary"
    :param cluster: cluster name
    :param verbose: int verbosity level
    :param soa_dir: soa dir passed to the job config loaders
    :returns: A unix-style return code
    :raises NotImplementedError: if ``command`` is none of
        start/stop/restart/status
    """
    chronos_config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(chronos_config)
    # Loaded once up front and shared by the start, restart and status
    # branches (status used to reload it with identical arguments).
    job_config = chronos_tools.load_chronos_job_config(
        service=service,
        instance=instance,
        cluster=cluster,
        soa_dir=soa_dir,
    )
    complete_job_config = chronos_tools.create_complete_config(service, instance, soa_dir=soa_dir)
    job_id = complete_job_config["name"]

    if command == "start":
        start_chronos_job(
            service=service,
            instance=instance,
            job_id=job_id,
            client=client,
            cluster=cluster,
            job_config=job_config,
            complete_job_config=complete_job_config,
            emergency=True,
        )
    elif command == "stop":
        # Only "stop" also looks up temporary jobs.
        matching_jobs = chronos_tools.lookup_chronos_jobs(
            service=service,
            instance=instance,
            client=client,
            include_disabled=True,
            include_temporary=True
        )
        stop_chronos_job(service, instance, client, cluster, matching_jobs, emergency=True)
    elif command == "restart":
        matching_jobs = chronos_tools.lookup_chronos_jobs(
            service=service,
            instance=instance,
            client=client,
            include_disabled=True,
        )
        restart_chronos_job(
            service=service,
            instance=instance,
            job_id=job_id,
            client=client,
            cluster=cluster,
            matching_jobs=matching_jobs,
            job_config=job_config,
            complete_job_config=complete_job_config,
            emergency=True,
        )
    elif command == "status":
        # Verbose mode shows previous versions.
        matching_jobs = chronos_tools.lookup_chronos_jobs(
            service=service,
            instance=instance,
            client=client,
            include_disabled=True,
        )
        sorted_matching_jobs = chronos_tools.sort_jobs(matching_jobs)
        paasta_print(status_chronos_jobs(client, sorted_matching_jobs, job_config, verbose))
    else:
        # The command parser shouldn't have let us get this far...
        raise NotImplementedError("Command %s is not implemented!" % command)
    return 0
Exemple #33
0
def get_instance_configs_for_service(
    service: str,
    soa_dir: str,
    type_filter: Optional[Sequence[str]] = None,
) -> Iterable[InstanceConfig]:
    """Yield instance configs of ``service`` for every cluster it is in.

    Within each cluster the configs are produced in the fixed order
    marathon, chronos, adhoc, kubernetes. Types not listed in
    ``type_filter`` are skipped. Every loader is called with
    ``load_deployments=False``.

    :param service: service name
    :param soa_dir: forwarded as ``soa_dir`` to every lookup and loader
    :param type_filter: instance types to include; ``None`` means all four
    """
    def _configs_of_type(cluster: str, instance_type: str, load_config) -> Iterable[InstanceConfig]:
        # One loaded config per (ignored, instance) pair of this type.
        for _, name in get_service_instance_list(
                service=service,
                cluster=cluster,
                instance_type=instance_type,
                soa_dir=soa_dir,
        ):
            yield load_config(
                service=service,
                instance=name,
                cluster=cluster,
                soa_dir=soa_dir,
                load_deployments=False,
            )

    for cluster in list_clusters(
            service=service,
            soa_dir=soa_dir,
    ):
        if type_filter is None:
            type_filter = ['marathon', 'chronos', 'adhoc', 'kubernetes']
        if 'marathon' in type_filter:
            yield from _configs_of_type(cluster, 'marathon', load_marathon_service_config)
        if 'chronos' in type_filter:
            yield from _configs_of_type(cluster, 'chronos', load_chronos_job_config)
        if 'adhoc' in type_filter:
            yield from _configs_of_type(cluster, 'adhoc', load_adhoc_job_config)
        if 'kubernetes' in type_filter:
            yield from _configs_of_type(cluster, 'kubernetes', load_kubernetes_service_config)
Exemple #34
0
def paasta_rerun(args):
    """Reruns a Chronos job.

    For every requested cluster (``args.clusters``, comma separated, or all
    deployed clusters when it is None) a warning is printed and the cluster
    is skipped if it is unknown, not deployed to, or does not have
    ``args.instance`` deployed. Otherwise the rerun is executed on the
    remote master and the result is printed.

    :param args: argparse.Namespace obj created from sys.args by cli
    """
    soa_dir = args.soa_dir
    service = figure_out_service_name(args, soa_dir)  # exit with an error if the service doesn't exist
    if args.execution_date:
        execution_date = args.execution_date
    else:
        execution_date = None

    all_clusters = list_clusters(soa_dir=soa_dir)
    actual_deployments = get_actual_deployments(service, soa_dir)  # cluster.instance: sha
    # Defaults for a service with no deployments at all; without them the
    # code below fails with UnboundLocalError instead of printing warnings.
    deployed_clusters = []
    deployed_cluster_instance = {}
    if actual_deployments:
        deploy_pipeline = list(get_planned_deployments(service, soa_dir))  # cluster.instance
        deployed_clusters = list_deployed_clusters(deploy_pipeline, actual_deployments)
        deployed_cluster_instance = _get_cluster_instance(actual_deployments.keys())

    if args.clusters is not None:
        clusters = args.clusters.split(",")
    else:
        clusters = deployed_clusters

    # print(...) with a single argument behaves identically on Python 2 and
    # Python 3; the bare print statement is py2-only.
    for cluster in clusters:
        print("cluster: %s" % cluster)

        if cluster not in all_clusters:
            print("  Warning: \"%s\" does not look like a valid cluster." % cluster)
            continue
        if cluster not in deployed_clusters:
            print("  Warning: service \"%s\" has not been deployed to \"%s\" yet." % (service, cluster))
            continue
        if not deployed_cluster_instance[cluster].get(args.instance, False):
            print("  Warning: instance \"%s\" is either invalid "
                  "or has not been deployed to \"%s\" yet." % (args.instance, cluster))
            continue

        try:
            chronos_job_config = chronos_tools.load_chronos_job_config(
                service, args.instance, cluster, load_deployments=False, soa_dir=soa_dir)
            if chronos_tools.uses_time_variables(chronos_job_config) and execution_date is None:
                print("  Warning: \"%s\" uses time variables interpolation, "
                      "please supply a `--execution_date` argument." % args.instance)
                continue
        except chronos_tools.UnknownChronosJobError as e:
            print("  Warning: %s" % e.message)
            continue
        # Once defaulted, the same execution date is reused for later clusters.
        if execution_date is None:
            execution_date = _get_default_execution_date()

        rc, output = execute_chronos_rerun_on_remote_master(
            service=service,
            instancename=args.instance,
            cluster=cluster,
            verbose=args.verbose,
            execution_date=execution_date.strftime(chronos_tools.EXECUTION_DATE_FORMAT)
        )
        if rc == 0:
            print(PaastaColors.green('  successfully created job'))
        else:
            print(PaastaColors.red('  error'))
            print(output)
Exemple #35
0
def main():
    """Clone a chronos job (or its whole related graph) and submit the clones.

    The job is named by ``args.service_instance``. Unless
    ``args.run_all_related_jobs`` is set, only that job is cloned; if it is
    a dependent job, its parents are removed and it gets the default
    schedule. Each clone's command is rewritten for ``args.execution_date``.
    All clones are built before the first one is added to Chronos.

    Raises ``NoDeploymentsAvailable`` when no related job configs are found.
    Errors from building a complete config are re-raised, after a message
    is printed for the handled cases.
    """
    args = parse_args()

    cluster = load_system_paasta_config().get_cluster()

    service, instance = chronos_tools.decompose_job_id(args.service_instance)

    client = chronos_tools.get_chronos_client(chronos_tools.load_chronos_config())

    related_jobs = chronos_tools.get_related_jobs_configs(
        cluster,
        service,
        instance,
        soa_dir=args.soa_dir,
    )
    if not related_jobs:
        paasta_print(
            "No deployment found for {} in cluster {}. Has Jenkins run for it?".format(
                args.service_instance,
                cluster,
            ),
        )
        raise NoDeploymentsAvailable

    if not args.run_all_related_jobs:
        # Strip all the configuration for the related services
        # those information will not be used by the rest of the flow
        requested_job = (service, instance)
        related_jobs = {requested_job: related_jobs[requested_job]}

    complete_job_configs = {}
    for job_key in related_jobs:
        srv, inst = job_key
        try:
            complete_job_configs[(srv, inst)] = chronos_tools.create_complete_config(
                service=srv,
                job_name=inst,
                soa_dir=args.soa_dir,
            )
        except (NoDeploymentsAvailable, NoDockerImageError) as e:
            paasta_print(
                "No deployment found for {} in cluster {}. Has Jenkins run for it?".format(
                    chronos_tools.compose_job_id(srv, inst),
                    cluster,
                ),
            )
            raise e
        except NoConfigurationForServiceError as e:
            paasta_print(
                "Could not read chronos configuration file for {} in cluster {}\nError was: {}".format(
                    chronos_tools.compose_job_id(srv, inst),
                    cluster,
                    str(e),
                ),
            )
            raise e
        except chronos_tools.InvalidParentError as e:
            # Propagated unchanged, without printing anything.
            raise e

    if args.run_all_related_jobs:
        sorted_jobs = chronos_tools.topological_sort_related_jobs(
            cluster, service, instance, soa_dir=args.soa_dir)
    else:
        sorted_jobs = [(service, instance)]

    timestamp = datetime.datetime.utcnow().isoformat()

    chronos_to_add = []
    for (job_service, job_instance) in sorted_jobs:
        # complete_job_config is a formatted version of the job,
        # so the command is formatted in the context of 'now'
        # replace it with the 'original' cmd so it can be re rendered
        job_config = chronos_tools.load_chronos_job_config(
            service=job_service,
            instance=job_instance,
            cluster=cluster,
            soa_dir=args.soa_dir,
        )
        complete_job_config = complete_job_configs[(job_service, job_instance)]
        complete_job_config['command'] = job_config.get_cmd()
        clone = clone_job(
            chronos_job=complete_job_config,
            timestamp=timestamp,
            force_disabled=args.force_disabled,
        )
        # modify the command to run commands for a given date
        rerun_date = datetime.datetime.strptime(args.execution_date, "%Y-%m-%dT%H:%M:%S")
        clone = modify_command_for_date(
            chronos_job=clone,
            date=rerun_date,
            verbose=args.verbose,
        )

        if not args.run_all_related_jobs and chronos_tools.get_job_type(
                clone) == chronos_tools.JobType.Dependent:
            # If the job is a dependent job and we want to re-run only the specific instance
            # remove the parents and update the schedule to start the job as soon as possible
            without_parents = remove_parents(clone)
            clone = set_default_schedule(without_parents)

        chronos_to_add.append(clone)

    for pending_job in chronos_to_add:
        client.add(pending_job)
Exemple #36
0
def paasta_rerun(args):
    """Reruns a Chronos job.

    For every requested cluster (``args.clusters``, comma separated, or all
    deployed clusters when it is None) a warning is printed and the cluster
    is skipped if it is unknown, not deployed to, or does not have
    ``args.instance`` deployed. If the instance has related jobs and no
    ``args.rerun_type`` was given, an error is printed and the function
    returns without processing further clusters. Otherwise the rerun is
    executed on the remote master and the result is printed.

    :param args: argparse.Namespace obj created from sys.args by cli
    """
    system_paasta_config = load_system_paasta_config()
    soa_dir = args.soa_dir
    service = figure_out_service_name(
        args, soa_dir)  # exit with an error if the service doesn't exist
    if args.execution_date:
        execution_date = args.execution_date
    else:
        execution_date = None

    all_clusters = list_clusters(soa_dir=soa_dir)
    actual_deployments = get_actual_deployments(
        service, soa_dir)  # cluster.instance: sha
    # Defaults for a service with no deployments at all; without them the
    # code below fails with UnboundLocalError instead of printing warnings.
    deployed_clusters = []
    deployed_cluster_instance = {}
    if actual_deployments:
        deploy_pipeline = list(get_planned_deployments(
            service, soa_dir))  # cluster.instance
        deployed_clusters = list_deployed_clusters(deploy_pipeline,
                                                   actual_deployments)
        deployed_cluster_instance = _get_cluster_instance(
            actual_deployments.keys())

    if args.clusters is not None:
        clusters = args.clusters.split(",")
    else:
        clusters = deployed_clusters

    for cluster in clusters:
        paasta_print("cluster: %s" % cluster)

        if cluster not in all_clusters:
            paasta_print(
                "  Warning: \"%s\" does not look like a valid cluster." %
                cluster)
            continue
        if cluster not in deployed_clusters:
            paasta_print(
                f"  Warning: service \"{service}\" has not been deployed to \"{cluster}\" yet."
            )
            continue
        if not deployed_cluster_instance[cluster].get(args.instance, False):
            paasta_print(("  Warning: instance \"%s\" is either invalid "
                          "or has not been deployed to \"%s\" yet." %
                          (args.instance, cluster)))
            continue

        try:
            chronos_job_config = chronos_tools.load_chronos_job_config(
                service,
                args.instance,
                cluster,
                load_deployments=False,
                soa_dir=soa_dir,
            )
            if chronos_tools.uses_time_variables(
                    chronos_job_config) and execution_date is None:
                paasta_print(
                    ("  Warning: \"%s\" uses time variables interpolation, "
                     "please supply a `--execution_date` argument." %
                     args.instance))
                continue
        except NoConfigurationForServiceError as e:
            paasta_print("  Warning: %s" % e)
            continue
        # Once defaulted, the same execution date is reused for later clusters.
        if execution_date is None:
            execution_date = _get_default_execution_date()

        # Look the related jobs up in the same soa_dir as everything else,
        # so a non-default soa dir is honoured here too.
        related_job_configs = get_related_jobs_configs(
            cluster,
            service,
            args.instance,
            soa_dir=soa_dir,
        )

        if not args.rerun_type and len(related_job_configs) > 1:
            # List every related job except the requested one.
            instance_names = sorted([
                f'- {srv}{chronos_tools.INTERNAL_SPACER}{inst}'
                for srv, inst in related_job_configs
                if srv != service or inst != args.instance
            ])
            paasta_print(PaastaColors.red('  error'))
            paasta_print(
                'Instance {instance} has dependency relations with the following jobs:\n'
                '{relations}\n'
                '\n'
                'Please specify the rerun policy via --rerun-type argument'.
                format(
                    instance=args.instance,
                    relations='\n'.join(instance_names),
                ), )
            return

        rc, output = execute_chronos_rerun_on_remote_master(
            service=service,
            instancename=args.instance,
            cluster=cluster,
            verbose=args.verbose,
            execution_date=execution_date.strftime(
                chronos_tools.EXECUTION_DATE_FORMAT),
            system_paasta_config=system_paasta_config,
            run_all_related_jobs=args.rerun_type == 'graph',
            force_disabled=args.force_disabled,
        )
        if rc == 0:
            paasta_print(PaastaColors.green('  successfully created job'))
        else:
            paasta_print(PaastaColors.red('  error'))
            paasta_print(output)
Exemple #37
0
def perform_command(command, service, instance, cluster, verbose, soa_dir):
    """Performs a start/stop/restart/status on a chronos job instance.

    The chronos client, the job config and the complete job config are all
    loaded before the command is dispatched, so a loading error is raised
    regardless of which command was requested.

    :param command: String of start, stop, restart or status
    :param service: service name
    :param instance: instance name, like "main" or "canary"
    :param cluster: cluster name
    :param verbose: int verbosity level; only forwarded to the status output
    :param soa_dir: passed through to load_chronos_job_config and
                    create_complete_config
    :returns: A unix-style return code (0 whenever the command completes)
    :raises NotImplementedError: if command is not one of the commands above
    """
    chronos_config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(chronos_config)
    # Loaded once and shared by every branch below that needs it.
    job_config = chronos_tools.load_chronos_job_config(
        service=service,
        instance=instance,
        cluster=cluster,
        soa_dir=soa_dir,
    )
    complete_job_config = chronos_tools.create_complete_config(
        service,
        instance,
        soa_dir=soa_dir,
    )
    job_id = complete_job_config["name"]

    if command == "start":
        start_chronos_job(
            service=service,
            instance=instance,
            job_id=job_id,
            client=client,
            cluster=cluster,
            job_config=job_config,
            complete_job_config=complete_job_config,
            emergency=True,
        )
    elif command == "stop":
        # Unlike restart/status, stop also looks up temporary jobs.
        matching_jobs = chronos_tools.lookup_chronos_jobs(
            service=service,
            instance=instance,
            client=client,
            include_disabled=True,
            include_temporary=True,
        )
        stop_chronos_job(
            service,
            instance,
            client,
            cluster,
            matching_jobs,
            emergency=True,
        )
    elif command == "restart":
        matching_jobs = chronos_tools.lookup_chronos_jobs(
            service=service,
            instance=instance,
            client=client,
            include_disabled=True,
        )
        restart_chronos_job(
            service=service,
            instance=instance,
            job_id=job_id,
            client=client,
            cluster=cluster,
            matching_jobs=matching_jobs,
            job_config=job_config,
            complete_job_config=complete_job_config,
            emergency=True,
        )
    elif command == "status":
        # Verbose mode shows previous versions.
        matching_jobs = chronos_tools.lookup_chronos_jobs(
            service=service,
            instance=instance,
            client=client,
            include_disabled=True,
        )
        sorted_matching_jobs = chronos_tools.sort_jobs(matching_jobs)
        # job_config was already loaded above with the same arguments, so it
        # is reused here rather than read a second time.
        paasta_print(
            status_chronos_jobs(client, sorted_matching_jobs, job_config,
                                verbose))
    else:
        # The command parser shouldn't have let us get this far...
        raise NotImplementedError("Command %s is not implemented!" % command)
    return 0