Ejemplo n.º 1
0
  def execute(self, context):
    """Restart a job, optionally restricted to specific instances.

    Validates the restart options, builds an UpdaterConfig describing the
    restart batching/failure behavior, and issues the restart through the
    scheduler API for the job's cluster.

    Args:
      context: command context carrying the parsed options, output helpers
        and API access.

    Returns:
      EXIT_OK on success, or EXIT_INVALID_PARAMETER for bad options.
      (log_response_and_raise raises on a failed scheduler response.)
    """
    # Check for negative max_total_failures option - negative is an error.
    # for per-instance failures, negative means "no limit", so it's allowed.
    if context.options.max_total_failures < 0:
      # The guard rejects only negatives (zero passes), so the message says
      # ">= 0"; the previous ">0" wording contradicted the actual check.
      context.print_err("max_total_failures option must be >= 0, but you specified %s" %
          context.options.max_total_failures)
      return EXIT_INVALID_PARAMETER

    job = context.options.instance_spec.jobkey
    # None is the API's way of saying "all instances of the job".
    instances = (None if context.options.instance_spec.instance == ALL_INSTANCES else
        context.options.instance_spec.instance)
    if instances is not None and context.options.strict:
      context.verify_instances_option_validity(job, instances)
    api = context.get_api(job.cluster)
    # A config file is optional for a restart; pass None when absent.
    config = (context.get_job_config(job, context.options.config)
        if context.options.config else None)
    updater_config = UpdaterConfig(
        context.options.batch_size,
        context.options.restart_threshold,
        context.options.watch_secs,
        context.options.max_per_instance_failures,
        context.options.max_total_failures)
    resp = api.restart(job, instances, updater_config,
        context.options.healthcheck_interval_seconds, config=config)

    # Raises (with the given message) if the scheduler reported an error.
    context.log_response_and_raise(resp,
                                   err_msg="Error restarting job %s:" % str(job))
    context.print_out("Job %s restarted successfully" % str(job))
    if context.options.open_browser:
      webbrowser.open_new_tab(get_job_page(api, job))
    return EXIT_OK
Ejemplo n.º 2
0
    def execute(self, context):
        """Restart a job (optionally limited to specific shards) via the API.

        Validates options, builds an UpdaterConfig for the restart, issues
        the restart call, then reports the outcome to the user and the log.

        Args:
            context: command context carrying parsed options, output/log
                helpers and API access.

        Returns:
            EXIT_OK on success, or EXIT_INVALID_PARAMETER for bad options.
        """
        # Check for negative max_total_failures option - negative is an error.
        # for per-shard failures, negative means "no limit", so it's allowed.
        # NOTE(review): the guard accepts zero, so the ">0" wording in the
        # message below looks inaccurate — confirm intended bound.
        if context.options.max_total_failures < 0:
            context.print_err(
                "max_total_failures option must be >0, but you specified %s" %
                context.options.max_total_failures)
            context.print_log(
                logging.INFO, "Error: max_total_failures option=%s" %
                context.options.max_total_failures)
            return EXIT_INVALID_PARAMETER

        job = context.options.instance_spec.jobkey
        # None tells the API to restart every instance of the job.
        instances = (None
                     if context.options.instance_spec.instance == ALL_INSTANCES
                     else context.options.instance_spec.instance)
        if instances is not None and context.options.strict:
            context.verify_shards_option_validity(job, instances)
        api = context.get_api(job.cluster)
        # A config file is optional for a restart; pass None when absent.
        config = (context.get_job_config(job, context.options.config)
                  if context.options.config else None)
        updater_config = UpdaterConfig(
            context.options.batch_size, context.options.restart_threshold,
            context.options.watch_secs,
            context.options.max_per_instance_failures,
            context.options.max_total_failures,
            context.options.rollback_on_failure)
        resp = api.restart(job,
                           instances,
                           updater_config,
                           context.options.healthcheck_interval_seconds,
                           config=config)

        # Print a user-facing summary first, then let
        # check_and_log_response handle logging / error propagation.
        if resp.responseCode != ResponseCode.OK:
            context.print_err("Error restarting job %s; see log for details" %
                              str(job))
        else:
            context.print_out("Job %s restarted successfully" % str(job))
        context.check_and_log_response(resp)
        if context.options.open_browser:
            context.open_job_page(api, context.options.jobspec)
        return EXIT_OK
Ejemplo n.º 3
0
def really_restart(args, options):
    """Restart the shards of the job named by *args* through the scheduler.

    Validates the restart options, disambiguates the target job from the
    command-line arguments, builds an UpdaterConfig for the restart, issues
    the restart call, and finally opens the job page if requested.

    Args:
        args: positional command-line arguments identifying the job.
        options: parsed command-line options.

    Raises:
        SystemExit: with status 1 when max_total_failures is negative.
    """
    if options.max_total_failures < 0:
        # The guard rejects only negatives (zero passes), so the message
        # says ">= 0"; the previous ">0" wording contradicted the check.
        print("max_total_failures option must be >= 0, but you specified %s" %
              options.max_total_failures,
              file=sys.stderr)
        # sys.exit, not the builtin exit(): the builtin is an interactive
        # convenience injected by site.py and may be absent under python -S.
        sys.exit(1)
    maybe_disable_hooks(options)
    api, job_key, config_file = LiveJobDisambiguator.disambiguate_args_or_die(
        args, options, make_client_factory())
    # A config file is optional for a restart; pass None when absent.
    config = get_job_config(job_key.to_path(), config_file,
                            options) if config_file else None
    updater_config = UpdaterConfig(options.batch_size,
                                   options.restart_threshold,
                                   options.watch_secs,
                                   options.max_per_shard_failures,
                                   options.max_total_failures)
    resp = api.restart(job_key,
                       options.shards,
                       updater_config,
                       options.health_check_interval_seconds,
                       config=config)
    check_and_log_response(resp)
    # Open the job's scheduler page in a browser if the user asked for it.
    handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role,
                job_key.env, job_key.name)
Ejemplo n.º 4
0
from apache.aurora.common.aurora_job_key import AuroraJobKey

from gen.apache.aurora.api.AuroraSchedulerManager import Client as scheduler_client
from gen.apache.aurora.api.ttypes import (AssignedTask, Response, ResponseCode,
                                          Result, ScheduledTask,
                                          ScheduleStatus, ScheduleStatusResult,
                                          TaskConfig)

# Shared fixtures for the restarter tests below.
SESSION_KEY = 'test_session'  # dummy scheduler session token
CLUSTER = 'smfd'  # cluster name used to build the test job key
# Job key under test: (cluster, role, env, name).
JOB = AuroraJobKey(CLUSTER, 'johndoe', 'test', 'test_job')
HEALTH_CHECK_INTERVAL_SECONDS = 5
# Restart batching/failure policy passed to the Restarter under test.
UPDATER_CONFIG = UpdaterConfig(
    batch_size=2,
    restart_threshold=23,
    watch_secs=45,
    max_per_shard_failures=0,
    max_total_failures=0,  # zero means "no total failures tolerated"
    rollback_on_failure=True,
)


class TestRestarter(MoxTestBase):
    def setUp(self):
        super(TestRestarter, self).setUp()

        self.mock_scheduler = self.mox.CreateMock(scheduler_client)
        self.mock_instance_watcher = self.mox.CreateMock(InstanceWatcher)
        self.lock = None

        self.restarter = Restarter(
            JOB, UPDATER_CONFIG, HEALTH_CHECK_INTERVAL_SECONDS,