Example #1
 def assert_kill_calls(cls, api, instance_range=None, instances=None):
     if instances:
         kill_calls = [
             call(AuroraJobKey.from_path(cls.TEST_JOBSPEC), instances)
         ]
     else:
         kill_calls = [
             call(AuroraJobKey.from_path(cls.TEST_JOBSPEC), [i])
             for i in instance_range
         ]
     assert api.kill_job.mock_calls == kill_calls
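
Note: the helper above leans on mock's call recording. A minimal, self-contained sketch of that pattern (plain strings stand in for AuroraJobKey objects, and the stdlib unittest.mock stands in for the mock package these tests import):

from unittest.mock import Mock, call

api = Mock()
api.kill_job('west/bozo/test/hello', [1])
api.kill_job('west/bozo/test/hello', [2])

# kill_job.mock_calls records every invocation in order, so a whole batched
# kill sequence can be asserted with a single list comparison, as above.
assert api.kill_job.mock_calls == [
    call('west/bozo/test/hello', [1]),
    call('west/bozo/test/hello', [2]),
]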
Example #2
  def test_kill_job_with_instances_batched_large(self):
    """Test kill client-side API logic."""
    mock_context = FakeAuroraCommandContext()
    with contextlib.nested(
        patch('threading._Event.wait'),
        patch('apache.aurora.client.cli.jobs.Job.create_context', return_value=mock_context),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)):
      api = mock_context.get_api('west')
      status_result = self.create_status_call_result()
      mock_context.add_expected_status_query_result(status_result)
      api.kill_job.return_value = self.get_kill_job_response()
      mock_context.add_expected_status_query_result(self.create_status_call_result(
          self.create_mock_task(ScheduleStatus.KILLED)))

      with temporary_file() as fp:
        fp.write(self.get_valid_config())
        fp.flush()
        cmd = AuroraCommandLine()
        cmd.execute(['job', 'kill', '--config=%s' % fp.name, 'west/bozo/test/hello/0,2,4-13'])

      # Now check that the right API calls got made.
      assert api.kill_job.call_count == 3
      api.kill_job.assert_called_with(AuroraJobKey.from_path('west/bozo/test/hello'),
          [12, 13])
      # Expect 5 calls in total (3 from JobMonitor).
      self.assert_scheduler_called(api, self.get_expected_task_query([12, 13]), 5)
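
Note: the three expected kill_job calls follow from how the instance spec expands and is batched. A standalone sketch of that arithmetic (the batch size of 5 is inferred from the assertions above, not taken from Aurora's source):

def parse_instances(spec):
    # Expand a spec like '0,2,4-13' into a flat instance list.
    instances = []
    for part in spec.split(','):
        if '-' in part:
            lo, hi = map(int, part.split('-'))
            instances.extend(range(lo, hi + 1))
        else:
            instances.append(int(part))
    return instances

def batches(instances, size=5):
    return [instances[i:i + size] for i in range(0, len(instances), size)]

print(batches(parse_instances('0,2,4-13')))
# [[0, 2, 4, 5, 6], [7, 8, 9, 10, 11], [12, 13]] -> three kill_job calls,
# the last one with [12, 13], matching the assertions above.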
Example #3
  def setUp(self):
    self.RETURN_VALUE = 'foo'
    test_obj = self

    class FakeAuroraClientAPI(object):

      def kill_job(self, job_key, instances=None, lock=None):
        test_obj.API_CALL = functools.partial(self.kill_job, job_key, instances, lock)
        return test_obj.RETURN_VALUE

      def restart(self, job_key, shards, restart_settings):
        test_obj.API_CALL = functools.partial(self.restart, job_key, shards, restart_settings)
        return test_obj.RETURN_VALUE

      def start_cronjob(self, job_key):
        test_obj.API_CALL = functools.partial(self.start_cronjob, job_key)
        return test_obj.RETURN_VALUE

    self._patch_bases(NonHookedAuroraClientAPI, (FakeAuroraClientAPI, ))
    self.api = NonHookedAuroraClientAPI()

    # Test args passed in to check that these are proxied un-modified
    self.test_job_key = AuroraJobKey.from_path('a/b/c/d')
    self.test_config = 'bar'
    self.test_shards = 'baz'
    self.test_lock = 'lock'
    self.health_check_interval_seconds = 'baa'
Example #4
  def disambiguate_args_or_die(cls, args, options, client_factory=AuroraClientAPI):
    """
    Returns a (AuroraClientAPI, AuroraJobKey, AuroraConfigFile:str) tuple
    if one can be found given the args, potentially querying the scheduler with the returned client.
    Calls die() with an appropriate error message otherwise.

    Arguments:
      args: args from app command invocation.
      options: options from app command invocation. must have env and cluster attributes.
      client_factory: a callable (cluster) -> AuroraClientAPI.
    """
    if not args:
      die('job path is required')
    try:
      job_key = AuroraJobKey.from_path(args[0])
      client = client_factory(job_key.cluster)
      config_file = args[1] if len(args) > 1 else None  # the config for hooks
      return client, job_key, config_file
    except AuroraJobKey.Error:
      log.warning("Failed to parse job path, falling back to compatibility mode")
      role = args[0] if len(args) > 0 else None
      name = args[1] if len(args) > 1 else None
      env = None
      config_file = None  # deprecated form does not support hooks functionality
      cluster = options.cluster
      if not cluster:
        die('cluster is required')
      client = client_factory(cluster)
      return client, cls._disambiguate_or_die(client, role, env, name), config_file
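
Note: the function above accepts either a canonical job path or the deprecated role/name form. A hypothetical sketch of just that dispatch, with no scheduler or client involved:

def split_args(args):
    # Canonical form: 'cluster/role/env/name' [config_file].
    parts = args[0].split('/')
    if len(parts) == 4 and all(parts):
        return {'job_path': args[0],
                'config_file': args[1] if len(args) > 1 else None}
    # Compatibility form: role [name]; cluster and env come from options.
    return {'role': args[0], 'name': args[1] if len(args) > 1 else None}

print(split_args(['west/bozo/test/hello', 'hello.aurora']))
print(split_args(['bozo', 'hello']))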
Example #5
 def create_probe_hosts(cls, num_hosts, predicted, safe, safe_in):
     hosts = defaultdict(list)
     for i in range(num_hosts):
         host_name = "h%s" % i
         job = AuroraJobKey.from_path("west/role/env/job%s" % i)
         hosts[host_name].append(DomainUpTimeSlaVector.JobUpTimeDetails(job, predicted, safe, safe_in))
     return hosts
Example #6
 def create_hosts(cls, num_hosts, percentage, duration):
     hosts = defaultdict(list)
     for i in range(num_hosts):
         host_name = 'h%s' % i
         job = AuroraJobKey.from_path('west/role/env/job%s' % i)
         hosts[host_name].append(JobUpTimeLimit(job, percentage, duration))
     return [hosts]
Example #7
  def test_start_update_command_line_succeeds(self):
    mock_context = FakeAuroraCommandContext()
    resp = self.create_simple_success_response()
    resp.result = Result(startJobUpdateResult=StartJobUpdateResult(updateId="id"))
    with contextlib.nested(
        patch('apache.aurora.client.cli.update.Update.create_context', return_value=mock_context),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)):
      mock_api = mock_context.get_api('west')
      mock_api.start_job_update.return_value = resp
      with temporary_file() as fp:
        fp.write(self.get_valid_config())
        fp.flush()
        cmd = AuroraCommandLine()
        result = cmd.execute(['beta-update', 'start', self.TEST_JOBSPEC, fp.name])
        assert result == EXIT_OK

      update_url_msg = StartUpdate.UPDATE_MSG_TEMPLATE % (
          mock_context.get_update_page(mock_api, AuroraJobKey.from_path(self.TEST_JOBSPEC), "id"))

      assert mock_api.start_job_update.call_count == 1
      args, kwargs = mock_api.start_job_update.call_args
      assert isinstance(args[0], AuroraConfig)
      assert args[1] is None
      assert mock_context.get_out() == [update_url_msg]
      assert mock_context.get_err() == []
Example #8
  def test_killall_job_wait_until_timeout(self):
    """Test kill client-side API logic."""
    mock_context = FakeAuroraCommandContext()
    mock_scheduler_proxy = create_autospec(spec=SchedulerThriftApiSpec, instance=True)
    with contextlib.nested(
        patch('threading._Event.wait'),
        patch('apache.aurora.client.cli.jobs.Job.create_context', return_value=mock_context),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)):

      api = mock_context.get_api('west')
      mock_scheduler_proxy.getTasksWithoutConfigs.return_value = self.create_status_call_result()
      api.kill_job.return_value = self.get_kill_job_response()
      mock_scheduler_proxy.killTasks.return_value = self.get_kill_job_response()
      for _ in range(8):
        mock_context.add_expected_status_query_result(self.create_status_call_result(
            self.create_mock_task(ScheduleStatus.RUNNING)))

      with temporary_file() as fp:
        fp.write(self.get_valid_config())
        fp.flush()
        cmd = AuroraCommandLine()
        assert EXIT_TIMEOUT == cmd.execute(
            ['job', 'killall', '--no-batching', '--config=%s' % fp.name, 'west/bozo/test/hello'])

      # Now check that the right API calls got made.
      assert api.kill_job.call_count == 1
      api.kill_job.assert_called_with(AuroraJobKey.from_path('west/bozo/test/hello'), None)
      self.assert_scheduler_called(api, self.get_expected_task_query(), 8)
Example #9
  def test_start_update_command_line_succeeds(self):
    mock_context = FakeAuroraCommandContext()
    resp = self.create_simple_success_response()
    resp.result = Result(startJobUpdateResult=StartJobUpdateResult(updateId="id"))
    with contextlib.nested(
        patch('apache.aurora.client.cli.update.Update.create_context', return_value=mock_context),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)):
      mock_api = mock_context.get_api(self.TEST_CLUSTER)
      mock_api.start_job_update.return_value = resp
      with temporary_file() as fp:
        fp.write(self.get_valid_config())
        fp.flush()
        cmd = AuroraCommandLine()
        result = cmd.execute(['beta-update', 'start', self.TEST_JOBSPEC, fp.name])
        assert result == EXIT_OK

      update_url_msg = StartUpdate.UPDATE_MSG_TEMPLATE % (
          mock_context.get_update_page(mock_api, AuroraJobKey.from_path(self.TEST_JOBSPEC), "id"))

      assert mock_api.start_job_update.call_count == 1
      args, kwargs = mock_api.start_job_update.call_args
      assert isinstance(args[0], AuroraConfig)
      assert args[1] is None
      assert mock_context.get_out() == [update_url_msg]
      assert mock_context.get_err() == []
Example #10
def get_job_config(job_spec, config_file, options):
    try:
        job_key = AuroraJobKey.from_path(job_spec)
        select_cluster = job_key.cluster
        select_env = job_key.env
        select_role = job_key.role
        jobname = job_key.name
    except AuroraJobKey.Error:
        deprecation_warning(
            'Please refer to your job in CLUSTER/ROLE/ENV/NAME format.')
        select_cluster = options.cluster if options.cluster else None
        select_env = options.env
        select_role = None
        jobname = job_spec
    try:
        json_option = options.json
    except AttributeError:
        json_option = False
    try:
        bindings = options.bindings
    except AttributeError:
        bindings = ()
    return get_config(jobname,
                      config_file,
                      json_option,
                      bindings,
                      select_cluster=select_cluster,
                      select_role=select_role,
                      select_env=select_env)
Example #11
  def test_killall_job(self):
    """Test kill client-side API logic."""
    mock_context = FakeAuroraCommandContext()
    mock_scheduler_proxy = Mock()
    with contextlib.nested(
        patch('time.sleep'),
        patch('apache.aurora.client.cli.jobs.Job.create_context', return_value=mock_context),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)):

      api = mock_context.get_api('west')
      mock_scheduler_proxy.getTasksStatus.return_value = self.create_status_call_result()
      api.kill_job.return_value = self.get_kill_job_response()
      mock_scheduler_proxy.killTasks.return_value = self.get_kill_job_response()
      mock_context.add_expected_status_query_result(self.create_status_call_result(
          self.create_mock_task(ScheduleStatus.KILLED)))
      with temporary_file() as fp:
        fp.write(self.get_valid_config())
        fp.flush()
        cmd = AuroraCommandLine()
        cmd.execute(['job', 'killall', '--no-batching', '--config=%s' % fp.name,
            'west/bozo/test/hello'])

      # Now check that the right API calls got made.
      assert api.kill_job.call_count == 1
      api.kill_job.assert_called_with(AuroraJobKey.from_path('west/bozo/test/hello'), None)
      self.assert_scheduler_called(api, self.get_expected_task_query(), 2)
Example #12
def really_killall(args, options):
    """Helper for testing purposes: make it easier to mock out the actual kill process,
  while testing hooks in the command dispatch process.
  """
    maybe_disable_hooks(options)
    job_key = AuroraJobKey.from_path(args[0])
    config_file = args[1] if len(args) > 1 else None  # the config for hooks
    new_cmd = ["job", "killall", args[0]]
    if config_file is not None:
        new_cmd.append("--config=%s" % config_file)
    if options.open_browser:
        new_cmd.append("--open-browser")
    if options.batch_size is not None:
        new_cmd.append("--batch-size=%s" % options.batch_size)
    if options.max_total_failures is not None:
        new_cmd.append("--max-total-failures=%s" % options.max_total_failures)
    v1_deprecation_warning("killall", new_cmd)

    config = get_job_config(job_key.to_path(), config_file,
                            options) if config_file else None
    api = make_client(job_key.cluster)
    if options.batch_size is not None:
        kill_in_batches(api, job_key, None, options.batch_size,
                        options.max_failures_option)
    else:
        resp = api.kill_job(job_key, None, config=config)
        check_and_log_response(resp)
    handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role,
                job_key.env, job_key.name)
    wait_kill_tasks(api.scheduler_proxy, job_key)
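
Note: the v1-to-v2 translation above is plain flag-list assembly. A runnable sketch with a stand-in options object (SimpleNamespace here is an assumption; the real code receives optparse values):

from types import SimpleNamespace

def build_killall_v2_cmd(args, options):
    # Mirrors the new_cmd construction in really_killall above.
    new_cmd = ["job", "killall", args[0]]
    if len(args) > 1:
        new_cmd.append("--config=%s" % args[1])
    if options.open_browser:
        new_cmd.append("--open-browser")
    if options.batch_size is not None:
        new_cmd.append("--batch-size=%s" % options.batch_size)
    if options.max_total_failures is not None:
        new_cmd.append("--max-total-failures=%s" % options.max_total_failures)
    return new_cmd

opts = SimpleNamespace(open_browser=False, batch_size=5, max_total_failures=None)
print(build_killall_v2_cmd(["west/bozo/test/hello"], opts))
# ['job', 'killall', 'west/bozo/test/hello', '--batch-size=5']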
Example #13
def really_killall(args, options):
  """Helper for testing purposes: make it easier to mock out the actual kill process,
  while testing hooks in the command dispatch process.
  """
  maybe_disable_hooks(options)
  job_key = AuroraJobKey.from_path(args[0])
  config_file = args[1] if len(args) > 1 else None  # the config for hooks
  new_cmd = ["job", "killall", args[0]]
  if config_file is not None:
    new_cmd.append("--config=%s" % config_file)
  if options.open_browser:
    new_cmd.append("--open-browser")
  if options.batch_size is not None:
    new_cmd.append("--batch-size=%s" % options.batch_size)
  if options.max_total_failures is not None:
    new_cmd.append("--max-total-failures=%s" % options.max_total_failures)
  v1_deprecation_warning("killall", new_cmd)

  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  api = make_client(job_key.cluster)
  if options.batch_size is not None:
    kill_in_batches(api, job_key, None, options.batch_size, options.max_failures_option)
  else:
    resp = api.kill_job(job_key, None, config=config)
    check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env, job_key.name)
  wait_kill_tasks(api.scheduler_proxy, job_key)
Example #14
 def create_probe_hosts(cls, num_hosts, predicted, safe, safe_in):
   hosts = defaultdict(list)
   for i in range(num_hosts):
     host_name = 'h%s' % i
     job = AuroraJobKey.from_path('west/role/env/job%s' % i)
     hosts[host_name].append(JobUpTimeDetails(job, predicted, safe, safe_in))
   return [hosts]
Example #15
def run(args, options):
    """usage: run cluster/role/env/job cmd

  Runs a shell command on all machines currently hosting shards of a single job.

  This feature supports the same command line wildcards that are used to
  populate a job's commands.

  This means anything in the {{mesos.*}} and {{thermos.*}} namespaces.
  """
    # TODO(William Farner): Add support for invoking on individual shards.
    # TODO(Kevin Sweeney): Restore the ability to run across jobs with globs (See MESOS-3010).
    if not args:
        die("job path is required")
    job_path = args.pop(0)
    new_cmd = ["task", "run"]
    instances_spec = job_path
    if options.num_threads != 1:
        new_cmd.append("--threads=%s" % options.num_threads)
    if options.ssh_user is not None:
        new_cmd.append("--ssh-user=%s" % options.ssh_user)
    if options.executor_sandbox:
        new_cmd.append("--executor-sandbox")
    new_cmd.append('"%s"' % " ".join(args))
    v1_deprecation_warning("ssh", new_cmd)

    try:
        cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
    except AuroraJobKey.Error as e:
        die('Invalid job path "%s": %s' % (job_path, e))

    command = " ".join(args)
    cluster = CLUSTERS[cluster_name]
    dcr = DistributedCommandRunner(cluster, role, env, [name], options.ssh_user)
    dcr.run(command, parallelism=options.num_threads, executor_sandbox=options.executor_sandbox)
Example #16
  def test_safe_domain_override_jobs(self):
    """Test successful execution of the sla_list_safe_domain command with override_jobs option."""
    mock_vector = self.create_mock_vector(self.create_hosts(3, 80, 100))
    with temporary_file() as fp:
      fp.write('west/role/env/job1 30 200s')
      fp.flush()
      mock_options = self.setup_mock_options(override=fp.name)
      with contextlib.nested(
          patch('apache.aurora.client.commands.admin.AuroraClientAPI',
              new=create_autospec(spec=AuroraClientAPI)),
          patch('apache.aurora.client.commands.admin.print_results'),
          patch('apache.aurora.client.commands.admin.CLUSTERS', new=self.TEST_CLUSTERS),
          patch('twitter.common.app.get_options', return_value=mock_options)
      ) as (
          mock_api,
          mock_print_results,
          test_clusters,
          mock_options):

        mock_api.return_value.sla_get_safe_domain_vector.return_value = mock_vector

        sla_list_safe_domain(['west', '50', '100s'])

        job_key = AuroraJobKey.from_path('west/role/env/job1')
        override = {job_key: JobUpTimeLimit(job_key, 30, 200)}
        mock_vector.get_safe_hosts.assert_called_once_with(50.0, 100.0, override, DEFAULT_GROUPING)
        mock_print_results.assert_called_once_with(['h0', 'h1', 'h2'])
Example #17
    def test_safe_domain_override_jobs(self):
        """Test successful execution of the sla_list_safe_domain command with override_jobs option."""
        mock_vector = self.create_mock_vector(self.create_hosts(3, 80, 100))
        with temporary_file() as fp:
            fp.write('west/role/env/job1 30 200s')
            fp.flush()
            mock_options = self.setup_mock_options(override=fp.name)
            with contextlib.nested(
                    patch(
                        'apache.aurora.client.commands.admin.AuroraClientAPI',
                        new=Mock(spec=AuroraClientAPI)),
                    patch('apache.aurora.client.commands.admin.print_results'),
                    patch('apache.aurora.client.commands.admin.CLUSTERS',
                          new=self.TEST_CLUSTERS),
                    patch('twitter.common.app.get_options',
                          return_value=mock_options)) as (mock_api,
                                                          mock_print_results,
                                                          test_clusters,
                                                          mock_options):

                mock_api.return_value.sla_get_safe_domain_vector.return_value = mock_vector

                sla_list_safe_domain(['west', '50', '100s'])

                job_key = AuroraJobKey.from_path('west/role/env/job1')
                override = {job_key: JobUpTimeLimit(job_key, 30, 200)}
                mock_vector.get_safe_hosts.assert_called_once_with(
                    50.0, 100.0, override, DEFAULT_GROUPING)
                mock_print_results.assert_called_once_with(['h0', 'h1', 'h2'])
Example #18
    def test_kill_job_with_instances_batched_large(self):
        """Test kill client-side API logic."""
        mock_context = FakeAuroraCommandContext()
        with contextlib.nested(
                patch('threading._Event.wait'),
                patch('apache.aurora.client.cli.jobs.Job.create_context',
                      return_value=mock_context),
                patch('apache.aurora.client.factory.CLUSTERS',
                      new=self.TEST_CLUSTERS)):
            api = mock_context.get_api('west')
            status_result = self.create_status_call_result()
            mock_context.add_expected_status_query_result(status_result)
            api.kill_job.return_value = self.get_kill_job_response()
            mock_context.add_expected_status_query_result(
                self.create_status_call_result(
                    self.create_mock_task(ScheduleStatus.KILLED)))

            with temporary_file() as fp:
                fp.write(self.get_valid_config())
                fp.flush()
                cmd = AuroraCommandLine()
                cmd.execute([
                    'job', 'kill',
                    '--config=%s' % fp.name, 'west/bozo/test/hello/0,2,4-13'
                ])

            # Now check that the right API calls got made.
            assert api.kill_job.call_count == 3
            api.kill_job.assert_called_with(
                AuroraJobKey.from_path('west/bozo/test/hello'), [12, 13])
            # Expect 5 calls in total (3 from JobMonitor).
            self.assert_scheduler_called(
                api, self.get_expected_task_query([12, 13]), 5)
Example #19
    def setUp(self):
        self.RETURN_VALUE = "foo"
        test_obj = self

        class FakeAuroraClientAPI(object):
            def kill_job(self, job_key, instances=None, lock=None):
                test_obj.API_CALL = functools.partial(self.kill_job, job_key, instances, lock)
                return test_obj.RETURN_VALUE

            def restart(self, job_key, shards, restart_settings):
                test_obj.API_CALL = functools.partial(self.restart, job_key, shards, restart_settings)
                return test_obj.RETURN_VALUE

            def start_cronjob(self, job_key):
                test_obj.API_CALL = functools.partial(self.start_cronjob, job_key)
                return test_obj.RETURN_VALUE

        self._patch_bases(NonHookedAuroraClientAPI, (FakeAuroraClientAPI,))
        self.api = NonHookedAuroraClientAPI()

        # Test args passed in to check that these are proxied un-modified
        self.test_job_key = AuroraJobKey.from_path("a/b/c/d")
        self.test_config = "bar"
        self.test_shards = "baz"
        self.test_lock = "lock"
        self.health_check_interval_seconds = "baa"
Example #20
    def test_killall_job(self):
        """Test kill client-side API logic."""
        mock_context = FakeAuroraCommandContext()
        mock_scheduler_proxy = Mock()
        with contextlib.nested(
                patch('threading._Event.wait'),
                patch('apache.aurora.client.cli.jobs.Job.create_context',
                      return_value=mock_context),
                patch('apache.aurora.client.factory.CLUSTERS',
                      new=self.TEST_CLUSTERS)):

            api = mock_context.get_api('west')
            mock_scheduler_proxy.getTasksWithoutConfigs.return_value = (
                self.create_status_call_result())
            api.kill_job.return_value = self.get_kill_job_response()
            mock_scheduler_proxy.killTasks.return_value = self.get_kill_job_response()
            mock_context.add_expected_status_query_result(
                self.create_status_call_result(
                    self.create_mock_task(ScheduleStatus.KILLED)))
            with temporary_file() as fp:
                fp.write(self.get_valid_config())
                fp.flush()
                cmd = AuroraCommandLine()
                cmd.execute([
                    'job', 'killall', '--no-batching',
                    '--config=%s' % fp.name, 'west/bozo/test/hello'
                ])

            # Now check that the right API calls got made.
            assert api.kill_job.call_count == 1
            api.kill_job.assert_called_with(
                AuroraJobKey.from_path('west/bozo/test/hello'), None)
            self.assert_scheduler_called(api, self.get_expected_task_query(),
                                         2)
Example #21
 def create_hosts(cls, num_hosts, percentage, duration):
   hosts = defaultdict(list)
   for i in range(num_hosts):
     host_name = 'h%s' % i
     job = AuroraJobKey.from_path('west/role/env/job%s' % i)
     hosts[host_name].append(JobUpTimeLimit(job, percentage, duration))
   return [hosts]
Example #22
  def setUp(self):
    self.RETURN_VALUE = 'foo'
    test_obj = self

    class FakeAuroraClientAPI(object):

      def cancel_update(self, job_key):
        test_obj.API_CALL = functools.partial(self.cancel_update, job_key)
        return test_obj.RETURN_VALUE

      def kill_job(self, job_key, instances=None, lock=None):
        test_obj.API_CALL = functools.partial(self.kill_job, job_key, instances, lock)
        return test_obj.RETURN_VALUE

      def restart(self, job_key, shards, updater_config, health_check_interval_seconds):
        test_obj.API_CALL = functools.partial(self.restart, job_key, shards,
            updater_config, health_check_interval_seconds)
        return test_obj.RETURN_VALUE

      def start_cronjob(self, job_key):
        test_obj.API_CALL = functools.partial(self.start_cronjob, job_key)
        return test_obj.RETURN_VALUE

    self._patch_bases(NonHookedAuroraClientAPI, (FakeAuroraClientAPI, ))
    self.api = NonHookedAuroraClientAPI()

    # Test args passed in to check that these are proxied un-modified
    self.test_job_key = AuroraJobKey.from_path('a/b/c/d')
    self.test_config = 'bar'
    self.test_shards = 'baz'
    self.test_lock = 'lock'
    self.test_updater_config = 'blah'
    self.health_check_interval_seconds = 'baa'
Example #23
def get_job_config(job_spec, config_file, options):
  try:
    job_key = AuroraJobKey.from_path(job_spec)
    select_cluster = job_key.cluster
    select_env = job_key.env
    select_role = job_key.role
    jobname = job_key.name
  except AuroraJobKey.Error:
    deprecation_warning('Please refer to your job in CLUSTER/ROLE/ENV/NAME format.')
    select_cluster = options.cluster if options.cluster else None
    select_env = options.env
    select_role = None
    jobname = job_spec
  try:
    json_option = options.json
  except AttributeError:
    json_option = False
  try:
    bindings = options.bindings
  except AttributeError:
    bindings = ()
  return get_config(
      jobname,
      config_file,
      json_option,
      bindings,
      select_cluster=select_cluster,
      select_role=select_role,
      select_env=select_env)
Example #24
 def create_probe_hosts(cls, num_hosts, predicted, safe, safe_in):
   hosts = defaultdict(list)
   for i in range(num_hosts):
     host_name = 'h%s' % i
     job = AuroraJobKey.from_path('west/role/env/job%s' % i)
     hosts[host_name].append(JobUpTimeDetails(job, predicted, safe, safe_in))
   return [hosts]
Example #25
 def create_hosts(cls, num_hosts, percentage, duration):
     hosts = defaultdict(list)
     for i in range(num_hosts):
         host_name = "h%s" % i
         job = AuroraJobKey.from_path("west/role/env/job%s" % i)
         hosts[host_name].append(DomainUpTimeSlaVector.JobUpTimeLimit(job, percentage, duration))
     return hosts
Example #26
  def test_killall_job_something_else(self):
    """Test kill client-side API logic."""
    mock_context = FakeAuroraCommandContext()
    mock_scheduler_proxy = create_autospec(spec=SchedulerThriftApiSpec, instance=True)
    with contextlib.nested(
        patch('threading._Event.wait'),
        patch('apache.aurora.client.cli.jobs.Job.create_context', return_value=mock_context),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)):

      api = mock_context.get_api('west')
      api.kill_job.return_value = self.get_kill_job_response()
      mock_context.add_expected_status_query_result(self.create_status_call_result())
      mock_scheduler_proxy.killTasks.return_value = self.get_kill_job_response()
      mock_context.add_expected_status_query_result(self.create_status_call_result(
          self.create_mock_task(ScheduleStatus.KILLED)))
      with temporary_file() as fp:
        fp.write(self.get_valid_config())
        fp.flush()
        cmd = AuroraCommandLine()
        cmd.execute(['job', 'killall', '--config=%s' % fp.name, 'west/bozo/test/hello'])

      # Now check that the right API calls got made.
      assert api.kill_job.call_count == 4
      instances = [15, 16, 17, 18, 19]
      api.kill_job.assert_called_with(AuroraJobKey.from_path('west/bozo/test/hello'), instances)
      self.assert_scheduler_called(api, self.get_expected_task_query(instances), 6)
Example #27
def ssh(args, options):
  """usage: ssh cluster/role/env/job shard [args...]

  Initiate an SSH session on the machine that a shard is running on.
  """
  if not args:
    die('Job path is required')
  job_path = args.pop(0)
  try:
    cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
  except AuroraJobKey.Error as e:
    die('Invalid job path "%s": %s' % (job_path, e))
  if not args:
    die('Shard is required')
  try:
    shard = int(args.pop(0))
  except ValueError:
    die('Shard must be an integer')

  newcmd = ["task", "ssh", "%s/%s" % (job_path, shard)]
  if len(options.tunnels) > 0:
    newcmd.append("--tunnels=%s" % options.tunnels)
  if options.ssh_user is not None:
    newcmd.append("--ssh-user=%s" % options.ssh_user)
  if options.executor_sandbox:
    newcmd.append("--executor-sandbox")
  if len(args) > 0:
    newcmd.append("--command=\"%s\"" % " ".join(args))
  v1_deprecation_warning("ssh", newcmd)

  api = make_client(cluster_name)
  resp = api.query(api.build_query(role, name, set([int(shard)]), env=env))
  check_and_log_response(resp)

  first_task = resp.result.scheduleStatusResult.tasks[0]
  remote_cmd = 'bash' if not args else ' '.join(args)
  command = DistributedCommandRunner.substitute(remote_cmd, first_task,
      api.cluster, executor_sandbox=options.executor_sandbox)

  ssh_command = ['ssh', '-t']

  role = first_task.assignedTask.task.owner.role
  slave_host = first_task.assignedTask.slaveHost

  for tunnel in options.tunnels:
    try:
      port, name = tunnel.split(':')
      port = int(port)
    except ValueError:
      die('Could not parse tunnel: %s.  Must be of form PORT:NAME' % tunnel)
    if name not in first_task.assignedTask.assignedPorts:
      die('Task %s has no port named %s' % (first_task.assignedTask.taskId, name))
    ssh_command += [
        '-L', '%d:%s:%d' % (port, slave_host, first_task.assignedTask.assignedPorts[name])]

  ssh_command += ['%s@%s' % (options.ssh_user or role, slave_host), command]
  return subprocess.call(ssh_command)
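
Note: the PORT:NAME tunnel parsing above is easy to exercise on its own. A standalone sketch with a made-up tunnel spec:

def parse_tunnel(spec):
    # Split 'PORT:NAME', failing loudly on anything malformed.
    try:
        port, name = spec.split(':')
        return int(port), name
    except ValueError:
        raise SystemExit('Could not parse tunnel: %s.  Must be of form PORT:NAME' % spec)

print(parse_tunnel('8080:http'))  # (8080, 'http')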
Example #28
def killall(args, options):
  """usage: killall cluster/role/env/job
  Kills all tasks in a running job, blocking until all specified tasks have been terminated.
  """

  job_key = AuroraJobKey.from_path(args[0])
  config_file = args[1] if len(args) > 1 else None  # the config for hooks
  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  api = make_client(job_key.cluster)
  resp = api.kill_job(job_key, None, config=config)
  check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env, job_key.name)
Example #29
def killall(args, options):
  """usage: killall cluster/role/env/job
  Kills all tasks in a running job, blocking until all specified tasks have been terminated.
  """
  maybe_disable_hooks(options)
  job_key = AuroraJobKey.from_path(args[0])
  config_file = args[1] if len(args) > 1 else None  # the config for hooks
  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  api = make_client(job_key.cluster)
  if options.batch_size is not None:
    kill_in_batches(api, job_key, None, options.batch_size, options.max_failures_option)
  else:
    resp = api.kill_job(job_key, None, config=config)
    check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env, job_key.name)
  wait_kill_tasks(api.scheduler_proxy, job_key)
Example #30
    def parse_jobs_file(filename):
        result = {}
        with open(filename, 'r') as overrides:
            for line in overrides:
                if not line.strip():
                    continue

                tokens = line.split()
                if len(tokens) != 3:
                    die('Invalid line in %s:%s' % (filename, line))
                job_key = AuroraJobKey.from_path(tokens[0])
                result[job_key] = JobUpTimeLimit(
                    job=job_key,
                    percentage=parse_sla_percentage(tokens[1]),
                    duration_secs=parse_time(tokens[2]).as_(Time.SECONDS))
        return result
Example #31
  def parse_jobs_file(filename):
    result = {}
    with open(filename, 'r') as overrides:
      for line in overrides:
        if not line.strip():
          continue

        tokens = line.split()
        if len(tokens) != 3:
          die('Invalid line in %s:%s' % (filename, line))
        job_key = AuroraJobKey.from_path(tokens[0])
        result[job_key] = JobUpTimeLimit(
            job=job_key,
            percentage=parse_sla_percentage(tokens[1]),
            duration_secs=parse_time(tokens[2]).as_(Time.SECONDS)
        )
    return result
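
Note: the override file read by parse_jobs_file is plain whitespace-delimited text, one "job_path percentage duration" triple per line. A self-contained sketch of the format with made-up values (the real percentage and duration parsing lives in parse_sla_percentage and parse_time):

import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.sla', delete=False) as fp:
    fp.write('west/role/env/job1 30 200s\n')
    fp.write('west/role/env/job2 95.5 1800s\n')

with open(fp.name) as overrides:
    for line in overrides:
        job_path, percentage, duration = line.split()
        print(job_path, float(percentage), duration)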
Example #32
  def test_kill_job_with_instances(self):
    """Test kill client-side API logic."""
    mock_context = FakeAuroraCommandContext()
    with contextlib.nested(
        patch('apache.aurora.client.cli.jobs.Job.create_context', return_value=mock_context),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)):
      api = mock_context.get_api('west')
      api.kill_job.return_value = self.get_kill_job_response()
      with temporary_file() as fp:
        fp.write(self.get_valid_config())
        fp.flush()
        cmd = AuroraCommandLine()
        cmd.execute(['job', 'kill', '--config=%s' % fp.name, '--instances=0,2,4-6',
           'west/mchucarroll/test/hello'])

      # Now check that the right API calls got made.
      assert api.kill_job.call_count == 1
      api.kill_job.assert_called_with(AuroraJobKey.from_path('west/mchucarroll/test/hello'),
          [0, 2, 4, 5, 6])
Example #33
    def test_safe_domain_override_jobs(self):
        """Test successful execution of the sla_list_safe_domain command with override_jobs option."""
        mock_vector = self.create_mock_vector(self.create_hosts(3, 80, 100))
        with temporary_file() as fp:
            fp.write("west/role/env/job1 30 200s")
            fp.flush()
            mock_options = self.setup_mock_options(override=fp.name)
            with contextlib.nested(
                patch("apache.aurora.client.commands.admin.AuroraClientAPI", new=Mock(spec=AuroraClientAPI)),
                patch("apache.aurora.client.commands.admin.print_results"),
                patch("apache.aurora.client.commands.admin.CLUSTERS", new=self.TEST_CLUSTERS),
                patch("twitter.common.app.get_options", return_value=mock_options),
            ) as (mock_api, mock_print_results, test_clusters, mock_options):

                mock_api.return_value.sla_get_safe_domain_vector.return_value = mock_vector

                sla_list_safe_domain(["west", "50", "100s"])

                job_key = AuroraJobKey.from_path("west/role/env/job1")
                override = {job_key: DomainUpTimeSlaVector.JobUpTimeLimit(job_key, 30, 200)}
                mock_vector.get_safe_hosts.assert_called_once_with(50.0, 100.0, override)
                mock_print_results.assert_called_once_with(["h0", "h1", "h2"])
Example #34
def run(args, options):
    """usage: run cluster/role/env/job cmd

  Runs a shell command on all machines currently hosting shards of a single job.

  This feature supports the same command line wildcards that are used to
  populate a job's commands.

  This means anything in the {{mesos.*}} and {{thermos.*}} namespaces.
  """
    # TODO(William Farner): Add support for invoking on individual shards.
    # TODO(Kevin Sweeney): Restore the ability to run across jobs with globs (See MESOS-3010).
    if not args:
        die('job path is required')
    job_path = args.pop(0)
    new_cmd = ["task", "run"]
    instances_spec = job_path
    if options.num_threads != 1:
        new_cmd.append("--threads=%s" % options.num_threads)
    if options.ssh_user is not None:
        new_cmd.append("--ssh-user=%s" % options.ssh_user)
    if options.executor_sandbox:
        new_cmd.append("--executor-sandbox")
    new_cmd.append("\"%s\"" % " ".join(args))
    v1_deprecation_warning("ssh", new_cmd)

    try:
        cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
    except AuroraJobKey.Error as e:
        die('Invalid job path "%s": %s' % (job_path, e))

    command = ' '.join(args)
    cluster = CLUSTERS[cluster_name]
    dcr = DistributedCommandRunner(cluster, role, env, [name],
                                   options.ssh_user)
    dcr.run(command,
            parallelism=options.num_threads,
            executor_sandbox=options.executor_sandbox)
Example #35
def run(args, options):
  """usage: run cluster/role/env/job cmd

  Runs a shell command on all machines currently hosting shards of a single job.

  This feature supports the same command line wildcards that are used to
  populate a job's commands.

  This means anything in the {{mesos.*}} and {{thermos.*}} namespaces.
  """
  # TODO(William Farner): Add support for invoking on individual shards.
  # TODO(Kevin Sweeney): Restore the ability to run across jobs with globs (See MESOS-3010).
  if not args:
    die('job path is required')
  job_path = args.pop(0)
  try:
    cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
  except AuroraJobKey.Error as e:
    die('Invalid job path "%s": %s' % (job_path, e))

  command = ' '.join(args)
  cluster = CLUSTERS[cluster_name]
  dcr = DistributedCommandRunner(cluster, role, env, [name], options.ssh_user)
  dcr.run(command, parallelism=options.num_threads, executor_sandbox=options.executor_sandbox)
Example #36
def jobkeytype(v):
    """wrapper for AuroraJobKey.from_path that improves error messages"""
    return AuroraJobKey.from_path(v)
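
Note: a converter like jobkeytype is shaped for argparse's type= hook, where a raised exception becomes a clean "invalid jobkeytype value" error, which is how the wrapper improves messages. A hypothetical wiring with a stand-in parser (not Aurora's real CLI):

import argparse

def jobkeytype_demo(v):
    # Stand-in for AuroraJobKey.from_path: accept only 'cluster/role/env/name'.
    parts = v.split('/')
    if len(parts) != 4 or not all(parts):
        raise ValueError('expected cluster/role/env/name, got %r' % v)
    return tuple(parts)

parser = argparse.ArgumentParser()
parser.add_argument('job', type=jobkeytype_demo)
print(parser.parse_args(['west/bozo/test/hello']).job)
# ('west', 'bozo', 'test', 'hello')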
Example #37
 def test_basic(self):
   AuroraJobKey.from_path("smf1/mesos/test/labrat")
Example #38
 def create_probe_hosts(cls, hostname, predicted, safe, safe_in):
     hosts = defaultdict(list)
     job = AuroraJobKey.from_path('west/role/env/job-%s' % hostname)
     hosts[hostname].append(JobUpTimeDetails(job, predicted, safe, safe_in))
     return [hosts]
Example #39
def ssh(args, options):
  """usage: ssh cluster/role/env/job shard [args...]

  Initiate an SSH session on the machine that a shard is running on.
  """
  if not args:
    die('Job path is required')
  job_path = args.pop(0)
  try:
    cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
  except AuroraJobKey.Error as e:
    die('Invalid job path "%s": %s' % (job_path, e))
  if not args:
    die('Shard is required')
  try:
    shard = int(args.pop(0))
  except ValueError:
    die('Shard must be an integer')

  newcmd = ["task", "ssh", "%s/%s" % (job_path, shard)]
  if len(options.tunnels) > 0:
    newcmd.append("--tunnels=%s" % options.tunnels)
  if options.ssh_user is not None:
    newcmd.append("--ssh-user=%s" % options.ssh_user)
  if options.executor_sandbox:
    newcmd.append("--executor-sandbox")
  if len(args) > 0:
    newcmd.append("--command=\"%s\"" % " ".join(args))
  v1_deprecation_warning("ssh", newcmd)

  api = make_client(cluster_name)
  resp = api.query(api.build_query(role, name, set([int(shard)]), env=env))
  check_and_log_response(resp)

  if (resp.result.scheduleStatusResult.tasks is None or
      len(resp.result.scheduleStatusResult.tasks) == 0):
    die("Job %s not found" % job_path)
  first_task = resp.result.scheduleStatusResult.tasks[0]
  remote_cmd = 'bash' if not args else ' '.join(args)
  command = DistributedCommandRunner.substitute(remote_cmd, first_task,
      api.cluster, executor_sandbox=options.executor_sandbox)

  ssh_command = ['ssh', '-t']

  role = first_task.assignedTask.task.owner.role
  slave_host = first_task.assignedTask.slaveHost

  for tunnel in options.tunnels:
    try:
      port, name = tunnel.split(':')
      port = int(port)
    except ValueError:
      die('Could not parse tunnel: %s.  Must be of form PORT:NAME' % tunnel)
    if name not in first_task.assignedTask.assignedPorts:
      die('Task %s has no port named %s' % (first_task.assignedTask.taskId, name))
    ssh_command += [
        '-L', '%d:%s:%d' % (port, slave_host, first_task.assignedTask.assignedPorts[name])]

  ssh_command += ['%s@%s' % (options.ssh_user or role, slave_host), command]
  return subprocess.call(ssh_command)
Example #40
 def assert_kill_call_no_instances(cls, api):
     assert api.kill_job.mock_calls == [
         call(AuroraJobKey.from_path(cls.TEST_JOBSPEC), None)
     ]
Example #41
def parse_aurora_job_key_into(option, opt, value, parser):
  try:
    setattr(parser.values, option.dest, AuroraJobKey.from_path(value))
  except AuroraJobKey.Error as e:
    raise optparse.OptionValueError('Failed to parse: %s' % e)
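
Note: the callback above matches optparse's callback signature (option, opt, value, parser). A self-contained sketch of the wiring, with a hypothetical stand-in for AuroraJobKey.from_path:

import optparse

def fake_from_path(value):
    # Stand-in parser; the real AuroraJobKey.from_path raises AuroraJobKey.Error.
    parts = value.split('/')
    if len(parts) != 4 or not all(parts):
        raise ValueError('expected cluster/role/env/name, got %r' % value)
    return tuple(parts)

def parse_job_key_into(option, opt, value, parser):
    try:
        setattr(parser.values, option.dest, fake_from_path(value))
    except ValueError as e:
        raise optparse.OptionValueError('Failed to parse: %s' % e)

parser = optparse.OptionParser()
parser.add_option('--job', type='string', action='callback',
                  callback=parse_job_key_into, dest='job_key')
options, _ = parser.parse_args(['--job', 'west/bozo/test/hello'])
print(options.job_key)  # ('west', 'bozo', 'test', 'hello')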
Example #42
 def create_probe_hosts(cls, hostname, predicted, safe, safe_in):
   hosts = defaultdict(list)
   job = AuroraJobKey.from_path('west/role/env/job-%s' % hostname)
   hosts[hostname].append(JobUpTimeDetails(job, predicted, safe, safe_in))
   return [hosts]
Example #43
 def assert_kill_calls(cls, api, instance_range=None, instances=None):
   if instances:
     kill_calls = [call(AuroraJobKey.from_path(cls.TEST_JOBSPEC), instances)]
   else:
     kill_calls = [call(AuroraJobKey.from_path(cls.TEST_JOBSPEC), [i]) for i in instance_range]
   assert api.kill_job.mock_calls == kill_calls
Example #44
 def assert_kill_call_no_instances(cls, api):
   assert api.kill_job.mock_calls == [call(AuroraJobKey.from_path(cls.TEST_JOBSPEC), None)]
Example #45
def parse_aurora_job_key_into(option, opt, value, parser):
    try:
        setattr(parser.values, option.dest, AuroraJobKey.from_path(value))
    except AuroraJobKey.Error as e:
        raise optparse.OptionValueError("Failed to parse: %s" % e)
Example #46
def jobkeytype(v):
  """wrapper for AuroraJobKey.from_path that improves error messages"""
  return AuroraJobKey.from_path(v)
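
Note: every example in this collection revolves around the "cluster/role/env/name" path format. A minimal stand-in sketch of the behavior the snippets rely on (not Aurora's actual implementation, which lives in apache.aurora.common.aurora_job_key):

class JobKeyError(ValueError):
    pass

class SimpleJobKey(object):
    """Minimal four-part job key; a sketch, not Aurora's AuroraJobKey."""

    def __init__(self, cluster, role, env, name):
        self.cluster, self.role, self.env, self.name = cluster, role, env, name

    @classmethod
    def from_path(cls, path):
        # Reject anything that is not exactly four non-empty components.
        parts = path.split('/')
        if len(parts) != 4 or not all(parts):
            raise JobKeyError('Invalid job path %r; expected cluster/role/env/name' % path)
        return cls(*parts)

    def to_path(self):
        return '/'.join((self.cluster, self.role, self.env, self.name))

    def __iter__(self):
        # Supports the tuple unpacking seen in the ssh/run examples above.
        return iter((self.cluster, self.role, self.env, self.name))

key = SimpleJobKey.from_path('west/bozo/test/hello')
print(key.to_path())            # west/bozo/test/hello
cluster, role, env, name = key  # unpacks like the examples above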