Example #1
 def assert_correct_status_calls(cls, api):
     # getTasksStatus gets called a lot of times. The exact number isn't fixed; it loops
     # over the health checks until all of them pass for a configured period of time.
     # The minimum number of calls is 5: once before the tasks are restarted, and then
     # once for each batch of restarts (since the batch size is set to 5, and the
     # total number of jobs is 20, that's 4 batches).
     assert api.getTasksStatus.call_count >= 5
     # The first getTasksStatus call uses an expansive query; the rest only query for
     # status RUNNING.
     status_calls = api.getTasksStatus.call_args_list
     assert status_calls[0][0][0] == TaskQuery(taskIds=None,
                                               jobName='hello',
                                               environment='test',
                                               owner=Identity(
                                                   role=u'mchucarroll',
                                                   user=None),
                                               statuses=cls.QUERY_STATUSES)
     for status_call in status_calls[1:]:
         assert status_call[0][0] == TaskQuery(
             taskIds=None,
             jobName='hello',
             environment='test',
             owner=Identity(role='mchucarroll', user=None),
             statuses=set([ScheduleStatus.RUNNING]))
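The minimum call count asserted in Example #1 follows directly from the batching arithmetic described in its comment. A minimal sketch of that arithmetic (the names instance_count and batch_size are illustrative, not taken from the original test):

    # Illustrative only: one initial getTasksStatus call, plus one per restart batch.
    instance_count = 20  # total tasks being restarted, per the comment above
    batch_size = 5       # restart batch size, per the comment above
    min_status_calls = 1 + instance_count // batch_size
    assert min_status_calls == 5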
Example #2
 def create_mock_scheduled_tasks(cls):
     jobs = []
     for name in ['foo', 'bar', 'baz']:
         job = Mock()
         job.key = JobKey(role=cls.TEST_ROLE,
                          environment=cls.TEST_ENV,
                          name=name)
         job.failure_count = 0
         job.assignedTask = Mock(spec=AssignedTask)
         job.assignedTask.taskId = 1287391823
         job.assignedTask.slaveHost = 'slavehost'
         job.assignedTask.task = Mock(spec=TaskConfig)
         job.assignedTask.task.executorConfig = Mock()
         job.assignedTask.task.maxTaskFailures = 1
         job.assignedTask.task.packages = []
         job.assignedTask.task.owner = Identity(role='mchucarroll')
         job.assignedTask.task.environment = 'test'
         job.assignedTask.task.jobName = 'woops'
         job.assignedTask.task.numCpus = 2
         job.assignedTask.task.ramMb = 2
         job.assignedTask.task.diskMb = 2
         job.assignedTask.instanceId = 4237894
         job.assignedTask.assignedPorts = {}
         job.status = ScheduleStatus.RUNNING
         mockEvent = Mock(spec=TaskEvent)
         mockEvent.timestamp = 28234726395
         mockEvent.status = ScheduleStatus.RUNNING
         mockEvent.message = "Hi there"
         job.taskEvents = [mockEvent]
         jobs.append(job)
     return jobs
Example #3
 def get_expected_task_query(cls, shards=None):
     instance_ids = frozenset(shards) if shards is not None else None
     # Helper to create the query that will be a parameter to job kill.
     return TaskQuery(taskIds=None,
                      jobName=cls.TEST_JOB,
                      environment=cls.TEST_ENV,
                      instanceIds=instance_ids,
                      owner=Identity(role=cls.TEST_ROLE, user=None))
Example #4
 def __init__(self, client, role, env, jobname):
     self._client = client
     self._query = TaskQuery(owner=Identity(role=role),
                             environment=env,
                             jobName=jobname)
     self._initial_tasks = set()
     self._initial_tasks = set(task.assignedTask.taskId
                               for task in self.iter_query())
Example #5
 def assert_correct_killtask_calls(cls, api):
     assert api.killTasks.call_count == 4
     # Check the last call's parameters.
     api.killTasks.assert_called_with(
         TaskQuery(taskIds=None,
                   jobName='hello',
                   environment='test',
                   instanceIds=frozenset([16, 17, 18, 19, 15]),
                   owner=Identity(role=u'mchucarroll', user=None),
                   statuses=cls.QUERY_STATUSES), 'foo')
Example #6
 def expect_kill(self, instance_ids, response_code=None):
     response_code = ResponseCode.OK if response_code is None else response_code
     response = Response(responseCode=response_code, message='test')
     query = TaskQuery(owner=Identity(role=self._job_key.role),
                       environment=self._job_key.environment,
                       jobName=self._job_key.name,
                       statuses=ACTIVE_STATES,
                       instanceIds=frozenset([int(s)
                                              for s in instance_ids]))
     self._scheduler.killTasks(query, self._lock,
                               self._session_key).AndReturn(response)
Example #7
    def test_successful_run(self):
        """Test the run command."""
        # The run command queries the scheduler for task statuses via getTasksStatus.
        mock_options = self.setup_mock_options()
        (mock_api, mock_scheduler) = self.create_mock_api()
        mock_scheduler.getTasksStatus.return_value = self.create_status_response()
        sandbox_args = {
            'slave_root': '/slaveroot',
            'slave_run_directory': 'slaverun'
        }
        with contextlib.nested(
                patch('twitter.aurora.client.api.SchedulerProxy',
                      return_value=mock_scheduler),
                patch('twitter.aurora.client.factory.CLUSTERS',
                      new=self.TEST_CLUSTERS),
                patch('twitter.aurora.client.commands.run.CLUSTERS',
                      new=self.TEST_CLUSTERS),
                patch('twitter.common.app.get_options',
                      return_value=mock_options),
                patch(
                    'twitter.aurora.client.api.command_runner.DistributedCommandRunner.sandbox_args',
                    return_value=sandbox_args),
                patch('subprocess.Popen',
                      return_value=self.create_mock_process())) as (
                          mock_scheduler_proxy_class, mock_clusters,
                          mock_clusters_runpatch, options,
                          mock_runner_args_patch, mock_subprocess):
            run(['west/mchucarroll/test/hello', 'ls'], mock_options)

            # The status command sends a getTasksStatus query to the scheduler,
            # and then prints the result.
            mock_scheduler.getTasksStatus.assert_called_with(
                TaskQuery(jobName='hello',
                          environment='test',
                          owner=Identity(role='mchucarroll'),
                          statuses=set([
                              ScheduleStatus.RUNNING, ScheduleStatus.KILLING,
                              ScheduleStatus.RESTARTING,
                              ScheduleStatus.PREEMPTING
                          ])))

            # The mock status call returns three ScheduledTasks, so three commands should have been run.
            assert mock_subprocess.call_count == 3
            # stderr=-2 is subprocess.STDOUT; stdout=-1 is subprocess.PIPE.
            mock_subprocess.assert_called_with([
                'ssh', '-n', '-q', 'mchucarroll@slavehost',
                'cd /slaveroot/slaves/*/frameworks/*/executors/thermos-1287391823/runs/'
                'slaverun/sandbox;ls'
            ], stderr=-2, stdout=-1)
Example #8
    def test_init(self):
        result = Result(scheduleStatusResult=ScheduleStatusResult(tasks=[]))
        response = Response(responseCode=ResponseCode.OK,
                            message="test",
                            result=result)
        query = TaskQuery(owner=Identity(role=ROLE),
                          environment=ENV,
                          jobName=JOB_NAME)

        self.mock_scheduler.getTasksStatus(query).AndReturn(response)

        self.mox.ReplayAll()

        JobMonitor(self.mock_api, ROLE, ENV, JOB_NAME)
Example #9
def make_task(thermos_config, assigned_ports={}, **kw):
    role = getpass.getuser()
    task_id = thermos_config.task().name().get() + '-001'
    at = AssignedTask(taskId=task_id,
                      task=TaskConfig(executorConfig=ExecutorConfig(
                          name=AURORA_EXECUTOR_NAME,
                          data=thermos_config.json_dumps()),
                                      owner=Identity(role=role, user=role)),
                      assignedPorts=assigned_ports,
                      **kw)
    td = mesos_pb.TaskInfo()
    td.task_id.value = task_id
    td.name = thermos_config.task().name().get()
    td.data = serialize(at)
    return td
Example #10
 def expect_get_tasks(self, tasks, ignore_ids=None, response_code=None):
     response_code = ResponseCode.OK if response_code is None else response_code
     response = Response(responseCode=response_code, message='test')
     scheduled = []
     for index, task in enumerate(tasks):
         if not ignore_ids or index not in ignore_ids:
             scheduled.append(
                 ScheduledTask(assignedTask=AssignedTask(task=task,
                                                         instanceId=index)))
     response.result = Result(scheduleStatusResult=ScheduleStatusResult(
         tasks=scheduled))
     query = TaskQuery(owner=Identity(role=self._job_key.role),
                       environment=self._job_key.environment,
                       jobName=self._job_key.name,
                       statuses=ACTIVE_STATES)
     self._scheduler.getTasksStatus(query).AndReturn(response)
Example #11
  def test_unsuccessful_status(self):
    """Test the status command when the user asks the status of a job that doesn't exist."""
    # Calls api.check_status, which queries the scheduler via getTasksStatus.
    mock_options = self.setup_mock_options()
    (mock_api, mock_scheduler) = self.create_mock_api()
    mock_scheduler.getTasksStatus.return_value = self.create_failed_status_response()
    with contextlib.nested(
        patch('twitter.aurora.client.api.SchedulerProxy', return_value=mock_scheduler),
        patch('twitter.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('twitter.common.app.get_options', return_value=mock_options)) as (
            mock_scheduler_proxy_class,
            mock_clusters,
            options):
      self.assertRaises(SystemExit, status, ['west/mchucarroll/test/hello'], mock_options)

      mock_scheduler.getTasksStatus.assert_called_with(TaskQuery(jobName='hello',
          environment='test', owner=Identity(role='mchucarroll')))
Example #12
 def make_task_configs(self, count=1):
     return [
         TaskConfig(
             owner=Identity(role=self._job_key.role),
             environment=self._job_key.environment,
             jobName=self._job_key.name,
             numCpus=6.0,
             ramMb=1024,
             diskMb=2048,
             priority=0,
             maxTaskFailures=1,
             production=True,
             taskLinks={'task': 'link'},
             contactEmail='*****@*****.**',
             executorConfig=ExecutorConfig(name='test', data='test data')
             # Not setting any set()-related properties as that throws off mox verification.
         )
     ] * count
Example #13
  def test_successful_status(self):
    """Test the status command."""
    # Calls api.check_status, which queries the scheduler via getTasksStatus.
    mock_options = self.setup_mock_options()
    (mock_api, mock_scheduler) = self.create_mock_api()
    mock_scheduler.getTasksStatus.return_value = self.create_status_response()
    with contextlib.nested(
        patch('twitter.aurora.client.api.SchedulerProxy', return_value=mock_scheduler),
        patch('twitter.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('twitter.common.app.get_options', return_value=mock_options)) as (
            mock_scheduler_proxy_class,
            mock_clusters,
            options):
      status(['west/mchucarroll/test/hello'], mock_options)

      # The status command sends a getTasksStatus query to the scheduler,
      # and then prints the result.
      mock_scheduler.getTasksStatus.assert_called_with(TaskQuery(jobName='hello',
          environment='test', owner=Identity(role='mchucarroll')))
Example #14
def test_simple_config():
    job = convert_pystachio_to_thrift(HELLO_WORLD)
    assert job.instanceCount == 1
    tti = job.taskConfig
    assert job.key == JobKey(role=HELLO_WORLD.role().get(),
                             environment=HELLO_WORLD.environment().get(),
                             name=HELLO_WORLD.name().get())
    assert job.owner == Identity(role=HELLO_WORLD.role().get(),
                                 user=getpass.getuser())
    assert job.cronSchedule == ''
    assert tti.jobName == 'hello_world'
    assert tti.isService == False
    assert tti.numCpus == 0.1
    assert tti.ramMb == 64
    assert tti.diskMb == 64
    assert tti.requestedPorts == set()
    assert tti.production == False
    assert tti.priority == 0
    assert tti.maxTaskFailures == 1
    assert tti.constraints == set()
    assert tti.packages == set()
    assert tti.environment == HELLO_WORLD.environment().get()
Example #15
    def _get_tasks_by_instance_id(self, instance_ids):
        log.debug('Querying instance statuses.')
        query = TaskQuery()
        query.owner = Identity(role=self._job_key.role)
        query.environment = self._job_key.environment
        query.jobName = self._job_key.name
        query.statuses = set([ScheduleStatus.RUNNING])

        query.instanceIds = instance_ids
        try:
            resp = self._scheduler.getTasksStatus(query)
        except IOError as e:
            log.error('IO Exception during scheduler call: %s' % e)
            return []

        tasks = []
        if resp.responseCode == ResponseCode.OK:
            tasks = resp.result.scheduleStatusResult.tasks

        log.debug(
            'Response from scheduler: %s (message: %s)' %
            (ResponseCode._VALUES_TO_NAMES[resp.responseCode], resp.message))
        return tasks
Example #16
 def _create_task_query(self, instanceIds=None):
     return TaskQuery(owner=Identity(role=self._job_key.role),
                      environment=self._job_key.environment,
                      jobName=self._job_key.name,
                      statuses=ACTIVE_STATES,
                      instanceIds=instanceIds)
Example #17
 def build_query(cls, role, job, instances=None, statuses=LIVE_STATES, env=None):
   return TaskQuery(owner=Identity(role=role),
                    jobName=job,
                    statuses=statuses,
                    instanceIds=instances,
                    environment=env)
Example #18
 def query_from(cls, role, env, job):
     return TaskQuery(statuses=LIVE_STATES,
                      owner=Identity(role=role),
                      jobName=job,
                      environment=env)
Example #19
 def to_thrift_query(self):
   return TaskQuery(owner=Identity(role=self.role), environment=self.env, jobName=self.name)
Example #20
 def create_mock_query(cls):
     return TaskQuery(owner=Identity(role=cls.TEST_ROLE),
                      environment=cls.TEST_ENV,
                      jobName=cls.TEST_JOB)
Example #21
 def make_job_config(self, task_config, instance_count, cron_schedule=None):
     return JobConfiguration(key=self._job_key,
                             owner=Identity(role=self._job_key.role),
                             cronSchedule=cron_schedule,
                             taskConfig=task_config,
                             instanceCount=instance_count)
Example #22
def convert(job, packages=frozenset(), ports=frozenset()):
    """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration."""

    owner = Identity(role=fully_interpolated(job.role()),
                     user=getpass.getuser())
    key = JobKey(
        role=assert_valid_field('role', fully_interpolated(job.role())),
        environment=assert_valid_field('environment',
                                       fully_interpolated(job.environment())),
        name=assert_valid_field('name', fully_interpolated(job.name())))

    task_raw = job.task()

    MB = 1024 * 1024
    task = TaskConfig()

    def not_empty_or(item, default):
        return default if item is Empty else fully_interpolated(item)

    # job components
    task.jobName = fully_interpolated(job.name())
    task.environment = fully_interpolated(job.environment())
    task.production = fully_interpolated(job.production(), bool)
    task.isService = select_service_bit(job)
    task.maxTaskFailures = fully_interpolated(job.max_task_failures())
    task.priority = fully_interpolated(job.priority())
    task.contactEmail = not_empty_or(job.contact(), None)

    # Add package tuples to a task, to display in the scheduler UI.
    task.packages = frozenset(
        Package(role=str(role), name=str(package_name), version=int(version))
        for role, package_name, version in packages)

    # task components
    if not task_raw.has_resources():
        raise InvalidConfig('Task must specify resources!')

    if (fully_interpolated(task_raw.resources().ram()) == 0
            or fully_interpolated(task_raw.resources().disk()) == 0):
        raise InvalidConfig(
            'Must specify ram and disk resources, got ram:%r disk:%r' %
            (fully_interpolated(task_raw.resources().ram()),
             fully_interpolated(task_raw.resources().disk())))

    task.numCpus = fully_interpolated(task_raw.resources().cpu())
    task.ramMb = fully_interpolated(task_raw.resources().ram()) / MB
    task.diskMb = fully_interpolated(task_raw.resources().disk()) / MB
    if task.numCpus <= 0 or task.ramMb <= 0 or task.diskMb <= 0:
        raise InvalidConfig(
            'Task has invalid resources.  cpu/ramMb/diskMb must all be positive: '
            'cpu:%r ramMb:%r diskMb:%r' %
            (task.numCpus, task.ramMb, task.diskMb))

    task.owner = owner
    task.requestedPorts = ports
    task.taskLinks = not_empty_or(job.task_links(), {})
    task.constraints = constraints_to_thrift(
        not_empty_or(job.constraints(), {}))

    underlying, refs = job.interpolate()

    # need to fake an instance id for the sake of schema checking
    underlying_checked = underlying.bind(mesos={'instance': 31337})
    try:
        ThermosTaskValidator.assert_valid_task(underlying_checked.task())
    except ThermosTaskValidator.InvalidTaskError as e:
        raise InvalidConfig('Task is invalid: %s' % e)
    if not underlying_checked.check().ok():
        raise InvalidConfig('Job not fully specified: %s' %
                            underlying.check().message())

    unbound = []
    for ref in refs:
        if ref == THERMOS_TASK_ID_REF or ref == MESOS_INSTANCE_REF or (
                Ref.subscope(THERMOS_PORT_SCOPE_REF, ref)):
            continue
        unbound.append(ref)

    if unbound:
        raise InvalidConfig('Config contains unbound variables: %s' %
                            ' '.join(map(str, unbound)))

    cron_schedule = not_empty_or(job.cron_schedule(), '')
    cron_policy = select_cron_policy(job.cron_policy(),
                                     job.cron_collision_policy())

    task.executorConfig = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=filter_aliased_fields(underlying).json_dumps())

    return JobConfiguration(key=key,
                            owner=owner,
                            cronSchedule=cron_schedule,
                            cronCollisionPolicy=cron_policy,
                            taskConfig=task,
                            instanceCount=fully_interpolated(job.instances()))