def assert_correct_status_calls(cls, api):
    """Verify the getTasksStatus calls recorded on the mocked ``api``.

    The first recorded call must carry the expansive query (statuses
    ``cls.QUERY_STATUSES``); every later call must query only for
    ``ScheduleStatus.RUNNING``.

    Args:
        api: mock whose ``getTasksStatus`` attribute recorded the calls.

    Raises:
        AssertionError: if fewer than five calls were recorded, or if any
            recorded query differs from the expected one.
    """
    # getTasksStatus gets called a lot of times. The exact number isn't fixed; it loops
    # over the health checks until all of them pass for a configured period of time.
    # The minimum number of calls is 5: once before the tasks are restarted, and then
    # once for each batch of restarts (since the batch size is set to 5, and the
    # total number of jobs is 20, that's 4 batches).
    assert api.getTasksStatus.call_count >= 5

    # Each recorded call is an (args, kwargs) pair, so [0][0] is the first
    # positional argument: the query.
    status_calls = api.getTasksStatus.call_args_list

    # In the first getStatus call, it uses an expansive query...
    initial_query = TaskQuery(
        taskIds=None,
        jobName='hello',
        environment='test',
        owner=Identity(role=u'mchucarroll', user=None),
        statuses=cls.QUERY_STATUSES)
    assert status_calls[0][0][0] == initial_query

    # ...in the rest, it only queries for status RUNNING.
    running_query = TaskQuery(
        taskIds=None,
        jobName='hello',
        environment='test',
        owner=Identity(role='mchucarroll', user=None),
        statuses=set([ScheduleStatus.RUNNING]))
    for status_call in status_calls[1:]:
        # This has to be an assert: a bare `==` expression is evaluated and
        # thrown away, so a mismatching query would go unnoticed.
        assert status_call[0][0] == running_query
def create_mock_scheduled_tasks(cls):
    """Build three mock scheduled tasks, for jobs 'foo', 'bar' and 'baz'.

    Every entry is a ``Mock`` in RUNNING status with one RUNNING task event
    and an ``AssignedTask``-spec'd mock wrapping a ``TaskConfig``-spec'd
    mock. Only ``key.name`` differs between the three entries.

    Returns:
        list of the three mocks, in the order foo, bar, baz.
    """
    mock_tasks = []
    for job_name in ('foo', 'bar', 'baz'):
        # Innermost object first: the task configuration.
        config = Mock(spec=TaskConfig)
        config.executorConfig = Mock()
        config.maxTaskFailures = 1
        config.packages = []
        config.owner = Identity(role='mchucarroll')
        config.environment = 'test'
        config.jobName = 'woops'
        config.numCpus = 2
        config.ramMb = 2
        config.diskMb = 2

        assigned = Mock(spec=AssignedTask)
        assigned.taskId = 1287391823
        assigned.slaveHost = 'slavehost'
        assigned.task = config
        assigned.instanceId = 4237894
        assigned.assignedPorts = {}

        event = Mock(spec=TaskEvent)
        event.timestamp = 28234726395
        event.status = ScheduleStatus.RUNNING
        event.message = "Hi there"

        scheduled = Mock()
        scheduled.key = JobKey(role=cls.TEST_ROLE, environment=cls.TEST_ENV, name=job_name)
        scheduled.failure_count = 0
        scheduled.assignedTask = assigned
        scheduled.status = ScheduleStatus.RUNNING
        scheduled.taskEvents = [event]
        mock_tasks.append(scheduled)
    return mock_tasks
def get_expected_task_query(cls, shards=None):
    """Helper to create the query that will be a parameter to job kill.

    Args:
        shards: optional iterable of instance ids. When given it is frozen
            into a ``frozenset``; when ``None`` the query carries no
            instance-id restriction.

    Returns:
        A ``TaskQuery`` for ``cls.TEST_JOB`` in ``cls.TEST_ENV`` owned by
        ``cls.TEST_ROLE``.
    """
    if shards is None:
        instance_ids = None
    else:
        instance_ids = frozenset(shards)
    owner = Identity(role=cls.TEST_ROLE, user=None)
    return TaskQuery(
        taskIds=None,
        jobName=cls.TEST_JOB,
        environment=cls.TEST_ENV,
        instanceIds=instance_ids,
        owner=owner)
def __init__(self, client, role, env, jobname):
    """Remember the client and job query, then snapshot the current task ids.

    Args:
        client: stored as ``self._client``.
        role: owner role used in the task query.
        env: environment used in the task query.
        jobname: job name used in the task query.
    """
    self._client = client
    self._query = TaskQuery(owner=Identity(role=role), environment=env, jobName=jobname)
    # The attribute is set to an empty set before iter_query() runs, so it
    # exists for the whole duration of that first query.
    self._initial_tasks = set()
    seen_task_ids = set()
    for task in self.iter_query():
        seen_task_ids.add(task.assignedTask.taskId)
    self._initial_tasks = seen_task_ids
def assert_correct_killtask_calls(cls, api):
    """Verify that ``api.killTasks`` was called four times and check the last call.

    Only the most recent call's arguments are inspected: a query for
    instances 15-19 of job 'hello' and the second positional argument 'foo'.

    Raises:
        AssertionError: on a wrong call count or mismatching last call.
    """
    assert api.killTasks.call_count == 4

    # Check the last call's parameters.
    last_batch_query = TaskQuery(
        taskIds=None,
        jobName='hello',
        environment='test',
        instanceIds=frozenset([16, 17, 18, 19, 15]),
        owner=Identity(role=u'mchucarroll', user=None),
        statuses=cls.QUERY_STATUSES)
    api.killTasks.assert_called_with(last_batch_query, 'foo')
def expect_kill(self, instance_ids, response_code=None):
    """Record an expected ``killTasks`` call on ``self._scheduler``.

    Args:
        instance_ids: iterable of instance ids; each one is converted with
            ``int()`` before being placed in the query.
        response_code: response code of the canned reply; defaults to
            ``ResponseCode.OK`` when ``None``.
    """
    if response_code is None:
        response_code = ResponseCode.OK
    canned_response = Response(responseCode=response_code, message='test')

    job_key = self._job_key
    kill_query = TaskQuery(
        owner=Identity(role=job_key.role),
        environment=job_key.environment,
        jobName=job_key.name,
        statuses=ACTIVE_STATES,
        instanceIds=frozenset(int(s) for s in instance_ids))

    # presumably self._scheduler is a mox mock in record mode - confirm
    expectation = self._scheduler.killTasks(kill_query, self._lock, self._session_key)
    expectation.AndReturn(canned_response)
def test_successful_run(self):
    """Test the run command."""
    mock_options = self.setup_mock_options()
    (mock_api, mock_scheduler) = self.create_mock_api()
    # The scheduler mock answers every getTasksStatus call with a canned response.
    mock_scheduler.getTasksStatus.return_value = self.create_status_response()
    sandbox_args = {
        'slave_root': '/slaveroot',
        'slave_run_directory': 'slaverun',
    }

    patches = contextlib.nested(
        patch('twitter.aurora.client.api.SchedulerProxy', return_value=mock_scheduler),
        patch('twitter.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('twitter.aurora.client.commands.run.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('twitter.common.app.get_options', return_value=mock_options),
        patch(
            'twitter.aurora.client.api.command_runner.DistributedCommandRunner.sandbox_args',
            return_value=sandbox_args),
        patch('subprocess.Popen', return_value=self.create_mock_process()))

    with patches as (
            mock_scheduler_proxy_class,
            mock_clusters,
            mock_clusters_runpatch,
            options,
            mock_runner_args_patch,
            mock_subprocess):
        run(['west/mchucarroll/test/hello', 'ls'], mock_options)

        # The command must have asked the scheduler for the job's tasks in
        # one of these four statuses.
        expected_query = TaskQuery(
            jobName='hello',
            environment='test',
            owner=Identity(role='mchucarroll'),
            statuses=set([
                ScheduleStatus.RUNNING,
                ScheduleStatus.KILLING,
                ScheduleStatus.RESTARTING,
                ScheduleStatus.PREEMPTING,
            ]))
        mock_scheduler.getTasksStatus.assert_called_with(expected_query)

        # One ssh invocation per task is expected; the canned status response
        # presumably holds three ScheduledTasks - see create_status_response.
        assert mock_subprocess.call_count == 3
        expected_command = [
            'ssh', '-n', '-q', 'mchucarroll@slavehost',
            'cd /slaveroot/slaves/*/frameworks/*/executors/thermos-1287391823/runs/'
            'slaverun/sandbox;ls',
        ]
        # -2 and -1 are the values of subprocess.STDOUT and subprocess.PIPE.
        mock_subprocess.assert_called_with(expected_command, stderr=-2, stdout=-1)
def test_init(self):
    """Constructing a JobMonitor issues exactly one getTasksStatus query.

    The scheduler is primed to answer the job's query with an OK response
    containing no tasks; the monitor is then constructed in replay mode.
    """
    expected_query = TaskQuery(owner=Identity(role=ROLE), environment=ENV, jobName=JOB_NAME)
    no_tasks = Result(scheduleStatusResult=ScheduleStatusResult(tasks=[]))
    ok_response = Response(responseCode=ResponseCode.OK, message="test", result=no_tasks)

    self.mock_scheduler.getTasksStatus(expected_query).AndReturn(ok_response)
    self.mox.ReplayAll()

    JobMonitor(self.mock_api, ROLE, ENV, JOB_NAME)
def make_task(thermos_config, assigned_ports=None, **kw):
    """Build a mesos ``TaskInfo`` wrapping a serialized ``AssignedTask``.

    Args:
        thermos_config: config object; its ``task().name().get()`` supplies
            the task name and its ``json_dumps()`` the executor data.
        assigned_ports: mapping passed as ``AssignedTask.assignedPorts``.
            Defaults to a fresh empty dict on every call.
        **kw: extra keyword arguments forwarded to ``AssignedTask``.

    Returns:
        A ``mesos_pb.TaskInfo`` whose ``data`` holds the serialized
        ``AssignedTask``, with task id ``<task name>-001``.
    """
    # A `{}` literal in the signature would be one dict shared by every call;
    # use a None sentinel and create the dict per call instead.
    if assigned_ports is None:
        assigned_ports = {}

    role = getpass.getuser()
    task_name = thermos_config.task().name().get()
    task_id = task_name + '-001'

    executor_config = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=thermos_config.json_dumps())
    at = AssignedTask(
        taskId=task_id,
        task=TaskConfig(
            executorConfig=executor_config,
            owner=Identity(role=role, user=role)),
        assignedPorts=assigned_ports,
        **kw)

    td = mesos_pb.TaskInfo()
    td.task_id.value = task_id
    td.name = task_name
    td.data = serialize(at)
    return td
def expect_get_tasks(self, tasks, ignore_ids=None, response_code=None):
    """Record an expected ``getTasksStatus`` call on ``self._scheduler``.

    Args:
        tasks: sequence of task configs; the position of each one becomes
            its ``instanceId`` in the canned reply.
        ignore_ids: optional collection of positions to leave out of the
            reply. An empty or ``None`` value leaves nothing out.
        response_code: response code of the canned reply; defaults to
            ``ResponseCode.OK`` when ``None``.
    """
    if response_code is None:
        response_code = ResponseCode.OK

    scheduled = [
        ScheduledTask(assignedTask=AssignedTask(task=task, instanceId=index))
        for index, task in enumerate(tasks)
        if not ignore_ids or index not in ignore_ids
    ]
    canned_response = Response(
        responseCode=response_code,
        message='test',
        result=Result(scheduleStatusResult=ScheduleStatusResult(tasks=scheduled)))

    job_key = self._job_key
    active_query = TaskQuery(
        owner=Identity(role=job_key.role),
        environment=job_key.environment,
        jobName=job_key.name,
        statuses=ACTIVE_STATES)
    self._scheduler.getTasksStatus(active_query).AndReturn(canned_response)
def test_unsuccessful_status(self):
    """Test the status command when the user asks the status of a job that doesn't exist."""
    mock_options = self.setup_mock_options()
    (mock_api, mock_scheduler) = self.create_mock_api()
    # The scheduler mock answers getTasksStatus with a canned failure response.
    mock_scheduler.getTasksStatus.return_value = self.create_failed_status_response()

    patches = contextlib.nested(
        patch('twitter.aurora.client.api.SchedulerProxy', return_value=mock_scheduler),
        patch('twitter.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('twitter.common.app.get_options', return_value=mock_options))

    with patches as (mock_scheduler_proxy_class, mock_clusters, options):
        # The command is expected to bail out via SystemExit on the failed lookup.
        self.assertRaises(SystemExit, status, ['west/mchucarroll/test/hello'], mock_options)

        expected_query = TaskQuery(
            jobName='hello',
            environment='test',
            owner=Identity(role='mchucarroll'))
        mock_scheduler.getTasksStatus.assert_called_with(expected_query)
def make_task_configs(self, count=1):
    """Return a list holding ``count`` references to one ``TaskConfig``.

    The list is built by repetition, so every element is the same object;
    mutating one entry is visible through all of them.

    Args:
        count: length of the returned list (default 1).
    """
    job_key = self._job_key
    # Not setting any set()-related properties as that throws off mox verification.
    shared_config = TaskConfig(
        owner=Identity(role=job_key.role),
        environment=job_key.environment,
        jobName=job_key.name,
        numCpus=6.0,
        ramMb=1024,
        diskMb=2048,
        priority=0,
        maxTaskFailures=1,
        production=True,
        taskLinks={'task': 'link'},
        contactEmail='*****@*****.**',
        executorConfig=ExecutorConfig(name='test', data='test data'))
    return [shared_config] * count
def test_successful_status(self):
    """Test the status command."""
    mock_options = self.setup_mock_options()
    (mock_api, mock_scheduler) = self.create_mock_api()
    # The scheduler mock answers getTasksStatus with a canned status response.
    mock_scheduler.getTasksStatus.return_value = self.create_status_response()

    patches = contextlib.nested(
        patch('twitter.aurora.client.api.SchedulerProxy', return_value=mock_scheduler),
        patch('twitter.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('twitter.common.app.get_options', return_value=mock_options))

    with patches as (mock_scheduler_proxy_class, mock_clusters, options):
        status(['west/mchucarroll/test/hello'], mock_options)

        # The status command sends a getTasksStatus query to the scheduler,
        # and then prints the result.
        expected_query = TaskQuery(
            jobName='hello',
            environment='test',
            owner=Identity(role='mchucarroll'))
        mock_scheduler.getTasksStatus.assert_called_with(expected_query)
def test_simple_config():
    """Converting HELLO_WORLD yields the expected job and task fields."""
    job = convert_pystachio_to_thrift(HELLO_WORLD)

    # Job-level fields.
    assert job.instanceCount == 1
    task_config = job.taskConfig
    expected_key = JobKey(
        role=HELLO_WORLD.role().get(),
        environment=HELLO_WORLD.environment().get(),
        name=HELLO_WORLD.name().get())
    assert job.key == expected_key
    expected_owner = Identity(role=HELLO_WORLD.role().get(), user=getpass.getuser())
    assert job.owner == expected_owner
    assert job.cronSchedule == ''

    # Task-level fields.
    assert task_config.jobName == 'hello_world'
    assert task_config.isService == False
    assert task_config.numCpus == 0.1
    assert task_config.ramMb == 64
    assert task_config.diskMb == 64
    assert task_config.requestedPorts == set()
    assert task_config.production == False
    assert task_config.priority == 0
    assert task_config.maxTaskFailures == 1
    assert task_config.constraints == set()
    assert task_config.packages == set()
    assert task_config.environment == HELLO_WORLD.environment().get()
def _get_tasks_by_instance_id(self, instance_ids):
    """Fetch the RUNNING tasks of this job restricted to ``instance_ids``.

    Args:
        instance_ids: value placed in the query's ``instanceIds`` field.

    Returns:
        The task list from the scheduler's response when the response code
        is OK; an empty list for any other response code or when the
        scheduler call raises ``IOError``.
    """
    log.debug('Querying instance statuses.')
    job_key = self._job_key
    running_query = TaskQuery(
        owner=Identity(role=job_key.role),
        environment=job_key.environment,
        jobName=job_key.name,
        statuses=set([ScheduleStatus.RUNNING]),
        instanceIds=instance_ids)

    try:
        resp = self._scheduler.getTasksStatus(running_query)
    except IOError as e:
        log.error('IO Exception during scheduler call: %s' % e)
        return []

    if resp.responseCode == ResponseCode.OK:
        tasks = resp.result.scheduleStatusResult.tasks
    else:
        tasks = []

    # The response is logged whatever its code was.
    code_name = ResponseCode._VALUES_TO_NAMES[resp.responseCode]
    log.debug('Response from scheduler: %s (message: %s)' % (code_name, resp.message))
    return tasks
def _create_task_query(self, instanceIds=None):
    """Build a query for this job's tasks in any of ``ACTIVE_STATES``.

    Args:
        instanceIds: optional instance-id restriction, passed through
            unchanged.
    """
    job_key = self._job_key
    owner = Identity(role=job_key.role)
    return TaskQuery(
        owner=owner,
        environment=job_key.environment,
        jobName=job_key.name,
        statuses=ACTIVE_STATES,
        instanceIds=instanceIds)
def build_query(cls, role, job, instances=None, statuses=LIVE_STATES, env=None):
    """Assemble a ``TaskQuery`` from its individual parts.

    Args:
        role: owner role.
        job: job name.
        instances: optional instance-id restriction.
        statuses: statuses to match; defaults to ``LIVE_STATES``.
        env: optional environment.
    """
    owner = Identity(role=role)
    query = TaskQuery(
        owner=owner,
        jobName=job,
        statuses=statuses,
        instanceIds=instances,
        environment=env)
    return query
def query_from(cls, role, env, job):
    """Build a ``LIVE_STATES`` query for the job identified by role/env/job."""
    # The role is handed to Identity positionally, as its first argument.
    owner = Identity(role)
    query = TaskQuery(
        statuses=LIVE_STATES,
        owner=owner,
        jobName=job,
        environment=env)
    return query
def to_thrift_query(self):
    """Express this object's role, env and name as a ``TaskQuery``."""
    owner = Identity(role=self.role)
    query = TaskQuery(owner=owner, environment=self.env, jobName=self.name)
    return query
def create_mock_query(cls):
    """Build the ``TaskQuery`` for the test job (TEST_ROLE / TEST_ENV / TEST_JOB)."""
    owner = Identity(role=cls.TEST_ROLE)
    query = TaskQuery(owner=owner, environment=cls.TEST_ENV, jobName=cls.TEST_JOB)
    return query
def make_job_config(self, task_config, instance_count, cron_schedule=None):
    """Wrap ``task_config`` in a ``JobConfiguration`` for this test's job key.

    Args:
        task_config: value for the configuration's ``taskConfig`` field.
        instance_count: value for its ``instanceCount`` field.
        cron_schedule: optional value for its ``cronSchedule`` field.
    """
    job_key = self._job_key
    owner = Identity(role=job_key.role)
    return JobConfiguration(
        key=job_key,
        owner=owner,
        cronSchedule=cron_schedule,
        taskConfig=task_config,
        instanceCount=instance_count)
def convert(job, packages=frozenset(), ports=frozenset()):
    """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration.

    Args:
        job: the Pystachio job to convert.
        packages: iterable of ``(role, package_name, version)`` tuples that
            are attached to the task as ``Package`` structs.
        ports: value stored as the task's ``requestedPorts``.

    Returns:
        A ``JobConfiguration`` whose ``taskConfig`` is built from ``job``.

    Raises:
        InvalidConfig: if the task declares no resources, has zero or
            non-positive cpu/ram/disk, fails Thermos task validation, is
            not fully specified, or contains unbound variables.
    """
    MB = 1024 * 1024

    def not_empty_or(item, default):
        # Interpolate `item` unless it is the Empty marker.
        if item is Empty:
            return default
        return fully_interpolated(item)

    owner = Identity(role=fully_interpolated(job.role()), user=getpass.getuser())
    valid_role = assert_valid_field('role', fully_interpolated(job.role()))
    valid_environment = assert_valid_field(
        'environment', fully_interpolated(job.environment()))
    valid_name = assert_valid_field('name', fully_interpolated(job.name()))
    key = JobKey(role=valid_role, environment=valid_environment, name=valid_name)

    task_raw = job.task()
    task = TaskConfig()

    # job components
    task.jobName = fully_interpolated(job.name())
    task.environment = fully_interpolated(job.environment())
    task.production = fully_interpolated(job.production(), bool)
    task.isService = select_service_bit(job)
    task.maxTaskFailures = fully_interpolated(job.max_task_failures())
    task.priority = fully_interpolated(job.priority())
    task.contactEmail = not_empty_or(job.contact(), None)

    # Add package tuples to a task, to display in the scheduler UI.
    task.packages = frozenset(
        Package(role=str(role), name=str(package_name), version=int(version))
        for role, package_name, version in packages)

    # task components
    if not task_raw.has_resources():
        raise InvalidConfig('Task must specify resources!')

    ram_bytes = fully_interpolated(task_raw.resources().ram())
    disk_bytes = fully_interpolated(task_raw.resources().disk())
    if ram_bytes == 0 or disk_bytes == 0:
        raise InvalidConfig(
            'Must specify ram and disk resources, got ram:%r disk:%r' % (ram_bytes, disk_bytes))

    task.numCpus = fully_interpolated(task_raw.resources().cpu())
    task.ramMb = ram_bytes / MB
    task.diskMb = disk_bytes / MB
    if task.numCpus <= 0 or task.ramMb <= 0 or task.diskMb <= 0:
        raise InvalidConfig(
            'Task has invalid resources.  cpu/ramMb/diskMb must all be positive: '
            'cpu:%r ramMb:%r diskMb:%r' % (task.numCpus, task.ramMb, task.diskMb))

    task.owner = owner
    task.requestedPorts = ports
    task.taskLinks = not_empty_or(job.task_links(), {})
    task.constraints = constraints_to_thrift(not_empty_or(job.constraints(), {}))

    underlying, refs = job.interpolate()

    # need to fake an instance id for the sake of schema checking
    underlying_checked = underlying.bind(mesos={'instance': 31337})
    try:
        ThermosTaskValidator.assert_valid_task(underlying_checked.task())
    except ThermosTaskValidator.InvalidTaskError as e:
        raise InvalidConfig('Task is invalid: %s' % e)
    if not underlying_checked.check().ok():
        # NOTE(review): the message comes from `underlying.check()`, not from
        # the `underlying_checked` object that was just tested - confirm intended.
        raise InvalidConfig('Job not fully specified: %s' % underlying.check().message())

    # The task-id, instance and port-scope refs are allowed to stay unbound
    # here; anything else is reported.
    unbound = [
        ref for ref in refs
        if not (ref == THERMOS_TASK_ID_REF
                or ref == MESOS_INSTANCE_REF
                or Ref.subscope(THERMOS_PORT_SCOPE_REF, ref))
    ]
    if unbound:
        raise InvalidConfig(
            'Config contains unbound variables: %s' % ' '.join(map(str, unbound)))

    cron_schedule = not_empty_or(job.cron_schedule(), '')
    cron_policy = select_cron_policy(job.cron_policy(), job.cron_collision_policy())

    task.executorConfig = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=filter_aliased_fields(underlying).json_dumps())

    return JobConfiguration(
        key=key,
        owner=owner,
        cronSchedule=cron_schedule,
        cronCollisionPolicy=cron_policy,
        taskConfig=task,
        instanceCount=fully_interpolated(job.instances()))