def test_from_assigned_task_http_endpoint_style_config(self):
    """Build a health checker from a task whose config uses an HTTP checker.

    The executor config is serialized to JSON, the HTTP checker fields are
    verified after parsing it back, and the checker produced by
    HealthCheckerProvider is expected to carry the configured intervals.
    """
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2

    http_checker = HttpHealthChecker(
        endpoint='/foo',
        expected_response='bar',
        expected_response_code=201)
    check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(http=http_checker),
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=7)
    job = MESOS_JOB(task=HELLO_WORLD, health_check_config=check_config)
    executor_config = ExecutorConfig(name='thermos', data=job.json_dumps())
    assigned_task = AssignedTask(
        task=TaskConfig(executorConfig=executor_config),
        instanceId=1,
        assignedPorts={'health': 9001})

    # The serialized executor data must round-trip the HTTP checker settings.
    parsed = json.loads(assigned_task.task.executorConfig.data)
    http_section = parsed['health_check_config']['health_checker']['http']
    assert http_section['endpoint'] == '/foo'
    assert http_section['expected_response'] == 'bar'
    assert http_section['expected_response_code'] == 201

    provider = HealthCheckerProvider()
    health_checker = provider.from_assigned_task(assigned_task, None)
    assert health_checker.threaded_health_checker.interval == interval_secs
    assert health_checker.threaded_health_checker.initial_interval == initial_interval_secs
def create_scheduled_tasks(cls):
    """Return three RUNNING ScheduledTask fixtures for jobs 'foo', 'bar' and 'baz'.

    All three share the same task id, host, instance id and resource values;
    only the job name differs.
    """
    def build(job_name):
        # One fully populated scheduled task for the given job name.
        config = TaskConfig(
            maxTaskFailures=1,
            executorConfig=ExecutorConfig(data='fake data'),
            metadata=[],
            job=JobKey(role=cls.TEST_ROLE, environment=cls.TEST_ENV, name=job_name),
            owner=Identity(role=cls.TEST_ROLE),
            environment=cls.TEST_ENV,
            jobName=job_name,
            numCpus=2,
            ramMb=2,
            diskMb=2)
        assigned = AssignedTask(
            taskId=1287391823,
            slaveHost='slavehost',
            task=config,
            instanceId=4237894,
            assignedPorts={})
        running_event = TaskEvent(
            timestamp=28234726395,
            status=ScheduleStatus.RUNNING,
            message="Hi there")
        return ScheduledTask(
            failureCount=0,
            assignedTask=assigned,
            status=ScheduleStatus.RUNNING,
            taskEvents=[running_event])

    return [build(job_name) for job_name in ('foo', 'bar', 'baz')]
def test_interpolate_cmd(self):
    """Check that {{thermos.ports[foo]}} in a shell command is replaced by the assigned port."""
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2
    timeout_secs = 5
    shell_cmd = 'FOO_PORT={{thermos.ports[foo]}} failed command'

    check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command=shell_cmd)),
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=timeout_secs,
    )
    job = MESOS_JOB(task=HELLO_WORLD, health_check_config=check_config)
    executor_config = ExecutorConfig(name='thermos-generic', data=job.json_dumps())
    # The port named 'foo' is what the template in shell_cmd refers to.
    assigned_task = AssignedTask(
        task=TaskConfig(executorConfig=executor_config),
        instanceId=1,
        assignedPorts={'foo': 9001})

    result = HealthCheckerProvider.interpolate_cmd(assigned_task, cmd=shell_cmd)
    assert result == 'FOO_PORT=9001 failed command'
def create_scheduled_task(instance, start_time):
    """Build a ScheduledTask fixture for job nobody/prod/flibber.

    `instance` is used for the task id, the instance id, the ancestor id and
    (plus 4) the failure count; `start_time` is handed to create_task_events
    to produce the task's events.
    """
    config = TaskConfig(
        job=JobKey(role="nobody", environment="prod", name='flibber'),
        owner=Identity(role="nobody"),
        environment="prod",
        jobName="flibber",
        isService=False,
        numCpus=2,
        ramMb=2048,
        diskMb=4096,
        priority=7,
        maxTaskFailures=3,
        production=False,
        requestedPorts=["http"])
    assigned = AssignedTask(
        taskId="task_%s" % instance,
        slaveId="random_machine_id",
        slaveHost="junk.nothing",
        task=config,
        assignedPorts={"http": 1001},
        instanceId=instance)
    return ScheduledTask(
        assignedTask=assigned,
        status=2,
        failureCount=instance + 4,
        taskEvents=create_task_events(start_time),
        ancestorId="random_task_ancestor%s" % instance)
def make_assigned_task(thermos_config, assigned_ports=None):
    """Wrap a thermos config in an AssignedTask with instance id 12345 on 'test-host'.

    The config is serialized into the executor config; role, name and
    environment are read from the config itself, except that the JobKey
    environment is always "prod". A falsy `assigned_ports` becomes an empty
    dict.
    """
    from gen.apache.aurora.api.constants import AURORA_EXECUTOR_NAME
    from gen.apache.aurora.api.ttypes import (
        AssignedTask,
        ExecutorConfig,
        Identity,
        JobKey,
        TaskConfig
    )

    ports = assigned_ports or {}
    executor_config = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=thermos_config.json_dumps())
    job_key = JobKey(
        role=thermos_config.role().get(),
        environment="prod",
        name=thermos_config.name().get())
    owner = Identity(
        role=thermos_config.role().get(),
        user=thermos_config.role().get())
    task_config = TaskConfig(
        job=job_key,
        owner=owner,
        environment=thermos_config.environment().get(),
        jobName=thermos_config.name().get(),
        executorConfig=executor_config)
    return AssignedTask(
        instanceId=12345,
        task=task_config,
        assignedPorts=ports,
        slaveHost='test-host')
def setup_get_tasks_status_calls(cls, scheduler):
    """Make the scheduler's task-status calls return 20 mocked scheduled tasks.

    Both getTasksStatus and getTasksWithoutConfigs are wired to the same
    success response, whose result is filled with autospec'd ScheduledTask
    mocks for instances 0..19 that all share one TaskConfig.
    """
    status_response = cls.create_simple_success_response()
    scheduler.getTasksStatus.return_value = status_response
    scheduler.getTasksWithoutConfigs.return_value = status_response
    shared_config = TaskConfig(
        numCpus=1.0,
        ramMb=10,
        diskMb=1,
        job=JobKey(role='bozo', environment='test', name='hello'))

    def mocked_task(index):
        # Autospec'd scheduled task carrying an autospec'd assigned task.
        scheduled = create_autospec(spec=ScheduledTask, instance=True)
        scheduled.assignedTask = create_autospec(spec=AssignedTask, instance=True)
        scheduled.assignedTask.instanceId = index
        scheduled.assignedTask.taskId = "Task%s" % index
        scheduled.assignedTask.slaveId = "Slave%s" % index
        # NOTE(review): slaveHost is set on the scheduled task itself, not on
        # assignedTask like the neighbouring fields -- confirm this is intended.
        scheduled.slaveHost = "Slave%s" % index
        scheduled.assignedTask.task = shared_config
        return scheduled

    # This should be a list of ScheduledTask's.
    tasks = [mocked_task(index) for index in range(20)]
    status_response.result = Result(
        scheduleStatusResult=ScheduleStatusResult(tasks=tasks))
def test_from_assigned_task_shell_no_demotion(self, mock_getpwnam):
    """Shell health checker built with nosetuid_health_checks=True.

    Verifies the serialized shell command, the intervals and failure limit on
    the resulting checker, and that the getpwnam mock was never called.
    """
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2
    timeout_secs = 5

    check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command='failed command')),
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=timeout_secs,
    )
    job = MESOS_JOB(task=HELLO_WORLD, health_check_config=check_config)
    task_config = TaskConfig(
        job=JobKey(role='role', environment='env', name='name'),
        executorConfig=ExecutorConfig(name='thermos-generic', data=job.json_dumps()))
    assigned_task = AssignedTask(
        task=task_config,
        instanceId=1,
        assignedPorts={'foo': 9001})

    parsed = json.loads(assigned_task.task.executorConfig.data)
    shell_section = parsed['health_check_config']['health_checker']['shell']
    assert shell_section['shell_command'] == 'failed command'

    provider = HealthCheckerProvider(nosetuid_health_checks=True)
    health_checker = provider.from_assigned_task(assigned_task, None)
    assert health_checker.threaded_health_checker.interval == interval_secs
    assert health_checker.threaded_health_checker.initial_interval == initial_interval_secs
    hct_max_fail = health_checker.threaded_health_checker.max_consecutive_failures
    assert hct_max_fail == max_consecutive_failures
    # Should not be trying to access role's user info.
    assert not mock_getpwnam.called
def test_from_assigned_task_shell(self):
    """Shell health checker built by a default HealthCheckerProvider.

    Verifies the serialized shell command and that the resulting checker
    carries the configured intervals and consecutive-failure limit.
    """
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2
    timeout_secs = 5

    check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command='failed command')),
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=timeout_secs,
    )
    job = MESOS_JOB(task=HELLO_WORLD, health_check_config=check_config)
    executor_config = ExecutorConfig(name='thermos-generic', data=job.json_dumps())
    assigned_task = AssignedTask(
        task=TaskConfig(executorConfig=executor_config),
        instanceId=1,
        assignedPorts={'foo': 9001})

    parsed = json.loads(assigned_task.task.executorConfig.data)
    shell_section = parsed['health_check_config']['health_checker']['shell']
    assert shell_section['shell_command'] == 'failed command'

    provider = HealthCheckerProvider()
    health_checker = provider.from_assigned_task(assigned_task, None)
    assert health_checker.threaded_health_checker.interval == interval_secs
    assert health_checker.threaded_health_checker.initial_interval == initial_interval_secs
    hct_max_fail = health_checker.threaded_health_checker.max_consecutive_failures
    assert hct_max_fail == max_consecutive_failures
def create_scheduled_task(instance, start_time):
    """Build a ScheduledTask fixture for job 'flibber' by assigning fields one by one.

    `instance` feeds the task id, failure count (plus 4) and ancestor id;
    `start_time` is handed to create_task_events to produce the task's events.
    """
    config = TaskConfig()
    config.owner = Identity(role="nobody")
    config.environment = "prod"
    config.jobName = "flibber"
    config.isService = False
    config.numCpus = 2
    config.ramMb = 2048
    config.diskMb = 4096
    config.priority = 7
    config.maxTaskFailures = 3
    config.production = False
    config.requestedPorts = ["http"]

    assigned = AssignedTask()
    assigned.taskId = "task_%s" % instance
    assigned.slaveId = "random_machine_id"
    assigned.slaveHost = "junk.nothing"
    assigned.task = config
    assigned.assignedPorts = {"http": 1001}
    # NOTE(review): the instance id is fixed at 0 rather than `instance` --
    # confirm this is intended.
    assigned.instanceId = 0

    scheduled = ScheduledTask()
    scheduled.assignedTask = assigned
    scheduled.status = 2
    scheduled.failureCount = instance + 4
    scheduled.taskEvents = create_task_events(start_time)
    scheduled.ancestorId = "random_task_ancestor%s" % instance
    return scheduled
def test_get_job_update_diff(self):
    """get_job_update_diff should call the proxy once with the expected update request."""
    api, mock_proxy = self.mock_api()
    expected_task_config = TaskConfig()
    mock_proxy.getJobUpdateDiff.return_value = self.create_simple_success_response()

    api.get_job_update_diff(self.mock_job_config(), instances=None)

    expected_request = self.create_update_request(expected_task_config)
    mock_proxy.getJobUpdateDiff.assert_called_once_with(expected_request)
def create_task(cls):
    """Return a one-element list holding a RUNNING task for job role/test/job, instance 0."""
    job_key = JobKey(role='role', environment='test', name='job')
    assigned = AssignedTask(instanceId=0, task=TaskConfig(job=job_key))
    scheduled = ScheduledTask(assignedTask=assigned, status=ScheduleStatus.RUNNING)
    return [scheduled]
def test_start_job_update(self):
    """start_job_update should call the proxy once with the expected update request."""
    api, mock_proxy = self.mock_api()
    expected_task_config = TaskConfig()
    mock_proxy.startJobUpdate.return_value = self.create_simple_success_response()

    api.start_job_update(self.mock_job_config())

    expected_request = self.create_update_request(expected_task_config)
    mock_proxy.startJobUpdate.assert_called_once_with(expected_request)
def create_task(cls):
    """Return a one-element list holding a RUNNING task owned by 'test_role' for 'test_job'."""
    config = TaskConfig(owner=Identity(role='test_role'), jobName='test_job')
    assigned = AssignedTask(instanceId=0, task=config)
    scheduled = ScheduledTask(assignedTask=assigned, status=ScheduleStatus.RUNNING)
    return [scheduled]
def test_deserialize_thermos_task_unbound_refs():
    """Deserializing a task with unbound refs must raise ValueError naming those refs.

    The task is built from HELLO_WORLD_UNBOUND and the error text is expected
    to list the unbound references.
    """
    task_config = TaskConfig(
        executorConfig=ExecutorConfig(
            name='thermos',
            data=MESOS_JOB(task=HELLO_WORLD_UNBOUND).json_dumps()))
    assigned_task = AssignedTask(task=task_config, instanceId=0)

    with pytest.raises(ValueError) as execinfo:
        mesos_task_instance_from_assigned_task(assigned_task)

    # str() is used instead of the `.message` attribute: BaseException.message
    # was deprecated in Python 2.6 and does not exist in Python 3, while str()
    # yields the same text for a single-argument exception on both.
    assert str(execinfo.value) == "Unexpected unbound refs: {{unbound_cmd}} {{unbound}}"
def mock_status_active_tasks(self, instance_ids):
    """Record an expected getTasksWithoutConfigs call returning RUNNING tasks.

    One ScheduledTask with an empty TaskConfig is produced per id in
    `instance_ids`, in the same order.
    """
    running = [
        ScheduledTask(
            status=ScheduleStatus.RUNNING,
            assignedTask=AssignedTask(task=TaskConfig(), instanceId=instance_id))
        for instance_id in instance_ids
    ]
    status_result = ScheduleStatusResult(tasks=running)
    response = make_response(result=Result(scheduleStatusResult=status_result))
    self.mock_scheduler.getTasksWithoutConfigs(IgnoreArg()).AndReturn(response)
def setup_populate_job_config(cls, api, count=20):
    """Make api.populateJobConfig return a success response with a mocked result.

    The mocked PopulateJobResult's populatedDEPRECATED is a set built from
    `count` TaskConfig objects with identical resource values. Returns the
    response.
    """
    response = cls.create_simple_success_response()
    response.result.populateJobResult = Mock(spec=PopulateJobResult)
    api.populateJobConfig.return_value = response

    task_configs = []
    for _ in range(count):
        task_configs.append(TaskConfig(numCpus=1.0, ramMb=1, diskMb=1))
    response.result.populateJobResult.populatedDEPRECATED = set(task_configs)
    return response
def setup_populate_job_config(cls, api, count=20):
    """Make api.populateJobConfig return a success response with one task config.

    `count` is accepted but not used by this implementation. Returns the
    response.
    """
    job_key = JobKey(role='bozo', environment='test', name='hello')
    task_config = TaskConfig(numCpus=1.0, ramMb=1, diskMb=1, job=job_key)

    response = cls.create_simple_success_response()
    response.result = Result(
        populateJobResult=PopulateJobResult(taskConfig=task_config))
    api.populateJobConfig.return_value = response
    return response
def setup_populate_job_config(cls, api):
    """Make api.populateJobConfig return a success response with a mocked result.

    The mocked PopulateJobResult's populatedDEPRECATED is a set built from 20
    TaskConfig objects with identical resource values. Returns the response.
    """
    response = cls.create_simple_success_response()
    response.result.populateJobResult = Mock(spec=PopulateJobResult)
    api.populateJobConfig.return_value = response

    task_configs = [TaskConfig(numCpus=1.0, ramMb=1, diskMb=1) for _ in range(20)]
    response.result.populateJobResult.populatedDEPRECATED = set(task_configs)
    return response
def create_task_config(cls, name):
    """Return a TaskConfig fixture for job `name` in the class's test role and environment."""
    job_key = JobKey(role=cls.TEST_ROLE, environment=cls.TEST_ENV, name=name)
    executor_config = ExecutorConfig(data='{"fake": "data"}')
    return TaskConfig(
        maxTaskFailures=1,
        executorConfig=executor_config,
        metadata=[],
        job=job_key,
        numCpus=2,
        ramMb=2,
        diskMb=2)
def test_backfill_task(self):
    """resource_details_from_quota should list CPU, RAM, disk and port details for a task."""
    task = TaskConfig(
        numCpus=1.0,
        ramMb=2,
        diskMb=3,
        requestedPorts=frozenset(['http']))
    expected = [
        ResourceDetails(ResourceType.CPUS, 1.0),
        ResourceDetails(ResourceType.RAM_MB, 2),
        ResourceDetails(ResourceType.DISK_MB, 3),
        ResourceDetails(ResourceType.PORTS, 'http'),
    ]
    assert ResourceManager.resource_details_from_quota(task) == expected
def create_task_config(cls, name):
    """Return a TaskConfig fixture for job `name`, including owner and environment fields."""
    job_key = JobKey(role=cls.TEST_ROLE, environment=cls.TEST_ENV, name=name)
    owner = Identity(role=cls.TEST_ROLE)
    return TaskConfig(
        maxTaskFailures=1,
        executorConfig=ExecutorConfig(data='fake data'),
        metadata=[],
        job=job_key,
        owner=owner,
        environment=cls.TEST_ENV,
        jobName=name,
        numCpus=2,
        ramMb=2,
        diskMb=2)
def create_scheduled_task(cls, instance_id, status=ScheduleStatus.RUNNING, task_id=None, initial_time=None):
    """Build a ScheduledTask fixture with a single task event.

    A falsy `task_id` falls back to "Task<instance_id>" and a falsy
    `initial_time` falls back to a timestamp of 1000. Slave id and host are
    both "Slave<instance_id>".
    """
    effective_task_id = task_id if task_id else "Task%s" % instance_id
    event_timestamp = initial_time if initial_time else 1000
    slave_name = "Slave%s" % instance_id

    assigned = AssignedTask(
        instanceId=instance_id,
        taskId=effective_task_id,
        slaveId=slave_name,
        slaveHost=slave_name,
        task=TaskConfig())
    return ScheduledTask(
        status=status,
        assignedTask=assigned,
        taskEvents=[TaskEvent(timestamp=event_timestamp)])
def mock_job_config(cls, error=None):
    """Return an autospec'd AuroraConfig with 5 instances and an empty TaskConfig.

    When `error` is truthy, config.update_config raises it; otherwise it
    returns a mocked UpdateConfig whose get() yields cls.UPDATE_CONFIG.
    """
    config = create_autospec(spec=AuroraConfig, instance=True)
    update_config_mock = create_autospec(spec=UpdateConfig, instance=True)
    update_config_mock.get.return_value = cls.UPDATE_CONFIG

    if not error:
        config.update_config.return_value = update_config_mock
    else:
        config.update_config.side_effect = error

    job_configuration = create_autospec(spec=JobConfiguration, instance=True)
    job_configuration.taskConfig = TaskConfig()
    config.job.return_value = job_configuration
    config.instances.return_value = 5
    return config
def setup_populate_job_config(cls, api, count=20):
    """Make api.populateJobConfig return a success response with populated configs.

    Builds `count` identical TaskConfig objects; the result carries them as a
    set in populatedDEPRECATED and the first one as taskConfig. Returns the
    response.
    """
    response = cls.create_simple_success_response()

    task_configs = []
    for _ in range(count):
        task_configs.append(TaskConfig(
            numCpus=1.0,
            ramMb=1,
            diskMb=1,
            job=JobKey(role='bozo', environment='test', name='hello')))

    job_result = PopulateJobResult(
        populatedDEPRECATED=set(task_configs),
        taskConfig=task_configs[0])
    response.result = Result(populateJobResult=job_result)
    api.populateJobConfig.return_value = response
    return response
def create_task(self, duration, id, host=None, name=None, prod=None):
    """Build a RUNNING ScheduledTask whose RUNNING event lies `duration` seconds in the past.

    `prod` defaults to True when None; a falsy `name` falls back to
    self._name. The event timestamp is (now - duration) multiplied by 1000.
    """
    is_production = True if prod is None else prod
    job_key = JobKey(role=self._role, environment=self._env, name=name or self._name)
    assigned = AssignedTask(
        instanceId=id,
        slaveHost=host,
        task=TaskConfig(production=is_production, job=job_key))
    running_event = TaskEvent(
        status=ScheduleStatus.RUNNING,
        timestamp=(time.time() - duration) * 1000)
    return ScheduledTask(
        assignedTask=assigned,
        status=ScheduleStatus.RUNNING,
        taskEvents=[running_event])
def test_from_assigned_task_shell_filesystem_image(self, mock_getpwnam):
    """Shell health check for a task running with a filesystem image.

    With a sandbox reporting is_filesystem_image=True and a containerizer
    path configured, ShellHealthCheck is expected to be constructed exactly
    once with the command, a non-None wrapper_fn, preexec_fn=None and a 5.0
    second timeout.
    """
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2
    timeout_secs = 5

    check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command='failed command')),
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=timeout_secs,
    )
    job = MESOS_JOB(task=HELLO_WORLD, health_check_config=check_config)
    task_config = TaskConfig(
        job=JobKey(role='role', environment='env', name='name'),
        executorConfig=ExecutorConfig(name='thermos-generic', data=job.json_dumps()))
    assigned_task = AssignedTask(
        task=task_config,
        instanceId=1,
        assignedPorts={'foo': 9001})

    parsed = json.loads(assigned_task.task.executorConfig.data)
    shell_section = parsed['health_check_config']['health_checker']['shell']
    assert shell_section['shell_command'] == 'failed command'

    # Sandbox double exposing only the two properties set below.
    mock_sandbox = mock.Mock(spec_set=SandboxInterface)
    type(mock_sandbox).root = mock.PropertyMock(return_value='/some/path')
    type(mock_sandbox).is_filesystem_image = mock.PropertyMock(return_value=True)

    patch_target = 'apache.aurora.executor.common.health_checker.ShellHealthCheck'
    with mock.patch(patch_target) as mock_shell:
        provider = HealthCheckerProvider(
            nosetuid_health_checks=False,
            mesos_containerizer_path='/some/path/mesos-containerizer')
        provider.from_assigned_task(assigned_task, mock_sandbox)

        class NotNone(object):
            # Compares equal to anything that is not None.
            def __eq__(self, other):
                return other is not None

        expected_calls = [
            mock.call(
                cmd='failed command',
                wrapper_fn=NotNone(),
                preexec_fn=None,
                timeout_secs=5.0)
        ]
        assert mock_shell.mock_calls == expected_calls
def mock_status_active_tasks(self, instance_ids):
    """Record an expected getTasksWithoutConfigs call returning RUNNING tasks.

    The OK response carries one ScheduledTask with an empty TaskConfig per id
    in `instance_ids`, in the same order.
    """
    running = [
        ScheduledTask(
            status=ScheduleStatus.RUNNING,
            assignedTask=AssignedTask(task=TaskConfig(), instanceId=instance_id))
        for instance_id in instance_ids
    ]

    result = Result()
    result.scheduleStatusResult = ScheduleStatusResult(tasks=running)
    response = Response(responseCode=ResponseCode.OK, messageDEPRECATED='test')
    response.result = result

    self.mock_scheduler.getTasksWithoutConfigs(IgnoreArg()).AndReturn(response)
def make_task(thermos_config, assigned_ports=None, **kw):
    """Build a mesos TaskInfo whose data is a serialized AssignedTask for `thermos_config`.

    Args:
        thermos_config: config object; its task name is used for the task id
            (with a '-001' suffix) and TaskInfo name, and its JSON dump
            becomes the executor config data.
        assigned_ports: mapping passed as the AssignedTask's assignedPorts.
            Defaults to a fresh empty dict per call.
        **kw: extra keyword arguments forwarded to AssignedTask.

    Returns:
        A mesos_pb2.TaskInfo with task_id, name and data populated.
    """
    # A None sentinel avoids sharing one mutable default dict across calls.
    if assigned_ports is None:
        assigned_ports = {}

    # The current login name is used as both role and user of the owner.
    role = getpass.getuser()
    task_name = thermos_config.task().name().get()
    task_id = task_name + '-001'

    executor_config = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=thermos_config.json_dumps())
    at = AssignedTask(
        taskId=task_id,
        task=TaskConfig(
            executorConfig=executor_config,
            owner=Identity(role=role, user=role)),
        assignedPorts=assigned_ports,
        **kw)

    td = mesos_pb2.TaskInfo()
    td.task_id.value = task_id
    td.name = thermos_config.task().name().get()
    td.data = serialize(at)
    return td
def mock_job_config(cls, error=None):
    """Return an autospec'd AuroraConfig with 5 instances and an empty TaskConfig.

    When `error` is truthy, config.update_config raises it; otherwise it
    returns an UpdateConfig with batch_size=1, watch_secs=50,
    max_per_shard_failures=2 and max_total_failures=1.
    """
    config = create_autospec(spec=AuroraConfig, instance=True)
    update_settings = UpdateConfig(
        batch_size=1,
        watch_secs=50,
        max_per_shard_failures=2,
        max_total_failures=1)

    if not error:
        config.update_config.return_value = update_settings
    else:
        config.update_config.side_effect = error

    job_configuration = create_autospec(spec=JobConfiguration, instance=True)
    job_configuration.taskConfig = TaskConfig()
    config.job.return_value = job_configuration
    config.instances.return_value = 5
    return config
def make_assigned_task(thermos_config, assigned_ports=None):
    """Wrap a thermos config in an AssignedTask with instance id 12345 and id "taskId-12345".

    The config is serialized into the executor config; the JobKey takes its
    role and name from the config and always uses environment 'test'. A falsy
    `assigned_ports` becomes an empty dict.
    """
    from gen.apache.aurora.api.constants import AURORA_EXECUTOR_NAME
    from gen.apache.aurora.api.ttypes import AssignedTask, ExecutorConfig, JobKey, TaskConfig

    ports = assigned_ports or {}
    executor_config = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=thermos_config.json_dumps())
    job_key = JobKey(
        role=thermos_config.role().get(),
        environment='test',
        name=thermos_config.name().get())
    task_config = TaskConfig(job=job_key, executorConfig=executor_config)
    return AssignedTask(
        instanceId=12345,
        task=task_config,
        assignedPorts=ports,
        taskId="taskId-12345")
def convert(job, metadata=frozenset(), ports=frozenset()):
    """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration.

    Args:
        job: the Pystachio job; its role, environment, name, task resources,
            constraints, container and scheduling fields are read through
            accessor calls.
        metadata: iterable of (key, value) pairs; each pair becomes a Metadata
            entry with both parts converted to str.
        ports: assigned unchanged to the task's requestedPorts.

    Returns:
        A JobConfiguration carrying the job key, owner, cron settings, the
        populated TaskConfig and the instance count.

    Raises:
        InvalidConfig: if the task has no resources, ram or disk is 0, any of
            cpu/ramMb/diskMb is not positive, ThermosTaskValidator rejects the
            task, the mesos-bound job fails check(), or refs remain unbound
            other than the thermos task id, mesos instance/hostname refs and
            refs under the thermos port scope.
    """
    # The owner is the login name of the user running the conversion.
    owner = Identity(user=getpass.getuser())
    key = JobKey(
        role=assert_valid_field('role', fully_interpolated(job.role())),
        environment=assert_valid_field('environment', fully_interpolated(job.environment())),
        name=assert_valid_field('name', fully_interpolated(job.name())))

    task_raw = job.task()

    # Divisor applied to ram/disk below; presumably those values are in
    # bytes -- TODO confirm.
    MB = 1024 * 1024
    task = TaskConfig()

    def not_empty_or(item, default):
        # Return `default` for the Empty sentinel, else the interpolated value.
        return default if item is Empty else fully_interpolated(item)

    # job components
    task.production = fully_interpolated(job.production(), bool)
    task.isService = select_service_bit(job)
    task.maxTaskFailures = fully_interpolated(job.max_task_failures())
    task.priority = fully_interpolated(job.priority())
    task.contactEmail = not_empty_or(job.contact(), None)
    task.tier = not_empty_or(job.tier(), None)

    # Add metadata to a task, to display in the scheduler UI.
    # The loop variable `key` is local to the generator expression, so the
    # JobKey bound to `key` above is not overwritten.
    task.metadata = frozenset(Metadata(key=str(key), value=str(value)) for key, value in metadata)

    # task components
    if not task_raw.has_resources():
        raise InvalidConfig('Task must specify resources!')

    if (fully_interpolated(task_raw.resources().ram()) == 0
            or fully_interpolated(task_raw.resources().disk()) == 0):
        raise InvalidConfig('Must specify ram and disk resources, got ram:%r disk:%r' % (
            fully_interpolated(task_raw.resources().ram()),
            fully_interpolated(task_raw.resources().disk())))

    task.numCpus = fully_interpolated(task_raw.resources().cpu())
    task.ramMb = fully_interpolated(task_raw.resources().ram()) / MB
    task.diskMb = fully_interpolated(task_raw.resources().disk()) / MB
    # Checked again after division: a non-zero raw value can still end up
    # non-positive here.
    if task.numCpus <= 0 or task.ramMb <= 0 or task.diskMb <= 0:
        raise InvalidConfig('Task has invalid resources.  cpu/ramMb/diskMb must all be positive: '
            'cpu:%r ramMb:%r diskMb:%r' % (task.numCpus, task.ramMb, task.diskMb))

    task.job = key
    task.owner = owner
    task.requestedPorts = ports
    task.taskLinks = {}  # See AURORA-739
    task.constraints = constraints_to_thrift(not_empty_or(job.constraints(), {}))
    task.container = create_container_config(job.container())

    underlying, refs = job.interpolate()

    # need to fake an instance id for the sake of schema checking
    underlying_checked = underlying.bind(mesos={'instance': 31337, 'hostname': ''})
    try:
        ThermosTaskValidator.assert_valid_task(underlying_checked.task())
    except ThermosTaskValidator.InvalidTaskError as e:
        raise InvalidConfig('Task is invalid: %s' % e)
    # NOTE(review): the condition checks `underlying_checked`, but the message
    # is taken from `underlying.check()` (the version without the fake mesos
    # binding) -- confirm this is intended.
    if not underlying_checked.check().ok():
        raise InvalidConfig('Job not fully specified: %s' % underlying.check().message())

    # Collect refs that are still unbound, skipping the ones listed below
    # (thermos task id, mesos instance/hostname, anything under the thermos
    # port scope).
    unbound = []
    for ref in refs:
        if ref in (THERMOS_TASK_ID_REF, MESOS_INSTANCE_REF, MESOS_HOSTNAME_REF) or (
                Ref.subscope(THERMOS_PORT_SCOPE_REF, ref)):
            continue
        unbound.append(ref)

    if unbound:
        raise InvalidConfig('Config contains unbound variables: %s' % ' '.join(map(str, unbound)))

    # The executor receives the job without the fake mesos binding, serialized
    # as JSON after filter_aliased_fields has been applied.
    task.executorConfig = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=filter_aliased_fields(underlying).json_dumps())

    return JobConfiguration(
        key=key,
        owner=owner,
        cronSchedule=not_empty_or(job.cron_schedule(), None),
        cronCollisionPolicy=select_cron_policy(job.cron_collision_policy()),
        taskConfig=task,
        instanceCount=fully_interpolated(job.instances()))