def init_pvc(data, filer):
    """Create the task PVC, mount it into the filer, and run the inputs filer job."""
    task_name = data['executors'][0]['metadata']['labels']['taskmaster-name']
    pvc_name = task_name + '-pvc'
    pvc_size = data['resources']['disk_gb']
    pvc = PVC(pvc_name, pvc_size, args.namespace)

    mounts = generate_mounts(data, pvc)
    logging.debug(mounts)
    logging.debug(type(mounts))
    pvc.set_volume_mounts(mounts)
    filer.add_volume_mount(pvc)

    pvc.create()
    # to global var for cleanup purposes
    global created_pvc
    created_pvc = pvc

    if os.environ.get('NETRC_SECRET_NAME') is not None:
        filer.add_netrc_mount(os.environ.get('NETRC_SECRET_NAME'))

    filerjob = Job(
        filer.get_spec('inputs', args.debug),
        task_name + '-inputs-filer',
        args.namespace)

    global created_jobs
    created_jobs.append(filerjob)
    # filerjob.run_to_completion(poll_interval)
    status = filerjob.run_to_completion(
        poll_interval, check_cancelled, args.pod_timeout)
    if status != 'Complete':
        exit_cancelled('Got status ' + status)

    return pvc
def run_executor(executor, namespace, pvc=None):
    """Run a single executor as a Kubernetes Job, mounting the task PVC if present."""
    jobname = executor['metadata']['name']
    spec = executor['spec']['template']['spec']

    if os.environ.get('EXECUTOR_BACKOFF_LIMIT') is not None:
        executor['spec'].update(
            {'backoffLimit': int(os.environ['EXECUTOR_BACKOFF_LIMIT'])})

    if pvc is not None:
        mounts = spec['containers'][0].setdefault('volumeMounts', [])
        mounts.extend(pvc.volume_mounts)
        volumes = spec.setdefault('volumes', [])
        volumes.extend([{
            'name': task_volume_basename,
            'persistentVolumeClaim': {
                'readOnly': False,
                'claimName': pvc.name
            }
        }])

    logger.debug('Created job: ' + jobname)
    job = Job(executor, jobname, namespace)
    logger.debug('Job spec: ' + str(job.body))

    global created_jobs
    created_jobs.append(job)

    status = job.run_to_completion(
        poll_interval, check_cancelled, args.pod_timeout)
    if status != 'Complete':
        if status == 'Error':
            job.delete()
        exit_cancelled('Got status ' + status)
def run_task(data, filer_name, filer_version):
    """Run the whole TES task: inputs filer, executors, then outputs filer."""
    task_name = data['executors'][0]['metadata']['labels']['taskmaster-name']
    pvc = None

    if data['volumes'] or data['inputs'] or data['outputs']:
        filer = Filer(task_name + '-filer', data, filer_name, filer_version,
                      args.pull_policy_always)
        if os.environ.get('TESK_FTP_USERNAME') is not None:
            filer.set_ftp(os.environ['TESK_FTP_USERNAME'],
                          os.environ['TESK_FTP_PASSWORD'])
        pvc = init_pvc(data, filer)

    # run executors
    for executor in data['executors']:
        run_executor(executor, args.namespace, pvc)
    logging.debug("Finished running executors")

    # upload files and delete pvc
    if data['volumes'] or data['inputs'] or data['outputs']:
        filerjob = Job(
            filer.get_spec('outputs', args.debug),
            task_name + '-outputs-filer',
            args.namespace)

        global created_jobs
        created_jobs.append(filerjob)
        # filerjob.run_to_completion(poll_interval)
        status = filerjob.run_to_completion(
            poll_interval, check_cancelled, args.pod_timeout)
        if status != 'Complete':
            exit_cancelled('Got status ' + status)
        else:
            pvc.delete()
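# Illustration only: a minimal, hypothetical entry point showing how
# run_task() above could be driven. The real taskmaster parses its options
# (namespace, poll interval, filer image, debug flags) with argparse and
# registers cleanup of created_jobs/created_pvc; the JSON loading and the
# names used below are assumptions made for this sketch.
import json
import sys


def main_sketch(task_json_path, filer_name='filer', filer_version='latest'):
    # Load the TES task description prepared by the API layer.
    with open(task_json_path) as fh:
        data = json.load(fh)
    # run_task() creates the inputs filer job, the PVC, one Job per
    # executor, and finally the outputs filer job.
    run_task(data, filer_name, filer_version)


if __name__ == '__main__':
    main_sketch(sys.argv[1])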
def test_get_status_success(self, mock_read_namespaced_job,
                            mock_list_namespaced_pod):
    """
    Checking if job status is complete
    """
    executor = self.data['executors'][0]
    jobname = executor['metadata']['name']
    job = Job(executor, jobname, taskmaster.args.namespace)
    status, all_pods_running = job.get_status(False)
    self.assertEqual(status, "Complete")
def test_get_status_running(self, mock_read_namespaced_job,
                            mock_list_namespaced_pod):
    """
    Checking if the job is in running state in an ideal situation
    """
    executor = self.data['executors'][0]
    jobname = executor['metadata']['name']
    job = Job(executor, jobname, taskmaster.args.namespace)
    status, all_pods_running = job.get_status(False)
    self.assertEqual(status, "Running")
def test_run_to_completion_success(self, mock_get_status,
                                   mock_create_namespaced_job):
    """
    Checking if the Job runs to completion successfully
    """
    for executor in self.data['executors']:
        jobname = executor['metadata']['name']
        job = Job(executor, jobname, taskmaster.args.namespace)
        status = job.run_to_completion(1, taskmaster.check_cancelled,
                                       taskmaster.args.pod_timeout)
        self.assertEqual(status, "Complete")
def test_run_to_completion_check_other_K8_exception(
        self, mock_create_namespaced_job):
    """
    Checking if an exception is raised when the ApiException status
    is other than 409
    """
    for executor in self.data['executors']:
        jobname = executor['metadata']['name']
        job = Job(executor, jobname, taskmaster.args.namespace)
        with self.assertRaises(ApiException):
            job.run_to_completion(taskmaster.args.poll_interval,
                                  taskmaster.check_cancelled,
                                  taskmaster.args.pod_timeout)
def test_get_job_status_for_failed_pod(self, mock_read_namespaced_job,
                                       mock_list_namespaced_pod):
    """
    Checking if the job status is 'running' when the pod failed to start
    with a reason other than ImagePullBackOff.
    """
    mock_list_namespaced_pod.return_value = \
        list_namespaced_pod_pending_unknown_error()
    mock_read_namespaced_job.return_value = read_namespaced_job_pending()
    executor = self.data['executors'][0]
    jobname = executor['metadata']['name']
    job = Job(executor, jobname, taskmaster.args.namespace)
    status, all_pods_running = job.get_status(False)
    self.assertEqual(status, "Running")
def test_run_to_completion_check_conflict_exception(
        self, mock_get_status, mock_read_namespaced_job,
        mock_check_cancelled, mock_create_namespaced_job):
    """
    Checking if the Job status is 'Complete' when an ApiException with
    status 409 (conflict) is raised
    """
    for executor in self.data['executors']:
        jobname = executor['metadata']['name']
        job = Job(executor, jobname, taskmaster.args.namespace)
        status = job.run_to_completion(taskmaster.args.poll_interval,
                                       taskmaster.check_cancelled,
                                       taskmaster.args.pod_timeout)
        self.assertEqual(status, "Complete")
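# Illustration only: one way the 409-conflict scenario above could be set up
# with unittest.mock. The real test module's patch targets and fixtures may
# differ; ApiException(status=409) is the kubernetes.client.rest exception
# returned when the Job already exists.
from unittest.mock import patch
from kubernetes.client.rest import ApiException


def example_conflict_setup():
    # Simulate "Job already exists": create_namespaced_job raises 409,
    # after which run_to_completion() keeps polling the existing Job
    # instead of failing outright.
    with patch('kubernetes.client.BatchV1Api.create_namespaced_job',
               side_effect=ApiException(status=409, reason='Conflict')):
        pass  # build the Job and call run_to_completion() here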
def test_get_job_status_for_wrong_image(self, mock_read_namespaced_job,
                                        mock_list_namespaced_pod):
    """
    Assuming the image name is wrong, check that the job status returned
    by the method is 'Running' while still within the default pod timeout
    """
    mock_list_namespaced_pod.return_value = \
        list_namespaced_pod_error_ImagePullBackOff(2)
    mock_read_namespaced_job.return_value = read_namespaced_job_pending(2)
    executor = self.data['executors'][0]
    jobname = executor['metadata']['name']
    job = Job(executor, jobname, taskmaster.args.namespace)
    status, all_pods_running = job.get_status(False)
    self.assertEqual(status, "Running")
def test_run_to_completion_cancelled(self, mock_get_status,
                                     mock_create_namespaced_job,
                                     mock_check_cancelled,
                                     mock_job_delete):
    """
    Checking if the Job is cancelled
    """
    for executor in self.data['executors']:
        jobname = executor['metadata']['name']
        job = Job(executor, jobname, taskmaster.args.namespace)
        status = job.run_to_completion(taskmaster.args.poll_interval,
                                       taskmaster.check_cancelled,
                                       taskmaster.args.pod_timeout)
        self.assertEqual(status, "Cancelled")
def test_get_job_status_ImagePullBackOff_error(self, mock_list_namespaced_pod,
                                               mock_read_namespaced_job):
    """
    Checking whether the job state is 'Error' when the pod failed to start
    and the reason for the failure is ImagePullBackOff
    """
    mock_list_namespaced_pod.return_value = \
        list_namespaced_pod_error_ImagePullBackOff()
    executor = self.data['executors'][0]
    jobname = executor['metadata']['name']
    job = Job(executor, jobname, taskmaster.args.namespace)
    job.timeout = 50
    status, all_pods_running = job.get_status(False)
    self.assertEqual(status, "Error")
def test_run_to_completion_error(self, mock_create_namespaced_job,
                                 mock_check_cancelled, mock_job_delete,
                                 mock_read_namespaced_job,
                                 mock_list_namespaced_pod):
    """
    Testing if the job state is 'Error' when the pod is stuck in the
    Pending state and the reason is ImagePullBackOff
    """
    mock_list_namespaced_pod.return_value = \
        list_namespaced_pod_error_ImagePullBackOff(10)
    for executor in self.data['executors']:
        jobname = executor['metadata']['name']
        job = Job(executor, jobname, taskmaster.args.namespace)
        status = job.run_to_completion(1, taskmaster.check_cancelled, 120)
        self.assertEqual(status, "Error")
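# Illustration only: list_namespaced_pod_error_ImagePullBackOff() is a test
# fixture that is not shown in this section. A fake pod list with an
# ImagePullBackOff waiting state could be built from the kubernetes client
# models roughly like this (names and field values are assumptions):
from kubernetes.client import (V1ContainerState, V1ContainerStateWaiting,
                               V1ContainerStatus, V1ObjectMeta, V1Pod,
                               V1PodList, V1PodStatus)


def fake_image_pull_backoff_pod_list():
    waiting = V1ContainerState(
        waiting=V1ContainerStateWaiting(reason='ImagePullBackOff'))
    container_status = V1ContainerStatus(
        name='task-executor', image='bad/image:tag', image_id='',
        ready=False, restart_count=0, state=waiting)
    pod = V1Pod(metadata=V1ObjectMeta(name='task-pod'),
                status=V1PodStatus(phase='Pending',
                                   container_statuses=[container_status]))
    return V1PodList(items=[pod])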
def test_job(self):
    """
    Testing that a Job object is created with the default name and namespace
    """
    job = Job({'metadata': {'name': 'test'}})
    self.assertEqual(job.name, 'task-job')
    self.assertEqual(job.namespace, 'default')