def get_task_details(task_id):
    """
    Collect workflow, step, project, and prerequisite data for a task.

    Args:
        task_id (int):
            The ID of the desired task.

    Returns:
        task_details (dict):
            A dict with `workflow`, `step`, `task_id`, `project`, and
            `prerequisites` entries describing the specified task.
    """
    task = Task.objects.get(id=task_id)
    project = task.project
    workflow = get_workflow_by_slug(project.workflow_slug)
    step = workflow.get_step(task.step_slug)

    # Gathered before the result is assembled; an unsatisfied dependency
    # raises from here.
    prerequisites = previously_completed_task_data(task)

    workflow_info = {'slug': workflow.slug, 'name': workflow.name}
    step_info = {'slug': step.slug, 'name': step.name}
    project_info = {
        'id': project.id,
        'details': project.short_description,
        'review_document_url': project.review_document_url,
        'project_data': project.project_data,
    }

    return {
        'workflow': workflow_info,
        'step': step_info,
        'task_id': task.id,
        'project': project_info,
        'prerequisites': prerequisites,
    }
def previously_completed_task_data(task):
    """
    Return a dict mapping task prerequisites onto their latest task
    assignment information.

    The dict is of the form:
        {'previous-slug': {task_assignment_data}, ...}

    Args:
        task (orchestra.models.Task):
            The specified task object.

    Returns:
        prerequisites (dict):
            A dict mapping task prerequisites onto their latest task
            assignment information.

    Raises:
        TaskDependencyError:
            A task for one of the steps this task's step depends on is
            not in the COMPLETE status.
    """
    workflow = get_workflow_by_slug(task.project.workflow_slug)
    step = workflow.get_step(task.step_slug)
    prerequisites = {}

    for required_step in step.creation_depends_on:
        required_task = Task.objects.get(step_slug=required_step.slug,
                                         project=task.project)
        if required_task.status != Task.Status.COMPLETE:
            raise TaskDependencyError('Task dependency is not satisfied')

        # The assignment with the highest counter is the latest one.
        task_assignment = (required_task.assignments
                           .order_by('-assignment_counter')[0])

        # Task-level details are merged over the assignment details, so
        # task-level keys win on a collision.
        task_details = get_task_details(required_task.id)
        task_assignment_details = get_task_assignment_details(task_assignment)
        task_assignment_details.update(task_details)

        # TODO(kkamalov): check for circular prerequisites
        prerequisites[required_task.step_slug] = task_assignment_details
    return prerequisites
def _worker_certified_for_task(worker, task, role,
                               task_class=WorkerCertification.TaskClass.REAL):
    """
    Decide whether a worker holds every certification a task's step
    requires, for the given role and task class.

    Args:
        worker (orchestra.models.Worker):
            The specified worker object.
        task (orchestra.models.Task):
            The specified task object.
        role:
            The role the worker's certifications must have; matched
            against the `role` field of `WorkerCertification`.
        task_class (orchestra.models.WorkerCertification.TaskClass):
            The specified task class.

    Returns:
        certified_for_task (bool):
            True if the number of matching worker certifications equals
            the number of certifications required by the step.
    """
    step = (get_workflow_by_slug(task.project.workflow_slug)
            .get_step(task.step_slug))
    required = step.required_certifications

    matching_certifications = WorkerCertification.objects.filter(
        worker=worker,
        role=role,
        task_class=task_class,
        certification__slug__in=required)

    return matching_certifications.count() == len(required)
def _is_review_needed(task):
    """
    Decide, from the review policy of the task's step, whether `task`
    should go through (another) review.

    Args:
        task (orchestra.models.Task):
            The specified task object.

    Returns:
        review_needed (bool):
            True if review is determined to be needed according to the
            task's step policy.

    Raises:
        orchestra.core.errors.ReviewPolicyError:
            The policy is neither `no_review` nor a `sampled_review`
            that specifies both `rate` and `max_reviews`.
    """
    workflow = get_workflow_by_slug(task.project.workflow_slug)
    review_policy = workflow.get_step(task.step_slug).review_policy

    policy = review_policy.get('policy', None)
    sample_rate = review_policy.get('rate', None)
    max_reviews = review_policy.get('max_reviews', None)

    if policy == 'no_review':
        return False

    fully_specified = sample_rate is not None and max_reviews is not None
    if policy != 'sampled_review' or not fully_specified:
        raise ReviewPolicyError('Review policy incorrectly specified.')

    # One assignment is not counted against `max_reviews` (presumably the
    # entry-level assignment -- confirm against the assignment model).
    assignments_beyond_first = task.assignments.all().count() - 1
    if max_reviews <= assignments_beyond_first:
        return False

    # Sample: review with probability `sample_rate`.
    return random.random() < sample_rate
def save(self, *args, **kwargs):
    """
    Validate the worker against the step's worker type, then save.

    Raises:
        ModelSaveError:
            A human step has no worker, or a non-human step has one.
    """
    step = (get_workflow_by_slug(self.task.project.workflow_slug)
            .get_step(self.task.step_slug))

    is_human_step = step.worker_type == Step.WorkerType.HUMAN
    has_worker = self.worker is not None

    if is_human_step and not has_worker:
        raise ModelSaveError(
            'Worker has to be present if worker type is Human')
    if not is_human_step and has_worker:
        raise ModelSaveError(
            'Worker should not be assigned if worker type is Machine')

    super(TaskAssignment, self).save(*args, **kwargs)
def test_sampled_get_next_task_status(self):
    # With a 50% sampled review, roughly half of 1000 submissions should
    # skip review and go straight to COMPLETE.
    task = self.tasks['processing_task']
    step = (get_workflow_by_slug(task.project.workflow_slug)
            .get_step(task.step_slug))
    step.review_policy = {'policy': 'sampled_review',
                          'rate': 0.5,
                          'max_reviews': 1}
    task.status = Task.Status.PROCESSING

    next_statuses = [
        get_next_task_status(task, TaskAssignment.SnapshotType.SUBMIT)
        for _ in range(1000)]
    complete_count = sum(
        status == Task.Status.COMPLETE for status in next_statuses)

    self.assertTrue(complete_count > 400)
    self.assertTrue(complete_count < 600)
def get_workflow_steps(workflow_slug):
    """
    Get a topologically sorted list of steps for a workflow.

    Args:
        workflow_slug (str):
            The slug of the workflow whose steps should be listed.

    Returns:
        steps ([dict]):
            One dict per step, holding its `slug`, `description`, and
            `worker_type`, ordered so that every step appears after all
            of the steps it depends on.

    Raises:
        MalformedDependencyException:
            The workflow has no step without dependencies, or some steps
            can never have all of their dependencies satisfied (e.g., a
            dependency cycle or a dependency on an unknown step).
    """
    workflow = get_workflow_by_slug(workflow_slug)

    # Build a directed graph of the step dependencies
    graph = {}
    for step in workflow.get_steps():
        graph[step.slug] = [dependency.slug
                            for dependency in step.creation_depends_on]

    # Steps without dependencies are the starting points.
    queue = [slug for slug, dependencies in graph.items()
             if not dependencies]

    # TODO(derek): prevent the MalformedDependencyExceptions from being
    # possible by baking protection into the Workflow/Step classes
    if not queue:
        raise MalformedDependencyException("All %s workflow steps have "
                                           "dependencies. There is no start "
                                           "point." % workflow_slug)

    # Emit steps in dependency order. A step is only enqueued once ALL of
    # its dependencies have been emitted; enqueueing it as soon as any one
    # dependency is emitted could place it before a later prerequisite.
    steps = []
    already_added = set()
    position = 0
    while position < len(queue):
        current_node = queue[position]
        position += 1

        already_added.add(current_node)
        current_step = workflow.get_step(current_node)
        steps.append({'slug': current_node,
                      'description': current_step.description,
                      'worker_type': current_step.worker_type})

        for key, dependencies in graph.items():
            if (key not in already_added and
                    current_node in dependencies and
                    all(dependency in already_added
                        for dependency in dependencies)):
                queue.append(key)

    # Anything left over can never be scheduled, so no valid ordering
    # exists for the workflow as a whole.
    if len(steps) != len(graph):
        raise MalformedDependencyException("Some %s workflow steps have "
                                           "dependencies that can never be "
                                           "satisfied." % workflow_slug)

    return steps
def create_subsequent_tasks(project):
    """
    Create tasks for a given project whose dependencies have been
    completed.

    A task is created for every step that has no task in the project yet
    and whose `creation_depends_on` steps are all completed. Newly created
    tasks are passed to `_preassign_workers`, and machine steps are handed
    to the scheduler configured in `settings.MACHINE_STEP_SCHEDULER`.

    Args:
        project (orchestra.models.Project):
            The project for which to create tasks.

    Returns:
        project (orchestra.models.Project):
            The project object that was passed in.
    """
    workflow = get_workflow_by_slug(project.workflow_slug)
    all_step_slugs = workflow.get_step_slugs()

    # get all completed tasks associated with a given project
    completed_tasks = Task.objects.filter(status=Task.Status.COMPLETE,
                                          project=project)
    completed_step_slugs = {task.step_slug for task in completed_tasks}

    for step_slug in all_step_slugs:
        # Skip steps that already have a task; the set lookup avoids a
        # query for steps known to be complete.
        if (step_slug in completed_step_slugs or
                Task.objects.filter(project=project,
                                    step_slug=step_slug).exists()):
            continue

        step = workflow.get_step(step_slug)
        if not _are_desired_steps_completed_on_project(
                step.creation_depends_on, completed_tasks=completed_tasks):
            continue

        # create new task and task_assignment
        task = Task(step_slug=step_slug,
                    project=project,
                    status=Task.Status.AWAITING_PROCESSING)
        task.save()

        _preassign_workers(task)

        if step.worker_type == Step.WorkerType.MACHINE:
            # MACHINE_STEP_SCHEDULER is a (module path, class name) pair.
            machine_step_scheduler_module = import_module(
                settings.MACHINE_STEP_SCHEDULER[0])
            machine_step_scheduler_class = getattr(
                machine_step_scheduler_module,
                settings.MACHINE_STEP_SCHEDULER[1])
            machine_step_scheduler = machine_step_scheduler_class()
            machine_step_scheduler.schedule(project.id, step_slug)

    # Return the project, as the documented contract promises.
    return project
def test_malformed_assignment_policy(self):
    # Machine steps may neither carry an assignment policy themselves nor
    # be referenced by the assignment policy of another step.
    project = self.projects['assignment_policy']

    workflow = get_workflow_by_slug('assignment_policy_workflow')
    step_0 = workflow.get_step('step_0')

    # Machine should not have an assignment policy
    machine_step = Step(
        slug='machine_step',
        worker_type=Step.WorkerType.MACHINE,
        assignment_policy={'policy': 'previously_completed_steps',
                           'steps': ['step_0']},
        creation_depends_on=[step_0],
        function=lambda *args: None,
    )
    workflow.add_step(machine_step)

    # The workflow object is shared, so the in-memory changes are undone
    # in `finally` even when an assertion fails.
    try:
        # Create first task in project
        create_subsequent_tasks(project)
        self.assertEqual(project.tasks.count(), 1)

        # Assign initial task to a worker and mark as complete
        initial_task = assign_task(self.workers[4].id,
                                   project.tasks.first().id)
        initial_task.status = Task.Status.COMPLETE
        initial_task.save()

        # Cannot preassign machine task
        with self.assertRaises(AssignmentPolicyError):
            create_subsequent_tasks(project)

        # Reset project
        project.tasks.all().delete()

        # Machine should not be member of assignment policy
        step_0.assignment_policy = {'policy': 'previously_completed_steps',
                                    'steps': ['machine_step']}
        with self.assertRaises(AssignmentPolicyError):
            create_subsequent_tasks(project)
    finally:
        # Reset workflow
        step_0.assignment_policy = {'policy': 'anyone_certified'}
        del workflow.steps['machine_step']

    # Reset project
    project.tasks.all().delete()
def execute(project_id, step_slug):
    """
    Run the machine function of a project's step and complete its task.

    The step's function is called with the project data and the data of
    the previously completed prerequisite tasks. Its result is stored on
    the task assignment, the task is marked complete, and subsequent
    tasks are created.

    Args:
        project_id (int):
            The ID of the project the step belongs to.
        step_slug (str):
            The slug of the machine step to execute.

    Raises:
        MachineExecutionError:
            The step is not a machine step, the task is already
            complete, the task has more than one assignment, or its only
            assignment is already submitted.
    """
    project = Project.objects.get(id=project_id)
    step = get_workflow_by_slug(project.workflow_slug).get_step(step_slug)
    task = Task.objects.get(project=project, step_slug=step_slug)

    # Run machine function
    if step.worker_type != Step.WorkerType.MACHINE:
        raise MachineExecutionError('Step worker type is not machine')
    if task.status == Task.Status.COMPLETE:
        raise MachineExecutionError('Task assignment already completed')

    # Machine tasks are only assigned to one worker/machine,
    # so they should only have one task assignment,
    # and should never be submitted for review.
    existing_assignments = task.assignments.count()
    if existing_assignments > 1:
        raise MachineExecutionError('At most 1 assignment per machine task')

    if existing_assignments == 1:
        task_assignment = task.assignments.first()
        if task_assignment.status == TaskAssignment.Status.SUBMITTED:
            raise MachineExecutionError(
                'Task assignment completed but task is not!')
    else:
        task_assignment = TaskAssignment.objects.create(
            task=task,
            status=TaskAssignment.Status.PROCESSING,
            in_progress_task_data={},
            snapshots={})

    prerequisites = previously_completed_task_data(task)
    result = step.function(project.project_data, prerequisites)

    task_assignment.status = TaskAssignment.Status.SUBMITTED
    task_assignment.in_progress_task_data = result
    task_assignment.save()

    task.status = Task.Status.COMPLETE
    task.save()

    create_subsequent_tasks(project)
def test_get_new_task_assignment_entry_level(self):
    # Entry-level assignment
    self.assertEqual(Task.objects
                     .filter(status=Task.Status.AWAITING_PROCESSING)
                     .count(),
                     1)

    with self.assertRaises(WorkerCertificationError):
        get_new_task_assignment(self.workers[5],
                                Task.Status.PENDING_REVIEW)

    # assign a new task to a worker
    assignment = get_new_task_assignment(self.workers[5],
                                         Task.Status.AWAITING_PROCESSING)
    self.assertIsNotNone(assignment)
    self.assertEqual(assignment.task.status, Task.Status.PROCESSING)

    # No more tasks left in AWAITING_PROCESSING
    with self.assertRaises(NoTaskAvailable):
        get_new_task_assignment(self.workers[5],
                                Task.Status.AWAITING_PROCESSING)

    # Worker should not be served machine tasks
    workflow_slug = 'test_workflow_2'
    workflow = get_workflow_by_slug(workflow_slug)
    simple_machine = workflow.get_step('simple_machine')
    # Drop the step's dependencies so its task can be created directly.
    simple_machine.creation_depends_on = []

    project = Project.objects.create(workflow_slug=workflow_slug,
                                     short_description='',
                                     priority=0,
                                     task_class=0)
    Task.objects.create(project=project,
                        status=Task.Status.AWAITING_PROCESSING,
                        step_slug='simple_machine')

    with self.assertRaises(NoTaskAvailable):
        get_new_task_assignment(self.workers[5],
                                Task.Status.AWAITING_PROCESSING)
def _assign_worker_from_previously_completed_steps(task, related_steps):
    """
    Assign a new task to the entry-level worker of the specified tasks.
    If no worker can be assigned, return the unmodified task.

    Args:
        task (orchestra.models.Task):
            The newly created task to assign.
        related_steps ([str]):
            Slugs of the steps from which to attempt to assign a worker.

    Returns:
        task (orchestra.models.Task):
            The modified task object.

    Raises:
        orchestra.core.errors.AssignmentPolicyError:
            Machine steps cannot be included in an assignment policy.
    """
    workflow = get_workflow_by_slug(task.project.workflow_slug)
    for slug in related_steps:
        if workflow.get_step(slug).worker_type == Step.WorkerType.MACHINE:
            raise AssignmentPolicyError('Machine step should not be '
                                        'member of assignment policy')

    related_tasks = Task.objects.filter(step_slug__in=related_steps,
                                        project=task.project)
    for related_task in related_tasks:
        entry_level_assignment = assignment_history(related_task).first()
        if entry_level_assignment and entry_level_assignment.worker:
            try:
                return assign_task(entry_level_assignment.worker.id, task.id)
            except Exception:
                # Task could not be assigned to related worker, try with
                # another related worker. `Exception` (rather than a bare
                # `except`) lets KeyboardInterrupt/SystemExit propagate.
                logger.warning('Tried to assign worker %s to step %s, for '
                               'which they are not certified',
                               entry_level_assignment.worker.id,
                               task.step_slug, exc_info=True)
    return task
def _preassign_workers(task):
    """
    Apply the assignment policy of a task's step to a newly created task.

    Args:
        task (orchestra.models.Task):
            The newly created task to assign.

    Returns:
        task (orchestra.models.Task):
            The task, assigned to a worker when the policy is
            `previously_completed_steps` and an assignment succeeded;
            otherwise the task as passed in.

    Raises:
        orchestra.core.errors.AssignmentPolicyError:
            The specified assignment policy type is not supported or a
            machine step is given an assignment policy.
    """
    step = (get_workflow_by_slug(task.project.workflow_slug)
            .get_step(task.step_slug))
    assignment_policy = step.assignment_policy
    policy = assignment_policy.get('policy')
    related_steps = assignment_policy.get('steps')

    if step.worker_type == Step.WorkerType.MACHINE:
        if policy:
            raise AssignmentPolicyError('Machine step should not have '
                                        'assignment policy.')
        return task

    if policy == 'anyone_certified':
        # Leave the task in the awaiting processing pool
        return task

    if policy == 'previously_completed_steps' and related_steps is not None:
        return _assign_worker_from_previously_completed_steps(
            task, related_steps)

    raise AssignmentPolicyError('Assignment policy incorrectly specified.')
def test_task_assignment_saving(self):
    """
    Check that saving a task assignment fails when a machine task is
    given a worker, and when a human task is given none.
    """
    workflow_slug = 'test_workflow_2'
    machine_step = (get_workflow_by_slug(workflow_slug)
                    .get_step('simple_machine'))
    machine_step.creation_depends_on = []

    project = Project.objects.create(workflow_slug=workflow_slug,
                                     short_description='',
                                     priority=0,
                                     task_class=0)

    # A worker on a machine task must be rejected.
    machine_task = Task.objects.create(project=project,
                                       status=Task.Status.PROCESSING,
                                       step_slug='simple_machine')
    with self.assertRaises(ModelSaveError):
        TaskAssignment.objects.create(worker=self.workers[0],
                                      task=machine_task,
                                      status=0,
                                      in_progress_task_data={},
                                      snapshots={})

    # A missing worker on a human task must be rejected.
    human_task = Task.objects.create(project=project,
                                     status=Task.Status.PROCESSING,
                                     step_slug='step4')
    with self.assertRaises(ModelSaveError):
        TaskAssignment.objects.create(task=human_task,
                                      status=0,
                                      in_progress_task_data={},
                                      snapshots={})
def test_legal_get_next_task_status(self):
    # Walk through the legal status transitions under different review
    # policies set on the task's step.
    task = self.tasks['processing_task']
    workflow = get_workflow_by_slug(task.project.workflow_slug)
    step = workflow.get_step(task.step_slug)

    # An empty review policy is rejected
    step.review_policy = {}
    task.status = Task.Status.PROCESSING
    with self.assertRaises(ReviewPolicyError):
        get_next_task_status(task,
                             TaskAssignment.SnapshotType.SUBMIT)

    # A sample rate of 1 always sends the task to review
    step.review_policy = {'policy': 'sampled_review',
                          'rate': 1,
                          'max_reviews': 1}
    self.assertEqual(
        get_next_task_status(task,
                             TaskAssignment.SnapshotType.SUBMIT),
        Task.Status.PENDING_REVIEW)

    # A sample rate of 0 never sends the task to review
    step.review_policy = {'policy': 'sampled_review',
                          'rate': 0,
                          'max_reviews': 1}
    self.assertEqual(
        get_next_task_status(task,
                             TaskAssignment.SnapshotType.SUBMIT),
        Task.Status.COMPLETE)

    task.status = Task.Status.POST_REVIEW_PROCESSING
    self.assertEqual(
        get_next_task_status(task,
                             TaskAssignment.SnapshotType.SUBMIT),
        Task.Status.REVIEWING)

    task = self.tasks['review_task']
    task.status = Task.Status.REVIEWING

    step.review_policy = {'policy': 'sampled_review',
                          'rate': 1,
                          'max_reviews': 0}
    self.assertEqual(
        get_next_task_status(task,
                             TaskAssignment.SnapshotType.ACCEPT),
        Task.Status.COMPLETE)

    step.review_policy = {'policy': 'sampled_review',
                          'rate': 1,
                          'max_reviews': 2}
    self.assertEqual(
        get_next_task_status(task,
                             TaskAssignment.SnapshotType.ACCEPT),
        Task.Status.PENDING_REVIEW)

    # after max reviews done a task goes to state complete
    TaskAssignment.objects.create(worker=self.workers[1],
                                  task=task,
                                  status=TaskAssignment.Status.SUBMITTED,
                                  assignment_counter=1,
                                  in_progress_task_data={},
                                  snapshots=empty_snapshots())
    task.save()
    step.review_policy = {'policy': 'sampled_review',
                          'rate': 1,
                          'max_reviews': 1}
    self.assertEqual(
        get_next_task_status(task,
                             TaskAssignment.SnapshotType.ACCEPT),
        Task.Status.COMPLETE)
def test_submit_entry_level_task_assignment(self):
    # Exercise the submission endpoint for an entry-level assignment: the
    # rejected cases first, then a successful submit and a rejected
    # resubmit.
    def assert_rejected(response, expected_message):
        # Rejections are HTTP 400 responses carrying a JSON `message`.
        self.assertEqual(response.status_code, 400)
        returned = json.loads(response.content.decode('utf-8'))
        self.assertEqual(returned['message'], expected_message)

    # user 0 only has certification for entry level tasks
    response = (self.clients[0].get(
        '/orchestra/api/interface/new_task_assignment/entry_level/'))
    self.assertEqual(response.status_code, 200)
    returned = json.loads(response.content.decode('utf-8'))
    task_id = returned['id']
    task = Task.objects.get(id=task_id)

    # user is not assigned to a task
    assert_rejected(
        self._submit_assignment(self.clients[1], task_id),
        'Task assignment with worker is in broken state.')

    # task does not exist
    assert_rejected(
        self._submit_assignment(self.clients[1], -1),
        'No task for given id')

    # user 0 can only submit a task not accept
    assert_rejected(
        self._submit_assignment(
            self.clients[0], task_id, command='accept'),
        'Only reviewer can accept the task.')

    # user 0 can only submit a task not reject
    assert_rejected(
        self._submit_assignment(
            self.clients[0], task_id, command='reject'),
        'Only reviewer can reject the task.')

    # user 0 can't call illegal commands
    assert_rejected(
        self._submit_assignment(
            self.clients[0], task_id, command='approve'),
        'Illegal command')

    # user 0 can't submit a task if its submission prerequisites aren't
    # complete
    workflow = get_workflow_by_slug(task.project.workflow_slug)
    step = workflow.get_step(task.step_slug)
    step.submission_depends_on = [Step(slug='imaginary_test_step')]
    try:
        assert_rejected(
            self._submit_assignment(self.clients[0], task_id),
            'Submission prerequisites are not complete.')
    finally:
        # The step is mutated in place, so undo the change even if the
        # assertion above fails.
        step.submission_depends_on = []

    data = {'submit_key1': 'submit_val1'}
    # user 0 submits a task
    response = self._submit_assignment(
        self.clients[0], task_id, data=data)
    self.assertEqual(response.status_code, 200)
    self._verify_good_task_assignment_information(
        self.clients[0], {'task_id': task_id},
        task.project.short_description,
        'Submitted', 'Pending Review', False,
        True, data, self.workers[0], work_times_seconds=[1])

    # user cannot resubmit a task
    assert_rejected(
        self._submit_assignment(self.clients[0], task_id),
        'Worker is not allowed to submit')
def submit_task(task_id, task_data, snapshot_type, worker, work_time_seconds):
    """
    Record a worker's submission for a task and move the task to its
    next status.

    The submitted data is stored on the worker's assignment together
    with a new snapshot, the assignment is marked as submitted, and the
    task status is advanced. Depending on the new status, the related
    assignment is updated or subsequent tasks are created, and a status
    change notification is sent.

    Args:
        task_id (int):
            The ID of the task to submit.
        task_data (str):
            A JSON blob of task data to submit.
        snapshot_type (orchestra.models.TaskAssignment.SnapshotType):
            The action to take upon task submission (e.g., SUBMIT,
            ACCEPT, REJECT).
        worker (orchestra.models.Worker):
            The worker submitting the task.
        work_time_seconds (int):
            The time taken by the worker on the latest iteration of
            their task assignment.

    Returns:
        task (orchestra.models.Task):
            The modified task object.

    Raises:
        orchestra.core.errors.IllegalTaskSubmission:
            Submission prerequisites for the task are incomplete or the
            assignment is in a non-processing state.
        orchestra.core.errors.TaskAssignmentError:
            The worker does not have exactly one assignment for the
            given task.
        orchestra.core.errors.TaskStatusError:
            Task has already been completed.
    """
    task = Task.objects.get(id=task_id)
    step = (get_workflow_by_slug(task.project.workflow_slug)
            .get_step(task.step_slug))

    prerequisites_met = _are_desired_steps_completed_on_project(
        step.submission_depends_on, project=task.project)
    if not prerequisites_met:
        raise IllegalTaskSubmission(
            'Submission prerequisites are not complete.')

    if task.status == Task.Status.COMPLETE:
        raise TaskStatusError('Task already completed')

    # Use select_for_update to prevent concurrency issues with save_task.
    # See https://github.com/unlimitedlabs/orchestra/issues/2.
    worker_assignments = (TaskAssignment.objects.select_for_update()
                          .filter(worker=worker, task=task))

    # Worker can belong to only one assignment for a given task.
    if worker_assignments.count() != 1:
        raise TaskAssignmentError(
            'Task assignment with worker is in broken state.')

    assignment = worker_assignments[0]
    if assignment.status != TaskAssignment.Status.PROCESSING:
        raise IllegalTaskSubmission('Worker is not allowed to submit')

    next_status = get_next_task_status(task, snapshot_type)

    # Store the submitted data and append a snapshot of it.
    assignment.in_progress_task_data = task_data
    snapshot = {
        'data': assignment.in_progress_task_data,
        'datetime': datetime.utcnow().isoformat(),
        'type': snapshot_type,
        'work_time_seconds': work_time_seconds,
    }
    assignment.snapshots['snapshots'].append(snapshot)
    assignment.status = TaskAssignment.Status.SUBMITTED
    assignment.save()

    previous_status = task.status
    task.status = next_status
    task.save()

    if task.status == Task.Status.COMPLETE:
        create_subsequent_tasks(task.project)
    elif task.status in (Task.Status.REVIEWING,
                         Task.Status.POST_REVIEW_PROCESSING):
        # REVIEWING updates the assignment with the next counter;
        # POST_REVIEW_PROCESSING updates the previous one.
        counter_offset = 1 if task.status == Task.Status.REVIEWING else -1
        update_related_assignment_status(
            task,
            assignment.assignment_counter + counter_offset,
            assignment.in_progress_task_data)

    notify_status_change(task, previous_status)
    return task
def tasks_assigned_to_worker(worker):
    """
    Group the tasks of `worker` by dashboard state.

    Args:
        worker (orchestra.models.Worker):
            The specified worker object.

    Returns:
        tasks_assigned (dict):
            A dict mapping each of the states `returned`, `in_progress`,
            `pending_review`, `pending_processing`, and `complete` onto a
            list of dicts (`id`, `step`, `project`, `detail`) describing
            the worker's tasks in that state.
    """
    by_priority_oldest_first = ('-task__project__priority',
                                'task__project__start_datetime')
    by_priority_newest_first = ('-task__project__priority',
                                '-task__project__start_datetime')

    # Assignments of aborted tasks are never shown.
    valid_task_assignments = TaskAssignment.objects.exclude(
        task__status=Task.Status.ABORTED)

    # get all active task assignments for a user
    active_task_assignments = (
        valid_task_assignments
        .filter(worker=worker,
                status=TaskAssignment.Status.PROCESSING)
        .order_by(*by_priority_oldest_first))

    inactive_task_assignments = (
        valid_task_assignments
        .filter(worker=worker,
                status=TaskAssignment.Status.SUBMITTED)
        .exclude(task__status=Task.Status.COMPLETE)
        .order_by(*by_priority_oldest_first))

    # A submitted assignment is pending processing when an assignment with
    # a lower counter on the same task is still being processed; otherwise
    # it is pending review.
    pending_processing = []
    pending_review = []
    for submitted in inactive_task_assignments:
        earlier_in_progress = valid_task_assignments.filter(
            status=TaskAssignment.Status.PROCESSING,
            task__id=submitted.task.id,
            assignment_counter__lt=submitted.assignment_counter)
        if earlier_in_progress.exists():
            pending_processing.append(submitted)
        else:
            pending_review.append(submitted)

    # TODO(marcua): Do a better job of paginating than cutting off to the most
    # recent 20 tasks.
    complete_task_assignments = (
        valid_task_assignments
        .filter(worker=worker,
                task__status=Task.Status.COMPLETE)
        .order_by(*by_priority_newest_first)[:20])

    task_assignments_overview = {
        'returned': active_task_assignments.filter(
            task__status=Task.Status.POST_REVIEW_PROCESSING),
        'in_progress': active_task_assignments.exclude(
            task__status=Task.Status.POST_REVIEW_PROCESSING),
        'pending_review': pending_review,
        'pending_processing': pending_processing,
        'complete': complete_task_assignments}

    def summarize(task_assignment):
        # Build the dashboard entry for a single task assignment.
        task = task_assignment.task
        workflow = get_workflow_by_slug(task.project.workflow_slug)
        step = workflow.get_step(task.step_slug)
        # TODO(marcua): project should be workflow here, no?
        return {'id': task.id,
                'step': step.name,
                'project': workflow.name,
                'detail': task.project.short_description}

    tasks_assigned = {}
    for state, task_assignments in task_assignments_overview.items():
        tasks_assigned[state] = [summarize(task_assignment)
                                 for task_assignment in task_assignments]
    return tasks_assigned