def create_classifiers(training_workflow_uuid, classifier_set):
    """
    Store trained classifiers for a training workflow and mark it complete.

    The workflow is looked up by UUID.  When it is already complete (for
    example because the task was executed more than once) that fact is
    logged and nothing else happens.  Otherwise the classifier set is handed
    to ``workflow.complete`` and the success is logged.

    NOTE(review): rescheduling of grading tasks that were submitted before
    any classifiers existed is not performed in this function; presumably it
    is triggered inside ``AITrainingWorkflow.complete`` -- confirm there.

    Args:
        training_workflow_uuid (str): The UUID of the training workflow.
        classifier_set (dict): Mapping of criteria names to serialized classifiers.

    Returns:
        None

    Raises:
        AITrainingRequestError: The workflow does not exist, or completing it
            raised IncompleteClassifierSet / InvalidRubricSelection.
        AITrainingInternalError: Completing the workflow raised
            NoTrainingExamples, ClassifierSerializeError,
            ClassifierUploadError or DatabaseError.

    """
    try:
        training_workflow = AITrainingWorkflow.objects.get(uuid=training_workflow_uuid)

        # The task may run several times; if a previous run already stored
        # the classifiers, just record that and stop.
        if training_workflow.is_complete:
            already_done = u"AI training workflow with UUID {} already has trained classifiers.".format(
                training_workflow.uuid
            )
            logger.info(already_done)
            return

        training_workflow.complete(classifier_set)
        success_template = (
            u"Created trained classifiers for the AI training workflow with UUID {workflow_uuid} "
            u"(using algorithm ID {algorithm_id})"
        )
        logger.info(
            success_template.format(
                workflow_uuid=training_workflow.uuid,
                algorithm_id=training_workflow.algorithm_id
            )
        )
    except AITrainingWorkflow.DoesNotExist:
        not_found = (
            u"Could not retrieve AI training workflow with UUID {}"
        ).format(training_workflow_uuid)
        raise AITrainingRequestError(not_found)
    except NoTrainingExamples as err:
        logger.exception(err)
        raise AITrainingInternalError(err)
    except (IncompleteClassifierSet, InvalidRubricSelection) as err:
        request_error = (
            u"An error occurred while creating the classifier set "
            u"for the training workflow with UUID {uuid}: {ex}"
        ).format(uuid=training_workflow_uuid, ex=err)
        raise AITrainingRequestError(request_error)
    except (ClassifierSerializeError, ClassifierUploadError, DatabaseError) as err:
        internal_error = (
            u"An unexpected error occurred while creating the classifier "
            u"set for training workflow UUID {uuid}: {ex}"
        ).format(uuid=training_workflow_uuid, ex=err)
        logger.exception(internal_error)
        raise AITrainingInternalError(internal_error)
def is_training_workflow_complete(workflow_uuid):
    """
    Check whether the training workflow is complete.

    Delegates to ``AITrainingWorkflow.is_workflow_complete`` and translates
    the lookup / database failures into the API's own exception types.

    Args:
        workflow_uuid (str): The UUID of the training workflow

    Returns:
        bool

    Raises:
        AITrainingRequestError: No training workflow exists with this UUID.
        AITrainingInternalError: A DatabaseError occurred during the check.

    """
    try:
        return AITrainingWorkflow.is_workflow_complete(workflow_uuid)
    except AITrainingWorkflow.DoesNotExist:
        msg = (
            u"Could not retrieve training workflow "
            u"with uuid {uuid} to check whether it's complete."
        ).format(uuid=workflow_uuid)
        raise AITrainingRequestError(msg)
    except DatabaseError:
        msg = (
            u"An unexpected error occurred while checking "
            u"the training workflow with uuid {uuid} for completeness"
        ).format(uuid=workflow_uuid)
        # Record the traceback before translating the exception, as the other
        # "unexpected error" branches in this module do; otherwise the
        # underlying database failure is lost.
        logger.exception(msg)
        raise AITrainingInternalError(msg)
def reschedule_unfinished_tasks(course_id=None, item_id=None, task_type=u"grade"):
    """
    Queue the tasks that reschedule unfinished AI tasks for one item in a course.

    Both ``course_id`` and ``item_id`` must be supplied: they default to
    ``None`` in the signature, but leaving either one unset raises
    ``AIReschedulingRequestError``.  With the default ``task_type`` only the
    unfinished grading tasks are rescheduled.

    Keyword Arguments:
        course_id (unicode): The course whose unfinished tasks are rescheduled.
        item_id (unicode): The item within the course whose unfinished tasks
            are rescheduled.
        task_type (unicode): Either "grade" or "train"; restricts rescheduling
            to tasks of that type.  If None, both training and grading are
            rescheduled, in that order.  Any other value schedules nothing.

    Raises:
        AIReschedulingRequestError: ``course_id`` or ``item_id`` is None.
        AITrainingInternalError: Queuing the training reschedule task raised
            one of ANTICIPATED_CELERY_ERRORS.
        AIGradingInternalError: Queuing the grading reschedule task raised
            one of ANTICIPATED_CELERY_ERRORS.

    """
    if course_id is None or item_id is None:
        msg = u"Rescheduling tasks was not possible because the course_id / item_id was not assigned."
        # No exception is being handled at this point, so log with `error`
        # (`exception` would attach an empty traceback), and hand the message
        # to the raised error so callers can see why the request was rejected.
        logger.error(msg)
        raise AIReschedulingRequestError(msg)

    reschedule_training = task_type == u"train" or task_type is None
    reschedule_grading = task_type == u"grade" or task_type is None

    # Reschedules all of the training tasks
    if reschedule_training:
        try:
            training_tasks.reschedule_training_tasks.apply_async(args=[course_id, item_id])
        except ANTICIPATED_CELERY_ERRORS as ex:
            msg = (
                u"Rescheduling training tasks for course {cid} and item {iid} failed with exception: {ex}"
            ).format(cid=course_id, iid=item_id, ex=ex)
            logger.exception(msg)
            raise AITrainingInternalError(ex)

    # Reschedules all of the grading tasks
    if reschedule_grading:
        try:
            grading_tasks.reschedule_grading_tasks.apply_async(args=[course_id, item_id])
        except ANTICIPATED_CELERY_ERRORS as ex:
            msg = (
                u"Rescheduling grading tasks for course {cid} and item {iid} failed with exception: {ex}"
            ).format(cid=course_id, iid=item_id, ex=ex)
            logger.exception(msg)
            raise AIGradingInternalError(ex)
def setUp(self):
    """
    Prepare each test with unfinished tasks of both kinds (grading and training).
    """
    # 1) Schedule grading: the scheduling succeeds, but the grading itself
    #    fails because no classifiers exist yet.
    for _ in range(10):
        created = sub_api.create_submission(STUDENT_ITEM, ANSWER)
        # Only the UUID of the most recent submission is kept on the test case.
        self.submission_uuid = created['uuid']
        ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)

    # 2) Schedule training and make it fail ON PURPOSE.  After this, both the
    #    grading and the training tasks need to be rescheduled.
    forced_failure = AITrainingInternalError('Training Classifiers Failed for some Reason.')
    with mock.patch(
        'openassessment.assessment.api.ai.training_tasks.train_classifiers.apply_async',
        side_effect=forced_failure
    ):
        with self.assertRaises(AITrainingInternalError):
            ai_api.train_classifiers(RUBRIC, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID)

    self._assert_complete(training_done=False, grading_done=False)
def train_classifiers(rubric_dict, examples, course_id, item_id, algorithm_id):
    """
    Schedule a task to train classifiers.

    All training examples must match the rubric!

    The examples are deserialized against the rubric, a training workflow is
    started for them, and an asynchronous training task parametrized by the
    workflow UUID is queued.

    NOTE(review): automatic rescheduling of incomplete AIGradingWorkflows
    after training succeeds is not performed in this function; presumably it
    happens when the training task completes -- confirm in the worker code.

    Args:
        rubric_dict (dict): The rubric used to assess the classifiers.
        examples (list of dict): Serialized training examples.
        course_id (unicode): The course the training workflow is started for.
        item_id (unicode): The item the training workflow is started for.
        algorithm_id (unicode): The ID of the algorithm used to train the classifiers.

    Returns:
        training_workflow_uuid (str): The UUID of the training workflow.
            Usually the caller will not need this (since the workers
            are parametrized by training workflow UUID), but it's
            useful for testing.

    Raises:
        AITrainingRequestError: The rubric / examples could not be parsed, or
            starting the workflow raised NoTrainingExamples.
        AITrainingInternalError: Starting the workflow failed unexpectedly, or
            queuing the task raised one of ANTICIPATED_CELERY_ERRORS.

    Example usage:
        >>> train_classifiers(rubric, examples, 'test_course', 'test_item', 'ease')
        '10df7db776686822e501b05f452dc1e4b9141fe5'

    """
    # Get or create the rubric and training examples
    try:
        examples = deserialize_training_examples(examples, rubric_dict)
    except (InvalidRubric, InvalidTrainingExample, InvalidRubricSelection) as ex:
        msg = u"Could not parse rubric and/or training examples: {ex}".format(ex=ex)
        raise AITrainingRequestError(msg)

    # Create the workflow model
    try:
        workflow = AITrainingWorkflow.start_workflow(examples, course_id, item_id, algorithm_id)
    except NoTrainingExamples as ex:
        raise AITrainingRequestError(ex)
    except Exception:
        # Deliberately broad: any failure to create the workflow is reported
        # as an internal error.  `Exception` (rather than a bare `except`)
        # lets KeyboardInterrupt / SystemExit propagate untouched.
        msg = (
            u"An unexpected error occurred while creating "
            u"the AI training workflow"
        )
        logger.exception(msg)
        raise AITrainingInternalError(msg)

    # Schedule the task, parametrized by the workflow UUID
    try:
        training_tasks.train_classifiers.apply_async(args=[workflow.uuid])
    except ANTICIPATED_CELERY_ERRORS as ex:
        msg = (
            u"An unexpected error occurred while scheduling incomplete training workflows with"
            u" course_id={cid} and item_id={iid}: {ex}"
        ).format(cid=course_id, iid=item_id, ex=ex)
        logger.exception(msg)
        raise AITrainingInternalError(msg)

    # Return the workflow UUID
    return workflow.uuid
def get_training_task_params(training_workflow_uuid):
    """
    Look up the parameters a training task needs for a given workflow.

    Args:
        training_workflow_uuid (str): The UUID of the training workflow.

    Returns:
        dict with keys:
            * training_examples (list of dict): One entry per training example
              of the workflow, each with a 'text' and a 'scores' key.
            * algorithm_id: The workflow's algorithm ID.
            * course_id: The workflow's course ID.
            * item_id: The workflow's item ID.

    Raises:
        AITrainingRequestError: No training workflow exists with this UUID.
        AITrainingInternalError: A DatabaseError occurred while reading the
            workflow or its examples.

    Example usage:
        >>> params = get_training_task_params('abcd1234')
        >>> params['algorithm_id']
        u'ease'
        >>> params['training_examples']
        [
            {
                "text": u"Example answer number one",
                "scores": { "vocabulary": 1, "grammar": 2 }
            },
            {
                "text": u"Example answer number two",
                "scores": { "vocabulary": 3, "grammar": 1 }
            }
        ]

    """
    def _serialize(training_example):
        # Points per criterion name, taken from the options selected for this example.
        points_by_criterion = {
            selected.criterion.name: selected.points
            for selected in training_example.options_selected.all()
        }
        return {
            'text': essay_text_from_submission({'answer': training_example.answer}),
            'scores': points_by_criterion
        }

    try:
        training_workflow = AITrainingWorkflow.objects.get(uuid=training_workflow_uuid)
        serialized_examples = [
            _serialize(training_example)
            for training_example in training_workflow.training_examples.all()
        ]
        return {
            'training_examples': serialized_examples,
            'algorithm_id': training_workflow.algorithm_id,
            'course_id': training_workflow.course_id,
            'item_id': training_workflow.item_id
        }
    except AITrainingWorkflow.DoesNotExist:
        not_found = (
            u"Could not retrieve AI training workflow with UUID {}"
        ).format(training_workflow_uuid)
        raise AITrainingRequestError(not_found)
    except DatabaseError:
        db_failure = (
            u"An unexpected error occurred while retrieving "
            u"training examples for the AI training workflow with UUID {}"
        ).format(training_workflow_uuid)
        logger.exception(db_failure)
        raise AITrainingInternalError(db_failure)