Beispiel #1
0
def create_classifiers(training_workflow_uuid, classifier_set):
    """
    Store the trained classifiers for a training workflow and mark it complete.

    The workflow is looked up by UUID.  If it is already complete (for
    example because the task ran more than once), nothing is changed and
    the situation is only logged.  Otherwise `workflow.complete()` is
    called with the classifier set.

    NOTE(review): earlier documentation states that grading tasks submitted
    before any classifiers existed are rescheduled as a result of this call.
    That logic is not visible in this function; presumably it lives in
    `workflow.complete()` -- confirm there.

    Args:
        training_workflow_uuid (str): The UUID of the training workflow.
        classifier_set (dict): Mapping of criteria names to serialized classifiers.

    Returns:
        None

    Raises:
        AITrainingRequestError: The workflow does not exist, or completing it
            raised IncompleteClassifierSet / InvalidRubricSelection.
        AITrainingInternalError: Completing the workflow raised
            NoTrainingExamples, ClassifierSerializeError,
            ClassifierUploadError or DatabaseError.

    """
    try:
        workflow = AITrainingWorkflow.objects.get(uuid=training_workflow_uuid)

        if not workflow.is_complete:
            workflow.complete(classifier_set)
            created_msg = (
                u"Created trained classifiers for the AI training workflow with UUID {workflow_uuid} "
                u"(using algorithm ID {algorithm_id})"
            ).format(
                workflow_uuid=workflow.uuid,
                algorithm_id=workflow.algorithm_id
            )
            logger.info(created_msg)
        else:
            # The task may run more than once; a second run finds the
            # classifiers already in place and has nothing left to do.
            already_done_msg = u"AI training workflow with UUID {} already has trained classifiers.".format(
                workflow.uuid
            )
            logger.info(already_done_msg)
    except AITrainingWorkflow.DoesNotExist:
        not_found_msg = (
            u"Could not retrieve AI training workflow with UUID {}"
        ).format(training_workflow_uuid)
        raise AITrainingRequestError(not_found_msg)
    except NoTrainingExamples as err:
        logger.exception(err)
        raise AITrainingInternalError(err)
    except (IncompleteClassifierSet, InvalidRubricSelection) as err:
        request_msg = (
            u"An error occurred while creating the classifier set "
            u"for the training workflow with UUID {uuid}: {ex}"
        ).format(uuid=training_workflow_uuid, ex=err)
        raise AITrainingRequestError(request_msg)
    except (ClassifierSerializeError, ClassifierUploadError, DatabaseError) as err:
        internal_msg = (
            u"An unexpected error occurred while creating the classifier "
            u"set for training workflow UUID {uuid}: {ex}"
        ).format(uuid=training_workflow_uuid, ex=err)
        logger.exception(internal_msg)
        raise AITrainingInternalError(internal_msg)
Beispiel #2
0
def is_training_workflow_complete(workflow_uuid):
    """
    Report whether a training workflow has finished.

    Delegates to `AITrainingWorkflow.is_workflow_complete` and translates
    lookup / database failures into API-level errors.

    Args:
        workflow_uuid (str): The UUID of the training workflow

    Returns:
        The value returned by `AITrainingWorkflow.is_workflow_complete`
        (expected to be a bool).

    Raises:
        AITrainingRequestError: No workflow exists with the given UUID.
        AITrainingInternalError: A DatabaseError occurred during the check.

    """
    try:
        completed = AITrainingWorkflow.is_workflow_complete(workflow_uuid)
    except AITrainingWorkflow.DoesNotExist:
        missing_msg = (
            u"Could not retrieve training workflow "
            u"with uuid {uuid} to check whether it's complete."
        ).format(uuid=workflow_uuid)
        raise AITrainingRequestError(missing_msg)
    except DatabaseError:
        db_msg = (
            u"An unexpected error occurred while checking "
            u"the training workflow with uuid {uuid} for completeness"
        ).format(uuid=workflow_uuid)
        raise AITrainingInternalError(db_msg)
    return completed
Beispiel #3
0
def reschedule_unfinished_tasks(course_id=None,
                                item_id=None,
                                task_type=u"grade"):
    """
    Schedule asynchronous tasks that reschedule unfinished training and/or
    grading work for one item in one course.

    Both `course_id` and `item_id` are required.  They default to None only
    so that a missing value is reported as an AIReschedulingRequestError
    instead of a TypeError.  With the default `task_type`, only unfinished
    grading tasks are rescheduled.

    Keyword Arguments:
        course_id (unicode): The course whose unfinished tasks should be rescheduled.
        item_id (unicode): The item within the course whose unfinished tasks should be rescheduled.
        task_type (unicode): Either "grade" or "train".  Restrict to unfinished tasks of this type.
            If task_type is None, both training and grading are rescheduled, in that order.
            Any other value results in no task being scheduled.

    Raises:
        AIReschedulingRequestError: `course_id` or `item_id` is None.
        AITrainingInternalError: Scheduling the training rescheduling task
            raised one of ANTICIPATED_CELERY_ERRORS.
        AIGradingInternalError: Scheduling the grading rescheduling task
            raised one of ANTICIPATED_CELERY_ERRORS.
    """

    if course_id is None or item_id is None:
        msg = u"Rescheduling tasks was not possible because the course_id / item_id was not assigned."
        # There is no active exception at this point, so log with error();
        # exception() would append a meaningless "NoneType: None" traceback.
        logger.error(msg)
        raise AIReschedulingRequestError(msg)

    # Reschedules all of the training tasks
    if task_type == u"train" or task_type is None:
        try:
            training_tasks.reschedule_training_tasks.apply_async(
                args=[course_id, item_id])
        except ANTICIPATED_CELERY_ERRORS as ex:
            msg = (
                u"Rescheduling training tasks for course {cid} and item {iid} failed with exception: {ex}"
            ).format(cid=course_id, iid=item_id, ex=ex)
            logger.exception(msg)
            raise AITrainingInternalError(ex)

    # Reschedules all of the grading tasks
    if task_type == u"grade" or task_type is None:
        try:
            grading_tasks.reschedule_grading_tasks.apply_async(
                args=[course_id, item_id])
        except ANTICIPATED_CELERY_ERRORS as ex:
            msg = (
                u"Rescheduling grading tasks for course {cid} and item {iid} failed with exception: {ex}"
            ).format(cid=course_id, iid=item_id, ex=ex)
            logger.exception(msg)
            raise AIGradingInternalError(ex)
Beispiel #4
0
    def setUp(self):
        """
        Prepare each test with unfinished tasks of both kinds (grading and training).
        """
        # Step 1: schedule grading ten times.  The intent is that scheduling
        # succeeds while the grading itself fails because no classifiers exist.
        # Only the UUID of the last submission remains on `self.submission_uuid`.
        for _ in range(10):
            created = sub_api.create_submission(STUDENT_ITEM, ANSWER)
            self.submission_uuid = created['uuid']
            ai_api.on_init(
                self.submission_uuid,
                rubric=RUBRIC,
                algorithm_id=ALGORITHM_ID
            )

        # Step 2: schedule training and force it to fail on purpose, so that
        # both training and grading are left needing to be rescheduled.
        train_async_path = 'openassessment.assessment.api.ai.training_tasks.train_classifiers.apply_async'
        forced_failure = AITrainingInternalError('Training Classifiers Failed for some Reason.')
        with mock.patch(train_async_path, side_effect=forced_failure):
            with self.assertRaises(AITrainingInternalError):
                ai_api.train_classifiers(
                    RUBRIC, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID
                )

        # Sanity check: neither training nor grading should be finished yet.
        self._assert_complete(training_done=False, grading_done=False)
Beispiel #5
0
def train_classifiers(rubric_dict, examples, course_id, item_id, algorithm_id):
    """
    Schedule a task to train classifiers.

    The training examples are deserialized against the rubric, a training
    workflow is created for them, and an asynchronous training task is
    scheduled with the workflow UUID as its only argument.
    All training examples must match the rubric!

    NOTE(review): earlier documentation states that incomplete
    AIGradingWorkflows are rescheduled automatically once training completes
    successfully.  That happens outside this function -- confirm in the
    training task / workflow code.

    Args:
        rubric_dict (dict): The rubric used to assess the classifiers.
        examples (list of dict): Serialized training examples.
        course_id (unicode): The course ID passed to the training workflow.
        item_id (unicode): The item ID passed to the training workflow.
        algorithm_id (unicode): The ID of the algorithm used to train the classifiers.

    Returns:
        training_workflow_uuid (str): The UUID of the training workflow.
            Usually the caller will not need this (since the workers
            are parametrized by training workflow UUID), but it's
            useful for testing.

    Raises:
        AITrainingRequestError: The rubric or training examples could not be
            parsed, or creating the workflow raised NoTrainingExamples.
        AITrainingInternalError: Creating the workflow failed unexpectedly,
            or scheduling the task raised one of ANTICIPATED_CELERY_ERRORS.

    Example usage:

    >>> train_classifiers(rubric, examples, course_id, item_id, 'ease')
    '10df7db776686822e501b05f452dc1e4b9141fe5'

    """
    # Get or create the rubric and training examples
    try:
        examples = deserialize_training_examples(examples, rubric_dict)
    except (InvalidRubric, InvalidTrainingExample, InvalidRubricSelection) as ex:
        msg = u"Could not parse rubric and/or training examples: {ex}".format(ex=ex)
        raise AITrainingRequestError(msg)

    # Create the workflow model
    try:
        workflow = AITrainingWorkflow.start_workflow(examples, course_id, item_id, algorithm_id)
    except NoTrainingExamples as ex:
        raise AITrainingRequestError(ex)
    except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # SystemExit / KeyboardInterrupt / GeneratorExit propagate untouched
        # instead of being disguised as an internal training error.
        msg = (
            u"An unexpected error occurred while creating "
            u"the AI training workflow"
        )
        logger.exception(msg)
        raise AITrainingInternalError(msg)

    # Schedule the task, parametrized by the workflow UUID
    try:
        training_tasks.train_classifiers.apply_async(args=[workflow.uuid])
    except ANTICIPATED_CELERY_ERRORS as ex:
        msg = (
            u"An unexpected error occurred while scheduling incomplete training workflows with"
            u" course_id={cid} and item_id={iid}: {ex}"
        ).format(cid=course_id, iid=item_id, ex=ex)
        logger.exception(msg)
        raise AITrainingInternalError(msg)

    # Return the workflow UUID
    return workflow.uuid
Beispiel #6
0
def get_training_task_params(training_workflow_uuid):
    """
    Collect the parameters a training task needs for a given workflow.

    The workflow is looked up by UUID.  Each of its training examples is
    turned into a dict holding the essay text (via
    `essay_text_from_submission`) and a mapping of criterion name to the
    points of the option selected for that criterion.

    Args:
        training_workflow_uuid (str): The UUID of the training workflow.

    Returns:
        dict with keys:
            * training_examples (list of dict): The examples used to train the classifiers.
            * course_id (unicode): The course ID stored on the workflow.
            * item_id (unicode): The item ID stored on the workflow.
            * algorithm_id (unicode): The algorithm ID stored on the workflow.

    Raises:
        AITrainingRequestError: No workflow exists with the given UUID.
        AITrainingInternalError: A DatabaseError occurred while reading the
            workflow or its examples.

    Example usage:
        >>> params = get_training_task_params('abcd1234')
        >>> params['algorithm_id']
        u'ease'
        >>> params['training_examples']
        [
            {
                "text": u"Example answer number one",
                "scores": {
                    "vocabulary": 1,
                    "grammar": 2
                }
            },
            {
                "text": u"Example answer number two",
                "scores": {
                    "vocabulary": 3,
                    "grammar": 1
                }
            }
        ]

    """
    try:
        workflow = AITrainingWorkflow.objects.get(uuid=training_workflow_uuid)

        serialized_examples = []
        for training_example in workflow.training_examples.all():
            # Map each criterion name to the points of its selected option.
            points_by_criterion = {
                selected.criterion.name: selected.points
                for selected in training_example.options_selected.all()
            }
            essay_text = essay_text_from_submission({'answer': training_example.answer})
            serialized_examples.append({
                'text': essay_text,
                'scores': points_by_criterion
            })

        task_params = {
            'training_examples': serialized_examples,
            'algorithm_id': workflow.algorithm_id,
            'course_id': workflow.course_id,
            'item_id': workflow.item_id
        }
        return task_params
    except AITrainingWorkflow.DoesNotExist:
        not_found_msg = (
            u"Could not retrieve AI training workflow with UUID {}"
        ).format(training_workflow_uuid)
        raise AITrainingRequestError(not_found_msg)
    except DatabaseError:
        db_msg = (
            u"An unexpected error occurred while retrieving "
            u"training examples for the AI training workflow with UUID {}"
        ).format(training_workflow_uuid)
        logger.exception(db_msg)
        raise AITrainingInternalError(db_msg)