Beispiel #1
0
    def test_duplicate_training_example(self):
        # Deserialize some examples for a rubric
        deserialize_training_examples(self.EXAMPLES[0:2], self.RUBRIC)

        # Deserialize some more examples, of which two are duplicates
        examples = deserialize_training_examples(self.EXAMPLES, self.RUBRIC)

        # Check that only three examples were created in the database
        db_examples = TrainingExample.objects.all()
        self.assertEqual(len(db_examples), 3)

        # Check that the examples match what we got from the deserializer
        self.assertItemsEqual(examples, db_examples)
    def test_duplicate_training_example(self):
        # Deserialize some examples for a rubric
        deserialize_training_examples(self.EXAMPLES[0:2], self.RUBRIC)

        # Deserialize some more examples, of which two are duplicates
        examples = deserialize_training_examples(self.EXAMPLES, self.RUBRIC)

        # Check that only three examples were created in the database
        db_examples = TrainingExample.objects.all()
        self.assertEqual(len(db_examples), 3)

        # Check that the examples match what we got from the deserializer
        self.assertItemsEqual(examples, db_examples)
Beispiel #3
0
 def setUp(self):
     """
     Create a training workflow in the database.
     """
     examples = deserialize_training_examples(EXAMPLES, RUBRIC)
     workflow = AITrainingWorkflow.start_workflow(examples, self.COURSE_ID, self.ITEM_ID, self.ALGORITHM_ID)
     self.workflow_uuid = workflow.uuid
Beispiel #4
0
    def test_similar_training_examples_different_rubric(self):
        # Deserialize some examples
        first_examples = deserialize_training_examples(self.EXAMPLES, self.RUBRIC)

        # Deserialize one more example with the rubric mutated slightly
        mutated_rubric = copy.deepcopy(self.RUBRIC)
        mutated_rubric['criteria'][0]['options'][0]['points'] = 5
        second_examples = deserialize_training_examples(self.EXAMPLES[0:2], mutated_rubric)

        # There should be a total of 5 examples (3 for the first rubric + 2 for the second)
        db_examples = TrainingExample.objects.all()
        self.assertEqual(len(db_examples), 5)

        # Check that each of the examples from the deserializer are in the database
        for example in (first_examples + second_examples):
            self.assertIn(example, db_examples)
Beispiel #5
0
def train_classifiers(rubric_dict, examples, course_id, item_id, algorithm_id):
    """
    Schedule a task to train classifiers.
    All training examples must match the rubric!
    After training of classifiers completes successfully, all AIGradingWorkflows that are incomplete will be
    automatically rescheduled to complete.

    Args:
        rubric_dict (dict): The rubric used to assess the classifiers.
        examples (list of dict): Serialized training examples.
        algorithm_id (unicode): The ID of the algorithm used to train the classifiers.

    Returns:
        training_workflow_uuid (str): The UUID of the training workflow.
            Usually the caller will not need this (since the workers
            are parametrized by training workflow UUID), but it's
            useful for testing.

    Raises:
        AITrainingRequestError
        AITrainingInternalError

    Example usage:
    >>> train_classifiers(rubric, examples, 'ease')
    '10df7db776686822e501b05f452dc1e4b9141fe5'

    """
    # Get or create the rubric and training examples
    try:
        examples = deserialize_training_examples(examples, rubric_dict)
    except (InvalidRubric, InvalidTrainingExample, InvalidRubricSelection) as ex:
        msg = u"Could not parse rubric and/or training examples: {ex}".format(ex=ex)
        raise AITrainingRequestError(msg)

    # Create the workflow model
    try:
        workflow = AITrainingWorkflow.start_workflow(examples, course_id, item_id, algorithm_id)
    except NoTrainingExamples as ex:
        raise AITrainingRequestError(ex)
    except:
        msg = (
            u"An unexpected error occurred while creating "
            u"the AI training workflow"
        )
        logger.exception(msg)
        raise AITrainingInternalError(msg)

    # Schedule the task, parametrized by the workflow UUID
    try:
        training_tasks.train_classifiers.apply_async(args=[workflow.uuid])
    except ANTICIPATED_CELERY_ERRORS as ex:
        msg = (
            u"An unexpected error occurred while scheduling incomplete training workflows with"
            u" course_id={cid} and item_id={iid}: {ex}"
        ).format(cid=course_id, iid=item_id, ex=ex)
        logger.exception(msg)
        raise AITrainingInternalError(msg)

    # Return the workflow UUID
    return workflow.uuid
Beispiel #6
0
    def test_similar_training_examples_different_options(self):
        # Deserialize some examples
        first_examples = deserialize_training_examples(self.EXAMPLES, self.RUBRIC)

        # Deserialize another example that's identical to the first example,
        # with one option changed
        mutated_examples = copy.deepcopy(self.EXAMPLES)
        mutated_examples[0]['options_selected'][u'vøȼȺƀᵾłȺɍɏ'] = u"єχ¢єℓℓєηт"
        second_examples = deserialize_training_examples(mutated_examples, self.RUBRIC)

        # Expect that a total of 4 examples (3 for the first call, plus one new example in the second call)
        db_examples = TrainingExample.objects.all()
        self.assertEqual(len(db_examples), 4)

        # Check that all the examples are in the database
        for example in (first_examples + second_examples):
            self.assertIn(example, db_examples)
Beispiel #7
0
    def test_similar_training_examples_different_answer(self):
        # Deserialize some examples
        first_examples = deserialize_training_examples(self.EXAMPLES, self.RUBRIC)

        # Deserialize another example that's identical to the first example,
        # with a different answer
        mutated_examples = copy.deepcopy(self.EXAMPLES)
        mutated_examples[0]['answer'] = u"MUTATED!"
        second_examples = deserialize_training_examples(mutated_examples, self.RUBRIC)

        # Expect that a total of 4 examples (3 for the first call, plus one new example in the second call)
        db_examples = TrainingExample.objects.all()
        self.assertEqual(len(db_examples), 4)

        # Check that all the examples are in the database
        for example in (first_examples + second_examples):
            self.assertIn(example, db_examples)
Beispiel #8
0
 def setUp(self):
     """
     Create a training workflow in the database.
     """
     examples = deserialize_training_examples(EXAMPLES, RUBRIC)
     workflow = AITrainingWorkflow.start_workflow(examples, self.COURSE_ID,
                                                  self.ITEM_ID,
                                                  self.ALGORITHM_ID)
     self.workflow_uuid = workflow.uuid
    def test_similar_training_examples_different_rubric(self):
        # Deserialize some examples
        first_examples = deserialize_training_examples(self.EXAMPLES,
                                                       self.RUBRIC)

        # Deserialize one more example with the rubric mutated slightly
        mutated_rubric = copy.deepcopy(self.RUBRIC)
        mutated_rubric['criteria'][0]['options'][0]['points'] = 5
        second_examples = deserialize_training_examples(
            self.EXAMPLES[0:2], mutated_rubric)

        # There should be a total of 5 examples (3 for the first rubric + 2 for the second)
        db_examples = TrainingExample.objects.all()
        self.assertEqual(len(db_examples), 5)

        # Check that each of the examples from the deserializer are in the database
        for example in (first_examples + second_examples):
            self.assertIn(example, db_examples)
Beispiel #10
0
 def test_serialize_training_example_with_legacy_answer(self):
     """Test that legacy answer format in training example serialized correctly"""
     training_examples = deserialize_training_examples(self.EXAMPLES, self.RUBRIC)
     for example in training_examples:
         self.assertIsInstance(example.answer, unicode)
         serialized_example = serialize_training_example(example)
         self.assertIsInstance(serialized_example["answer"], dict)
         expected_answer_dict = {'parts': [{'text': example.answer}]}
         self.assertEqual(serialized_example["answer"], expected_answer_dict)
    def test_similar_training_examples_different_answer(self):
        # Deserialize some examples
        first_examples = deserialize_training_examples(self.EXAMPLES,
                                                       self.RUBRIC)

        # Deserialize another example that's identical to the first example,
        # with a different answer
        mutated_examples = copy.deepcopy(self.EXAMPLES)
        mutated_examples[0]['answer'] = u"MUTATED!"
        second_examples = deserialize_training_examples(
            mutated_examples, self.RUBRIC)

        # Expect that a total of 4 examples (3 for the first call, plus one new example in the second call)
        db_examples = TrainingExample.objects.all()
        self.assertEqual(len(db_examples), 4)

        # Check that all the examples are in the database
        for example in (first_examples + second_examples):
            self.assertIn(example, db_examples)
    def test_similar_training_examples_different_options(self):
        # Deserialize some examples
        first_examples = deserialize_training_examples(self.EXAMPLES,
                                                       self.RUBRIC)

        # Deserialize another example that's identical to the first example,
        # with one option changed
        mutated_examples = copy.deepcopy(self.EXAMPLES)
        mutated_examples[0]['options_selected'][u'vøȼȺƀᵾłȺɍɏ'] = u"єχ¢єℓℓєηт"
        second_examples = deserialize_training_examples(
            mutated_examples, self.RUBRIC)

        # Expect that a total of 4 examples (3 for the first call, plus one new example in the second call)
        db_examples = TrainingExample.objects.all()
        self.assertEqual(len(db_examples), 4)

        # Check that all the examples are in the database
        for example in (first_examples + second_examples):
            self.assertIn(example, db_examples)
 def test_serialize_training_example_with_legacy_answer(self):
     """Test that legacy answer format in training example serialized correctly"""
     training_examples = deserialize_training_examples(
         self.EXAMPLES, self.RUBRIC)
     for example in training_examples:
         self.assertIsInstance(example.answer, unicode)
         serialized_example = serialize_training_example(example)
         self.assertIsInstance(serialized_example["answer"], dict)
         expected_answer_dict = {'parts': [{'text': example.answer}]}
         self.assertEqual(serialized_example["answer"],
                          expected_answer_dict)
Beispiel #14
0
    def test_deserialize_integrity_error(self, mock_create, mock_get):
        # Simulate an integrity error when creating the training example
        # This can occur when using repeatable-read isolation mode.
        mock_example = mock.MagicMock(TrainingExample)
        mock_get.side_effect = [TrainingExample.DoesNotExist, mock_example]
        mock_create.side_effect = IntegrityError

        # Expect that we get the mock example back
        # (proves that the function tried to retrieve the object again after
        # catching the integrity error)
        examples = deserialize_training_examples(self.EXAMPLES[:1], self.RUBRIC)
        self.assertEqual(examples, [mock_example])
Beispiel #15
0
    def test_deserialize_integrity_error(self):
        """
        Simulate an integrity error when creating the training example
        This can occur when using repeatable-read isolation mode.
        """
        example = deserialize_training_examples(self.EXAMPLES[:1],
                                                self.RUBRIC)[0]
        with mock.patch(
                'openassessment.assessment.models.TrainingExample.objects.get'
        ) as mock_get:
            with mock.patch(
                    'openassessment.assessment.models.TrainingExample.create_example'
            ) as mock_create:
                mock_get.side_effect = [TrainingExample.DoesNotExist, example]
                mock_create.side_effect = IntegrityError

                # Expect that we get the mock example back
                # (proves that the function tried to retrieve the object again after
                # catching the integrity error)
                examples = deserialize_training_examples(
                    self.EXAMPLES[:1], self.RUBRIC)
                self.assertEqual(examples, [example])
    def test_deserialize_integrity_error(self, mock_create, mock_get):
        # Simulate an integrity error when creating the training example
        # This can occur when using repeatable-read isolation mode.
        mock_example = mock.MagicMock(TrainingExample)
        mock_get.side_effect = [TrainingExample.DoesNotExist, mock_example]
        mock_create.side_effect = IntegrityError

        # Expect that we get the mock example back
        # (proves that the function tried to retrieve the object again after
        # catching the integrity error)
        examples = deserialize_training_examples(self.EXAMPLES[:1],
                                                 self.RUBRIC)
        self.assertEqual(examples, [mock_example])
Beispiel #17
0
def get_training_example(submission_uuid, rubric, examples):
    """
    Retrieve a training example for the student to assess.
    This will implicitly create a workflow for the student if one does not yet exist.

    NOTE: We include the rubric in the returned dictionary to handle
    the case in which the instructor changes the rubric definition
    while the student is assessing the training example.  Once a student
    starts on a training example, the student should see the same training
    example consistently.  However, the next training example the student
    retrieves will use the updated rubric.

    Args:
        submission_uuid (str): The UUID of the student's submission.
        rubric (dict): Serialized rubric model.
        examples (list): List of serialized training examples.

    Returns:
        dict: The training example with keys "answer", "rubric", and "options_selected".
        If no training examples are available (the student has already assessed every example,
            or no examples are defined), returns None.

    Raises:
        StudentTrainingInternalError

    Example usage:

        >>> examples = [
        >>>     {
        >>>         'answer': u'Doler',
        >>>         'options_selected': {
        >>>             'vocabulary': 'good',
        >>>             'grammar': 'poor'
        >>>         }
        >>>     }
        >>> ]
        >>>
        >>> get_training_example("5443ebbbe2297b30f503736e26be84f6c7303c57", rubric, examples)
        {
            'answer': u'Lorem ipsum',
            'rubric': {
                "prompt": "Write an essay!",
                "criteria": [
                    {
                        "order_num": 0,
                        "name": "vocabulary",
                        "prompt": "How varied is the vocabulary?",
                        "options": options
                    },
                    {
                        "order_num": 1,
                        "name": "grammar",
                        "prompt": "How correct is the grammar?",
                        "options": options
                    }
                ],
            },
            'options_selected': {
                'vocabulary': 'good',
                'grammar': 'excellent'
            }
        }

    """
    try:
        # Validate the training examples
        errors = validate_training_examples(rubric, examples)
        if len(errors) > 0:
            msg = (
                u"Training examples do not match the rubric (submission UUID is {uuid}): {errors}"
            ).format(uuid=submission_uuid, errors="\n".join(errors))
            raise StudentTrainingRequestError(msg)

        # Get or create the workflow
        workflow = StudentTrainingWorkflow.get_workflow(submission_uuid=submission_uuid)
        if not workflow:
            raise StudentTrainingRequestError(
                u"No student training workflow found for submission {}".format(submission_uuid)
            )

        # Get or create the training examples
        examples = deserialize_training_examples(examples, rubric)

        # Pick a training example that the student has not yet completed
        # If the student already started a training example, then return that instead.
        next_example = workflow.next_training_example(examples)
        return None if next_example is None else serialize_training_example(next_example)
    except (InvalidRubric, InvalidRubricSelection, InvalidTrainingExample) as ex:
        logger.exception(
            "Could not deserialize training examples for submission UUID {}".format(submission_uuid)
        )
        raise StudentTrainingRequestError(ex)
    except sub_api.SubmissionNotFoundError as ex:
        msg = u"Could not retrieve the submission with UUID {}".format(submission_uuid)
        logger.exception(msg)
        raise StudentTrainingRequestError(msg)
    except DatabaseError:
        msg = (
            u"Could not retrieve a training example "
            u"for the student with submission UUID {}"
        ).format(submission_uuid)
        logger.exception(msg)
        raise StudentTrainingInternalError(msg)
Beispiel #18
0
def get_training_example(submission_uuid, rubric, examples):
    """
    Retrieve a training example for the student to assess.
    This will implicitly create a workflow for the student if one does not yet exist.

    NOTE: We include the rubric in the returned dictionary to handle
    the case in which the instructor changes the rubric definition
    while the student is assessing the training example.  Once a student
    starts on a training example, the student should see the same training
    example consistently.  However, the next training example the student
    retrieves will use the updated rubric.

    Args:
        submission_uuid (str): The UUID of the student's submission.
        rubric (dict): Serialized rubric model.
        examples (list): List of serialized training examples.

    Returns:
        dict: The training example with keys "answer", "rubric", and "options_selected".
        If no training examples are available (the student has already assessed every example,
            or no examples are defined), returns None.

    Raises:
        StudentTrainingInternalError

    Example usage:

        >>> examples = [
        >>>     {
        >>>         'answer': {
        >>>             'parts': {
        >>>                 [
        >>>                     {'text:' 'Answer part 1'},
        >>>                     {'text:' 'Answer part 2'},
        >>>                     {'text:' 'Answer part 3'}
        >>>                 ]
        >>>             }
        >>>         },
        >>>         'options_selected': {
        >>>             'vocabulary': 'good',
        >>>             'grammar': 'poor'
        >>>         }
        >>>     }
        >>> ]
        >>>
        >>> get_training_example("5443ebbbe2297b30f503736e26be84f6c7303c57", rubric, examples)
        {
             'answer': {
                 'parts': {
                     [
                         {'text:' 'Answer part 1'},
                         {'text:' 'Answer part 2'},
                         {'text:' 'Answer part 3'}
                     ]
                 }
             },
            'rubric': {
                "prompts": [
                    {"description": "Prompt 1"},
                    {"description": "Prompt 2"},
                    {"description": "Prompt 3"}
                ],
                "criteria": [
                    {
                        "order_num": 0,
                        "name": "vocabulary",
                        "prompt": "How varied is the vocabulary?",
                        "options": options
                    },
                    {
                        "order_num": 1,
                        "name": "grammar",
                        "prompt": "How correct is the grammar?",
                        "options": options
                    }
                ],
            },
            'options_selected': {
                'vocabulary': 'good',
                'grammar': 'excellent'
            }
        }

    """
    try:
        # Validate the training examples
        errors = validate_training_examples(rubric, examples)
        if errors:
            msg = (
                "Training examples do not match the rubric (submission UUID is {uuid}): {errors}"
            ).format(uuid=submission_uuid, errors="\n".join(errors))
            raise StudentTrainingRequestError(msg)

        # Get or create the workflow
        workflow = StudentTrainingWorkflow.get_workflow(submission_uuid=submission_uuid)
        if not workflow:
            raise StudentTrainingRequestError(
                u"No learner training workflow found for submission {}".format(submission_uuid)
            )

        # Get or create the training examples
        examples = deserialize_training_examples(examples, rubric)

        # Pick a training example that the student has not yet completed
        # If the student already started a training example, then return that instead.
        next_example = workflow.next_training_example(examples)
        return None if next_example is None else serialize_training_example(next_example)
    except (InvalidRubric, InvalidRubricSelection, InvalidTrainingExample) as ex:
        logger.exception(
            u"Could not deserialize training examples for submission UUID {}".format(submission_uuid)
        )
        raise StudentTrainingRequestError(ex) from ex
    except sub_api.SubmissionNotFoundError as ex:
        msg = u"Could not retrieve the submission with UUID {}".format(submission_uuid)
        logger.exception(msg)
        raise StudentTrainingRequestError(msg) from ex
    except DatabaseError as ex:
        msg = (
            u"Could not retrieve a training example for the learner with submission UUID {}"
        ).format(submission_uuid)
        logger.exception(msg)
        raise StudentTrainingInternalError(msg) from ex
Beispiel #19
0
def train_classifiers(rubric_dict, examples, course_id, item_id, algorithm_id):
    """
    Schedule a task to train classifiers.
    All training examples must match the rubric!
    After training of classifiers completes successfully, all AIGradingWorkflows that are incomplete will be
    automatically rescheduled to complete.

    Args:
        rubric_dict (dict): The rubric used to assess the classifiers.
        examples (list of dict): Serialized training examples.
        algorithm_id (unicode): The ID of the algorithm used to train the classifiers.

    Returns:
        training_workflow_uuid (str): The UUID of the training workflow.
            Usually the caller will not need this (since the workers
            are parametrized by training workflow UUID), but it's
            useful for testing.

    Raises:
        AITrainingRequestError
        AITrainingInternalError

    Example usage:

    >>> train_classifiers(rubric, examples, 'ease')
    '10df7db776686822e501b05f452dc1e4b9141fe5'

    """
    # Get or create the rubric and training examples
    try:
        examples = deserialize_training_examples(examples, rubric_dict)
    except (InvalidRubric, InvalidTrainingExample, InvalidRubricSelection) as ex:
        msg = u"Could not parse rubric and/or training examples: {ex}".format(ex=ex)
        raise AITrainingRequestError(msg)

    # Create the workflow model
    try:
        workflow = AITrainingWorkflow.start_workflow(examples, course_id, item_id, algorithm_id)
    except NoTrainingExamples as ex:
        raise AITrainingRequestError(ex)
    except:
        msg = (
            u"An unexpected error occurred while creating "
            u"the AI training workflow"
        )
        logger.exception(msg)
        raise AITrainingInternalError(msg)

    # Schedule the task, parametrized by the workflow UUID
    try:
        training_tasks.train_classifiers.apply_async(args=[workflow.uuid])
    except ANTICIPATED_CELERY_ERRORS as ex:
        msg = (
            u"An unexpected error occurred while scheduling incomplete training workflows with"
            u" course_id={cid} and item_id={iid}: {ex}"
        ).format(cid=course_id, iid=item_id, ex=ex)
        logger.exception(msg)
        raise AITrainingInternalError(msg)

    # Return the workflow UUID
    return workflow.uuid