Beispiel #1
0
 def create_model(self):
     """
     Train a model from this object's text and scores.

     When ``self.create_model_generic`` is truthy, ``self.text`` is read as a
     mapping and its 'numeric_values' / 'textual_values' entries (each
     defaulting to an empty list) are handed to ``create.create_generic``.
     Otherwise ``self.text`` is passed as-is to ``create.create`` together
     with an empty prompt string.

     Returns:
         Whatever the selected ``create`` function returns.
     """
     if self.create_model_generic:
         numeric_values = self.text.get('numeric_values', [])
         textual_values = self.text.get('textual_values', [])
         return create.create_generic(numeric_values, textual_values, self.scores)

     # Non-generic path: the third argument is an empty prompt string.
     return create.create(self.text, self.scores, "")
Beispiel #2
0
    def _train_classifiers(self, examples):
        """
        Use EASE to train classifiers.

        Args:
            examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.

        Returns:
            tuple of `feature_extractor` (an `ease.feature_extractor.FeatureExtractor` object)
            and `classifier` (a `sklearn.ensemble.GradientBoostingClassifier` object).

        Raises:
            TrainingError: Could not load EASE or could not complete training.

        """
        # EASE is imported lazily so that a missing install surfaces as a
        # TrainingError at training time instead of breaking module import.
        try:
            from ease.create import create  # pylint: disable=F0401
        except ImportError:
            msg = u"Could not import EASE to perform training."
            raise TrainingError(msg)

        input_essays = [example.text for example in examples]
        input_scores = [example.score for example in examples]

        try:
            # Train the classifiers
            # The third argument is the essay prompt, which EASE uses
            # to check if an input essay is too similar to the prompt.
            # Since we're not using this feature, we pass in an empty string.
            results = create(input_essays, input_scores, "")
        except Exception:  # pylint: disable=broad-except
            # Any failure inside EASE is reported as a TrainingError, but
            # KeyboardInterrupt / SystemExit are deliberately NOT caught so
            # that a worker can still be interrupted or shut down.
            msg = (
                u"An unexpected error occurred while using "
                u"EASE to train classifiers: {traceback}"
            ).format(traceback=traceback.format_exc())
            raise TrainingError(msg)

        # EASE reports failures through the result dict rather than raising.
        if not results.get('success', False):
            msg = (
                u"Errors occurred while training classifiers "
                u"using EASE: {errors}"
            ).format(errors=results.get('errors', []))
            raise TrainingError(msg)

        return results.get('feature_ext'), results.get('classifier')
Beispiel #3
0
    def _train_classifiers(self, examples):
        """
        Use EASE to train classifiers.

        Args:
            examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.

        Returns:
            tuple of `feature_extractor` (an `ease.feature_extractor.FeatureExtractor` object)
            and `classifier` (a `sklearn.ensemble.GradientBoostingClassifier` object).

        Raises:
            TrainingError: Could not load EASE or could not complete training.

        """
        # Import EASE on demand; a missing package is reported as a
        # TrainingError rather than an ImportError.
        try:
            from ease.create import create  # pylint: disable=F0401
        except ImportError:
            msg = u"Could not import EASE to perform training."
            raise TrainingError(msg)

        input_essays = [example.text for example in examples]
        input_scores = [example.score for example in examples]

        try:
            # Train the classifiers
            # The third argument is the essay prompt, which EASE uses
            # to check if an input essay is too similar to the prompt.
            # Since we're not using this feature, we pass in an empty string.
            results = create(input_essays, input_scores, "")
        except Exception:  # pylint: disable=broad-except
            # Only genuine errors are wrapped; KeyboardInterrupt and
            # SystemExit propagate so the process can still be stopped.
            msg = (u"An unexpected error occurred while using "
                   u"EASE to train classifiers: {traceback}").format(
                       traceback=traceback.format_exc())
            raise TrainingError(msg)

        # A falsy or missing 'success' flag means EASE could not train.
        if not results.get('success', False):
            msg = (u"Errors occurred while training classifiers "
                   u"using EASE: {errors}").format(
                       errors=results.get('errors', []))
            raise TrainingError(msg)

        return results.get('feature_ext'), results.get('classifier')
Beispiel #4
0
def handle_single_location(location):
    """
    Create (or re-create) the ML models for a single location.

    One model is considered per suffix returned by
    ml_grading_util.generate_rubric_location_suffixes.  Index 0 is trained on
    the score of each submission's last grader; every later index is trained on
    the matching rubric item score.  A model is (re)trained when
    get_latest_created_model reports no model for ``location + suffix`` or when
    at least 5 more graded essays are available than that model used.

    location - location identifier.  It is concatenated with each suffix, so it
               is presumably a string (confirm against callers).

    Returns None.  Exceptions are logged and counted in statsd instead of being
    propagated; KeyboardInterrupt / SystemExit are not intercepted.
    """
    try:
        transaction.commit()
        gc.collect()
        subs_graded_by_instructor = staff_grading_util.finished_submissions_graded_by_instructor(location)
        #Count once and reuse the value for the log line and the threshold checks
        graded_sub_count = subs_graded_by_instructor.count()
        log.debug("Checking location {0} to see if essay count {1} greater than min {2}".format(
            location,
            graded_sub_count,
            settings.MIN_TO_USE_ML,
        ))

        #check to see if there are enough instructor graded essays for location
        if graded_sub_count >= settings.MIN_TO_USE_ML:

            location_suffixes = ml_grading_util.generate_rubric_location_suffixes(subs_graded_by_instructor, grading=False)

            if settings.MAX_TO_USE_ML < graded_sub_count:
                graded_sub_count = settings.MAX_TO_USE_ML

            #Cap the training set BEFORE collecting rubric scores.  The essay text and ids below are
            #built from the capped set, so the per-rubric score lists must be built from it as well,
            #otherwise they can contain more entries than there are essays.
            subs_graded_by_instructor = subs_graded_by_instructor[:settings.MAX_TO_USE_ML]

            sub_rubric_scores = []
            if len(location_suffixes) > 0:
                for sub in subs_graded_by_instructor:
                    success, scores = controller.rubric_functions.get_submission_rubric_instructor_scores(sub)
                    sub_rubric_scores.append(scores)

            for m, suffix in enumerate(location_suffixes):
                log.debug("Currently on location {0}.  Greater than zero is a rubric item.".format(m))
                model_location = location + suffix
                #Get paths to ml model from database
                relative_model_path, full_model_path = ml_grading_util.get_model_path(model_location)
                #Get last created model for given location
                transaction.commit()
                success, latest_created_model = ml_grading_util.get_latest_created_model(model_location)

                if success:
                    sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
                else:
                    sub_count_diff = graded_sub_count

                #Retrain if no model exists, or every 5 graded essays.
                if not success or sub_count_diff >= 5:

                    text = [str(i.student_response.encode('ascii', 'ignore')) for i in subs_graded_by_instructor]
                    ids = [i.id for i in subs_graded_by_instructor]

                    #TODO: Make queries more efficient
                    #This is for the basic overall score
                    if m == 0:
                        scores = [z.get_last_grader().score for z in list(subs_graded_by_instructor)]
                    else:
                        #Rubric item m-1 of every submission
                        scores = [z[m-1] for z in sub_rubric_scores]

                    #Get the first graded submission, so that we can extract metadata like rubric, etc, from it
                    first_sub = subs_graded_by_instructor[0]

                    prompt = str(first_sub.prompt.encode('ascii', 'ignore'))
                    rubric = str(first_sub.rubric.encode('ascii', 'ignore'))

                    transaction.commit()

                    #Checks to see if another model creator process has started amodel for this location
                    success, model_started, created_model = ml_grading_util.check_if_model_started(model_location)

                    #Checks to see if model was started a long time ago, and removes and retries if it was.
                    if model_started:
                        now = timezone.now()
                        second_difference = (now - created_model.date_modified).total_seconds()
                        if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                            log.error("Model for location {0} started over {1} seconds ago, removing and re-attempting.".format(
                                model_location, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                            created_model.delete()
                            model_started = False

                    if not model_started:
                        #Record the model as "started" before training so that the check above can see it
                        created_model_dict_initial = {
                            'max_score' : first_sub.max_score,
                            'prompt' : prompt,
                            'rubric' : rubric,
                            'location' : model_location,
                            'course_id' : first_sub.course_id,
                            'submission_ids_used' : json.dumps(ids),
                            'problem_id' :  first_sub.problem_id,
                            'model_relative_path' : relative_model_path,
                            'model_full_path' : full_model_path,
                            'number_of_essays' : graded_sub_count,
                            'creation_succeeded': False,
                            'creation_started' : True,
                            'creation_finished' : False,
                            }
                        transaction.commit()
                        success, initial_id = ml_grading_util.save_created_model(created_model_dict_initial)
                        transaction.commit()

                        results = create.create(text, scores, prompt)

                        scores = [int(score_item) for score_item in scores]
                        #Add in needed stuff that ml creator does not pass back
                        results.update({'text' : text, 'score' : scores, 'model_path' : full_model_path,
                                        'relative_model_path' : relative_model_path, 'prompt' : prompt})

                        #Try to create model if ml model creator was successful
                        if results['success']:
                            try:
                                success, s3_public_url = save_model_file(results, settings.USE_S3_TO_STORE_MODELS)
                                results.update({'s3_public_url' : s3_public_url, 'success' : success})
                                if not success:
                                    results['errors'].append("Could not save model.")
                            except Exception:
                                #A model that could not be saved must not be recorded as successfully created
                                results['success'] = False
                                results['errors'].append("Could not save model.")
                                results['s3_public_url'] = ""
                                log.exception("Problem saving ML model.")

                        created_model_dict_final = {
                            'cv_kappa' : results['cv_kappa'],
                            'cv_mean_absolute_error' : results['cv_mean_absolute_error'],
                            'creation_succeeded': results['success'],
                            #Within this function the key is only set in the save branch above, so fall
                            #back to "" when training failed and no save was attempted
                            's3_public_url' : results.get('s3_public_url', ""),
                            'model_stored_in_s3' : settings.USE_S3_TO_STORE_MODELS,
                            's3_bucketname' : str(settings.S3_BUCKETNAME),
                            'creation_finished' : True,
                            'model_relative_path' : relative_model_path,
                            'model_full_path' : full_model_path,
                            'location' : model_location,
                            }

                        transaction.commit()
                        success, id = ml_grading_util.save_created_model(created_model_dict_final, update_model=True, update_id=initial_id)

                        if not success:
                            #On failure the second return value is logged as the error
                            log.error("ModelCreator creation failed.  Error: {0}".format(id))
                            statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                                tags=["success:False", "location:{0}".format(location)])

                        log.debug("Location: {0} Creation Status: {1} Errors: {2}".format(
                            full_model_path,
                            results['success'],
                            results['errors'],
                        ))
                        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                            tags=["success:{0}".format(results['success']), "location:{0}".format(location)])
        util.log_connection_data()
    except Exception:
        #Boundary handler: a failure at one location is logged and counted, never raised to the caller
        log.exception("Problem creating model for location {0}".format(location))
        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
            tags=["success:Exception", "location:{0}".format(location)])
def handle_single_problem(problem):
    """
    Creates a machine learning model for a given problem.
    problem - A Problem instance (django model)

    One model is considered per target (one per entry in problem.max_target_scores).
    A target is (re)trained when no created model is found for it or when at least
    10 more graded essays are available than its latest model used.

    Returns a tuple (success, message):
      - (False, error_message) when the essays cannot be encoded or there are fewer
        than MIN_ESSAYS_TO_TRAIN_WITH usable essays.
      - (overall_success, "Creation succeeded.") otherwise.  overall_success is the final
        status (training AND saving) of the last model trained in this call, and stays
        False when nothing was trained; the message text is the same in either case.
    """
    overall_success = False
    #This function is called by celery.  This ensures that the database is not stuck in an old transaction
    transaction.commit()
    #Get prompt and essays from problem (needed to train a model)
    prompt = problem.prompt
    essays = problem.essay_set.filter(essay_type="train")

    #Now, try to decode the grades from the essaygrade objects
    essay_text = []
    essay_grades = []
    essay_text_vals = essays.values('essay_text')
    for i in xrange(0,len(essays)):
        try:
            #Get an instructor score for a given essay (stored as a json string in DB) and convert to a list.  Looks like [1,1]
            #where each number denotes a score for a given target number
            target_scores = json.loads(essays[i].get_instructor_scored()[0].target_scores)
            text_value = essay_text_vals[i]['essay_text']
        except Exception:
            log.error("Could not get latest instructor scored for {0}".format(essays[i].id))
        else:
            #Only append once BOTH lookups succeeded.  Both lists need to be in sync.
            essay_grades.append(target_scores)
            essay_text.append(text_value)

    try:
        #This is needed to remove stray characters that could break the machine learning code
        essay_text = [et.encode('ascii', 'ignore') for et in essay_text]
    except Exception:
        error_message = "Could not correctly encode some submissions: {0}".format(essay_text)
        log.error(error_message)
        transaction.commit()
        return False, error_message

    #Get the maximum target scores from the problem
    first_len = len(json.loads(problem.max_target_scores))
    #Indexes of essays whose grade list does not have one score per target
    bad_indexes = set()
    for i in xrange(0,len(essay_grades)):
        #All of the lists within the essay grade list (ie [[[1,1],[2,2]]) need to be the same length
        if len(essay_grades[i])!=first_len:
            error_message = "Problem with an instructor scored essay! {0}".format(essay_grades)
            log.info(error_message)
            bad_indexes.add(i)

    essay_text = [text for t, text in enumerate(essay_text) if t not in bad_indexes]
    essay_grades = [grades for t, grades in enumerate(essay_grades) if t not in bad_indexes]

    #Too many essays can take a very long time to train and eat up system resources.  Enforce a max.
    # Accuracy increases logarithmically, anyways, so you dont lose much here.
    if len(essay_text)>MAX_ESSAYS_TO_TRAIN_WITH:
        essay_text = essay_text[:MAX_ESSAYS_TO_TRAIN_WITH]
        essay_grades = essay_grades[:MAX_ESSAYS_TO_TRAIN_WITH]

    graded_sub_count = len(essay_text)
    #If there are too few essays, then don't train a model.  Need a minimum to get any kind of accuracy.
    if graded_sub_count < MIN_ESSAYS_TO_TRAIN_WITH:
        error_message = "Too few too create a model for problem {0}  need {1} only have {2}".format(problem, MIN_ESSAYS_TO_TRAIN_WITH, graded_sub_count)
        log.error(error_message)
        transaction.commit()
        return False, error_message

    #Loops through each potential target
    for m in xrange(0,first_len):
        #Gets all of the scores for this particular target
        scores = [s[m] for s in essay_grades]
        max_score = max(scores)
        log.debug("Currently on location {0} in problem {1}".format(m, problem.id))
        #Get paths to ml model from database
        relative_model_path, full_model_path= ml_grading_util.get_model_path(problem,m)
        #Get last created model for given location
        transaction.commit()
        success, latest_created_model=ml_grading_util.get_latest_created_model(problem,m)

        if success:
            sub_count_diff=graded_sub_count-latest_created_model.number_of_essays
        else:
            sub_count_diff = graded_sub_count

        #Retrain if no model exists, or every 10 graded essays.
        if not success or sub_count_diff>=10:
            log.info("Starting to create a model because none exists or it is time to retrain.")
            #Checks to see if another model creator process has started amodel for this location
            success, model_started, created_model = ml_grading_util.check_if_model_started(problem)

            #Checks to see if model was started a long time ago, and removes and retries if it was.
            if model_started:
                log.info("A model was started previously.")
                now = timezone.now()
                second_difference = (now - created_model.modified).total_seconds()
                if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                    log.info("Model for problem {0} started over {1} seconds ago, removing and re-attempting.".format(
                        problem.id, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                    created_model.delete()
                    model_started = False
            #If a model has not been started, then initialize an entry in the database to prevent other threads from duplicating work
            if not model_started:
                created_model_dict_initial={
                    'max_score' : max_score,
                    'prompt' : prompt,
                    'problem' : problem,
                    'model_relative_path' : relative_model_path,
                    'model_full_path' : full_model_path,
                    'number_of_essays' : graded_sub_count,
                    'creation_succeeded': False,
                    'creation_started' : True,
                    'target_number' : m,
                    }
                created_model = CreatedModel(**created_model_dict_initial)
                created_model.save()
                transaction.commit()

                #The ML code is handed an ascii-only string prompt
                if not isinstance(prompt, basestring):
                    try:
                        prompt = str(prompt)
                    except Exception:
                        prompt = ""
                prompt = prompt.encode('ascii', 'ignore')

                #Call on the ease repo to create a model
                results = create.create(essay_text, scores, prompt)

                #NOTE(review): this converted list is not read again before `scores` is rebuilt
                #at the top of the loop; kept so that non-integer scores still raise here.
                scores = [int(score_item) for score_item in scores]
                #Add in needed stuff that ml creator does not pass back
                results.update({
                    'model_path' : full_model_path,
                    'relative_model_path' : relative_model_path
                })

                #Try to create model if ml model creator was successful
                if results['success']:
                    try:
                        success, s3_public_url = save_model_file(results,settings.USE_S3_TO_STORE_MODELS)
                        results.update({'s3_public_url' : s3_public_url, 'success' : success})
                        if not success:
                            results['errors'].append("Could not save model.")
                    except Exception:
                        #A model that could not be saved must not be recorded as successfully created
                        results['success'] = False
                        results['errors'].append("Could not save model.")
                        results['s3_public_url'] = ""
                        log.exception("Problem saving ML model.")
                #Report the status after the save attempt, matching what is persisted below
                overall_success = results['success']

                created_model_dict_final={
                    'cv_kappa' : results['cv_kappa'],
                    'cv_mean_absolute_error' : results['cv_mean_absolute_error'],
                    'creation_succeeded': results['success'],
                    'creation_started' : False,
                    #Within this function the key is only set in the save branch above, so fall
                    #back to "" when training failed and no save was attempted
                    's3_public_url' : results.get('s3_public_url', ""),
                    'model_stored_in_s3' : settings.USE_S3_TO_STORE_MODELS,
                    's3_bucketname' : str(settings.S3_BUCKETNAME),
                    'model_relative_path' : relative_model_path,
                    'model_full_path' : full_model_path,
                    }

                transaction.commit()
                try:
                    CreatedModel.objects.filter(pk=created_model.pk).update(**created_model_dict_final)
                except Exception as err:
                    #Log the actual failure (the builtin `id` function used to be formatted here)
                    log.exception("ModelCreator creation failed.  Error: {0}".format(err))

                log.debug("Location: {0} Creation Status: {1} Errors: {2}".format(
                    full_model_path,
                    results['success'],
                    results['errors'],
                ))
    transaction.commit()
    return overall_success, "Creation succeeded."
Beispiel #6
0
def handle_single_problem(problem):
    """
    Creates a machine learning model for a given problem.
    problem - A Problem instance (django model)

    One model is considered per target (one per entry in problem.max_target_scores).
    A target is (re)trained when no created model is found for it or when at least
    10 more graded essays are available than its latest model used.

    Returns a tuple (success, message):
      - (False, error_message) when the essays cannot be encoded or there are fewer
        than MIN_ESSAYS_TO_TRAIN_WITH usable essays.
      - (overall_success, "Creation succeeded.") otherwise.  overall_success is the final
        status (training AND saving) of the last model trained in this call, and stays
        False when nothing was trained; the message text is the same in either case.
    """
    overall_success = False
    #This function is called by celery.  This ensures that the database is not stuck in an old transaction
    transaction.commit_unless_managed()
    #Get prompt and essays from problem (needed to train a model)
    prompt = problem.prompt
    essays = problem.essay_set.filter(essay_type="train")

    #Now, try to decode the grades from the essaygrade objects
    essay_text = []
    essay_grades = []
    essay_text_vals = essays.values('essay_text')
    for i in xrange(0, len(essays)):
        try:
            #Get an instructor score for a given essay (stored as a json string in DB) and convert to a list.  Looks like [1,1]
            #where each number denotes a score for a given target number
            target_scores = json.loads(
                essays[i].get_instructor_scored()[0].target_scores)
            text_value = essay_text_vals[i]['essay_text']
        except Exception:
            log.exception(
                "Could not get latest instructor scored for {0}".format(
                    essays[i]))
        else:
            #Only append once BOTH lookups succeeded.  Both lists need to be in sync.
            essay_grades.append(target_scores)
            essay_text.append(text_value)

    try:
        #This is needed to remove stray characters that could break the machine learning code
        essay_text = [et.encode('ascii', 'ignore') for et in essay_text]
    except Exception:
        error_message = "Could not correctly encode some submissions: {0}".format(
            essay_text)
        log.exception(error_message)
        return False, error_message

    #Get the maximum target scores from the problem
    first_len = len(json.loads(problem.max_target_scores))
    #Indexes of essays whose grade list does not have one score per target
    bad_indexes = set()
    for i in xrange(0, len(essay_grades)):
        #All of the lists within the essay grade list (ie [[[1,1],[2,2]]) need to be the same length
        if len(essay_grades[i]) != first_len:
            error_message = "Problem with an instructor scored essay! {0}".format(
                essay_grades)
            log.info(error_message)
            bad_indexes.add(i)

    essay_text = [
        text for t, text in enumerate(essay_text) if t not in bad_indexes
    ]
    essay_grades = [
        grades for t, grades in enumerate(essay_grades)
        if t not in bad_indexes
    ]

    #Too many essays can take a very long time to train and eat up system resources.  Enforce a max.
    # Accuracy increases logarithmically, anyways, so you dont lose much here.
    if len(essay_text) > MAX_ESSAYS_TO_TRAIN_WITH:
        essay_text = essay_text[:MAX_ESSAYS_TO_TRAIN_WITH]
        essay_grades = essay_grades[:MAX_ESSAYS_TO_TRAIN_WITH]

    graded_sub_count = len(essay_text)
    #If there are too few essays, then don't train a model.  Need a minimum to get any kind of accuracy.
    if graded_sub_count < MIN_ESSAYS_TO_TRAIN_WITH:
        error_message = "Too few too create a model for problem {0}  need {1} only have {2}".format(
            problem, MIN_ESSAYS_TO_TRAIN_WITH, graded_sub_count)
        log.error(error_message)
        return False, error_message

    #Loops through each potential target
    for m in xrange(0, first_len):
        #Gets all of the scores for this particular target
        scores = [s[m] for s in essay_grades]
        max_score = max(scores)
        log.debug("Currently on location {0} in problem {1}".format(
            m, problem.id))
        #Get paths to ml model from database
        relative_model_path, full_model_path = ml_grading_util.get_model_path(
            problem, m)
        #Get last created model for given location
        transaction.commit_unless_managed()
        success, latest_created_model = ml_grading_util.get_latest_created_model(
            problem, m)

        if success:
            sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
        else:
            sub_count_diff = graded_sub_count

        #Retrain if no model exists, or every 10 graded essays.
        if not success or sub_count_diff >= 10:
            log.info(
                "Starting to create a model because none exists or it is time to retrain."
            )
            #Checks to see if another model creator process has started amodel for this location
            success, model_started, created_model = ml_grading_util.check_if_model_started(
                problem)

            #Checks to see if model was started a long time ago, and removes and retries if it was.
            if model_started:
                log.info("A model was started previously.")
                now = timezone.now()
                second_difference = (now -
                                     created_model.modified).total_seconds()
                if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                    log.info(
                        "Model for problem {0} started over {1} seconds ago, removing and re-attempting."
                        .format(problem.id,
                                settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                    created_model.delete()
                    model_started = False
            #If a model has not been started, then initialize an entry in the database to prevent other threads from duplicating work
            if not model_started:
                created_model_dict_initial = {
                    'max_score': max_score,
                    'prompt': prompt,
                    'problem': problem,
                    'model_relative_path': relative_model_path,
                    'model_full_path': full_model_path,
                    'number_of_essays': graded_sub_count,
                    'creation_succeeded': False,
                    'creation_started': True,
                    'target_number': m,
                }
                created_model = CreatedModel(**created_model_dict_initial)
                created_model.save()
                transaction.commit_unless_managed()

                #The ML code is handed an ascii-only string prompt
                if not isinstance(prompt, basestring):
                    try:
                        prompt = str(prompt)
                    except Exception:
                        prompt = ""
                prompt = prompt.encode('ascii', 'ignore')

                #Call on the ease repo to create a model
                results = create.create(essay_text, scores, prompt)

                #NOTE(review): this converted list is not read again before `scores` is rebuilt
                #at the top of the loop; kept so that non-integer scores still raise here.
                scores = [int(score_item) for score_item in scores]
                #Add in needed stuff that ml creator does not pass back
                results.update({
                    'model_path': full_model_path,
                    'relative_model_path': relative_model_path
                })

                #Try to create model if ml model creator was successful
                if results['success']:
                    try:
                        success, s3_public_url = save_model_file(
                            results, settings.USE_S3_TO_STORE_MODELS)
                        results.update({
                            's3_public_url': s3_public_url,
                            'success': success
                        })
                        if not success:
                            results['errors'].append("Could not save model.")
                    except Exception:
                        #A model that could not be saved must not be recorded as successfully created
                        results['success'] = False
                        results['errors'].append("Could not save model.")
                        results['s3_public_url'] = ""
                        log.exception("Problem saving ML model.")
                #Report the status after the save attempt, matching what is persisted below
                overall_success = results['success']

                created_model_dict_final = {
                    'cv_kappa': results['cv_kappa'],
                    'cv_mean_absolute_error':
                    results['cv_mean_absolute_error'],
                    'creation_succeeded': results['success'],
                    'creation_started': False,
                    #Within this function the key is only set in the save branch above, so fall
                    #back to "" when training failed and no save was attempted
                    's3_public_url': results.get('s3_public_url', ""),
                    'model_stored_in_s3': settings.USE_S3_TO_STORE_MODELS,
                    's3_bucketname': str(settings.S3_BUCKETNAME),
                    'model_relative_path': relative_model_path,
                    'model_full_path': full_model_path,
                }

                transaction.commit_unless_managed()
                try:
                    CreatedModel.objects.filter(pk=created_model.pk).update(
                        **created_model_dict_final)
                except Exception as err:
                    #Log the actual failure (the builtin `id` function used to be formatted here)
                    log.exception(
                        "ModelCreator creation failed.  Error: {0}".format(err))

                log.debug(
                    "Location: {0} Creation Status: {1} Errors: {2}".format(
                        full_model_path,
                        results['success'],
                        results['errors'],
                    ))
    transaction.commit_unless_managed()
    return overall_success, "Creation succeeded."
Beispiel #7
0
def handle_single_location(location):
    """
    Create or refresh the ML models for a single location.

    The instructor-graded submissions for ``location`` are fetched through
    ``staff_grading_util.StaffLocation``.  If there are at least
    ``settings.MIN_TO_USE_ML`` of them, one model is considered per suffix
    returned by ``ml_grading_util.generate_rubric_location_suffixes``.
    Index 0 is trained on the score of each submission's last grader; every
    later index ``m`` is trained on rubric score ``m - 1`` of each submission.

    A model is (re)trained when no previous model exists for
    ``location + suffix`` or when at least 5 more graded essays are available
    than the latest model recorded.  Training is skipped while another
    creation is flagged as started, unless that record is older than
    ``settings.TIME_BEFORE_REMOVING_STARTED_MODEL`` seconds, in which case the
    stale record is deleted and creation is attempted again.

    Args:
        location: Location identifier.  It is concatenated with each rubric
            suffix, so it is presumably a string -- confirm against callers.

    Returns:
        None.  Any ``Exception`` raised along the way is caught, logged and
        counted in statsd rather than propagated.
    """
    try:
        transaction.commit()
        gc.collect()
        sl = staff_grading_util.StaffLocation(location)
        subs_graded_by_instructor = sl.graded()
        # Count once and reuse the value for both the log line and the checks
        # below instead of issuing the same count query twice.
        graded_sub_count = subs_graded_by_instructor.count()
        log.info("Checking location {0} to see if essay count {1} greater than min {2}".format(
            location,
            graded_sub_count,
            settings.MIN_TO_USE_ML,
        ))

        #check to see if there are enough instructor graded essays for location
        if graded_sub_count >= settings.MIN_TO_USE_ML:

            location_suffixes = ml_grading_util.generate_rubric_location_suffixes(
                subs_graded_by_instructor, grading=False)

            # Never train on (or report) more than MAX_TO_USE_ML essays.
            graded_sub_count = min(graded_sub_count, settings.MAX_TO_USE_ML)
            subs_graded_by_instructor = subs_graded_by_instructor[:settings.MAX_TO_USE_ML]

            # Per-submission rubric scores; only read for rubric items (m > 0).
            sub_rubric_scores = []
            if len(location_suffixes) > 0:
                for sub in subs_graded_by_instructor:
                    rubric_success, rubric_scores = controller.rubric_functions.get_submission_rubric_instructor_scores(sub)
                    sub_rubric_scores.append(rubric_scores)

            for m, suffix in enumerate(location_suffixes):
                log.info("Currently on location {0}.  Greater than zero is a rubric item.".format(m))
                model_location = location + suffix
                #Get paths to ml model from database
                relative_model_path, full_model_path = ml_grading_util.get_model_path(model_location)
                #Get last created model for given location
                transaction.commit()
                success, latest_created_model = ml_grading_util.get_latest_created_model(model_location)

                if success:
                    sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
                else:
                    sub_count_diff = graded_sub_count

                #Retrain if no model exists, or every 5 graded essays.
                if success and sub_count_diff < 5:
                    continue

                text = [str(i.student_response.encode('ascii', 'ignore')) for i in subs_graded_by_instructor]
                ids = [i.id for i in subs_graded_by_instructor]

                #TODO: Make queries more efficient
                #This is for the basic overall score
                if m == 0:
                    scores = [z.get_last_grader().score for z in subs_graded_by_instructor]
                else:
                    scores = [z[m - 1] for z in sub_rubric_scores]

                #Get the first graded submission, so that we can extract metadata like rubric, etc, from it
                first_sub = subs_graded_by_instructor[0]

                prompt = str(first_sub.prompt.encode('ascii', 'ignore'))
                rubric = str(first_sub.rubric.encode('ascii', 'ignore'))

                transaction.commit()

                #Checks to see if another model creator process has started a model for this location
                success, model_started, created_model = ml_grading_util.check_if_model_started(model_location)

                #Checks to see if model was started a long time ago, and removes and retries if it was.
                if model_started:
                    now = timezone.now()
                    second_difference = (now - created_model.date_modified).total_seconds()
                    if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                        log.error("Model for location {0} started over {1} seconds ago, removing and re-attempting.".format(
                            model_location, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                        created_model.delete()
                        model_started = False

                # Another creation is still in progress for this location; leave it alone.
                if model_started:
                    continue

                created_model_dict_initial = {
                    'max_score': first_sub.max_score,
                    'prompt': prompt,
                    'rubric': rubric,
                    'location': model_location,
                    'course_id': first_sub.course_id,
                    'submission_ids_used': json.dumps(ids),
                    'problem_id': first_sub.problem_id,
                    'model_relative_path': relative_model_path,
                    'model_full_path': full_model_path,
                    'number_of_essays': graded_sub_count,
                    'creation_succeeded': False,
                    'creation_started': True,
                    'creation_finished': False,
                }
                transaction.commit()
                success, initial_id = ml_grading_util.save_created_model(created_model_dict_initial)
                transaction.commit()
                # Second element of the most recent save_created_model() result.
                # NOTE(review): presumably an id on success and an error
                # description on failure -- confirm against ml_grading_util.
                # Tracking it here keeps the failure log below usable even when
                # the final save is never attempted.
                save_result = initial_id

                results = create.create(text, scores, prompt)

                scores = [int(score_item) for score_item in scores]
                #Add in needed stuff that ml creator does not pass back
                results.update({'text': text, 'score': scores, 'model_path': full_model_path,
                                'relative_model_path': relative_model_path, 'prompt': prompt})

                #Try to create model if ml model creator was successful
                if results['success']:
                    try:
                        saved, s3_public_url = save_model_file(results, settings.USE_S3_TO_STORE_MODELS)
                        results.update({'s3_public_url': s3_public_url, 'success': saved})
                        if not saved:
                            results['errors'].append("Could not save model.")
                    except Exception:
                        # A model that could not be stored must not be recorded
                        # as successfully created.
                        results['success'] = False
                        results['errors'].append("Could not save model.")
                        results['s3_public_url'] = ""
                        log.exception("Problem saving ML model.")

                    created_model_dict_final = {
                        'cv_kappa': results['cv_kappa'],
                        'cv_mean_absolute_error': results['cv_mean_absolute_error'],
                        'creation_succeeded': results['success'],
                        's3_public_url': results['s3_public_url'],
                        'model_stored_in_s3': settings.USE_S3_TO_STORE_MODELS,
                        's3_bucketname': str(settings.S3_BUCKETNAME),
                        'creation_finished': True,
                        'model_relative_path': relative_model_path,
                        'model_full_path': full_model_path,
                        'location': model_location,
                    }

                    transaction.commit()
                    success, save_result = ml_grading_util.save_created_model(
                        created_model_dict_final, update_model=True, update_id=initial_id)
                else:
                    log.error("Could not create an ML model.  Have you installed all the needed requirements for ease?  This is for location {0} and rubric item {1}".format(location, m))

                # `success` reflects the latest save_created_model() call: the
                # final update when training succeeded, the initial save otherwise.
                if not success:
                    log.error("ModelCreator creation failed.  Error: {0}".format(save_result))
                    statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                        tags=["success:False", "location:{0}".format(location)])

                log.info("Location: {0} Creation Status: {1} Errors: {2}".format(
                    full_model_path,
                    results['success'],
                    results['errors'],
                ))
                statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                    tags=["success:{0}".format(results['success']), "location:{0}".format(location)])
        util.log_connection_data()
    except Exception:
        log.exception("Problem creating model for location {0}".format(location))
        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
            tags=["success:Exception", "location:{0}".format(location)])
Beispiel #8
0
 def create_model(self):
     """
     Build a model from this instance's text and scores.

     When ``self.create_model_generic`` is truthy, ``self.text`` is read as a
     mapping and its ``'numeric_values'`` and ``'textual_values'`` entries
     (each defaulting to an empty list) are passed to
     ``create.create_generic``.  Otherwise ``self.text`` is passed straight to
     ``create.create`` with an empty prompt string.

     Returns:
         Whatever the selected ``create`` function returns.
     """
     if self.create_model_generic:
         numeric_values = self.text.get('numeric_values', [])
         textual_values = self.text.get('textual_values', [])
         return create.create_generic(numeric_values, textual_values, self.scores)

     return create.create(self.text, self.scores, "")