Example #1
0
def remove_old_model_files():
    """
    Deletes every file in settings.ML_MODEL_PATH that is not the latest model of some location.

    The whitelist holds the model_relative_path of the latest created model for each
    distinct CreatedModel location.  Deletion is best effort: a file that cannot be
    removed is logged and counted, and the remaining files are still processed.
    Returns nothing.
    """
    #Commit first; presumably so the queries below do not read from a stale transaction
    transaction.commit()
    locations = [
        cm['location']
        for cm in CreatedModel.objects.all().values('location').distinct()
    ]
    #A set makes the membership check below independent of the number of locations
    path_whitelist = set()
    for loc in locations:
        success, latest_model = ml_grading_util.get_latest_created_model(loc)
        if success:
            path_whitelist.add(str(latest_model.model_relative_path))

    model_dir = settings.ML_MODEL_PATH
    files_to_delete = [
        f for f in os.listdir(model_dir)
        if os.path.isfile(os.path.join(model_dir, f)) and f not in path_whitelist
    ]

    could_not_delete_count = 0
    for file_name in files_to_delete:
        try:
            os.remove(str(os.path.join(model_dir, file_name)))
        except Exception:
            #Keep going, but do not hide why the file could not be removed.
            #Exception (not a bare except) lets KeyboardInterrupt / SystemExit propagate.
            could_not_delete_count += 1
            log.exception("Could not delete old ML model file %r", file_name)

    log.debug("Deleted {0} old ML models.  Could not delete {1}".format(
        (len(files_to_delete) - could_not_delete_count),
        could_not_delete_count))
Example #2
0
def remove_old_model_files():
    """
    Removes stale model files from settings.ML_MODEL_PATH.

    A file is kept only when its name equals the model_relative_path of the latest
    created model for one of the distinct CreatedModel locations.  Files that cannot
    be deleted are logged and counted; they do not stop the cleanup.
    Returns nothing.
    """
    #Commit first; presumably so the queries below do not read from a stale transaction
    transaction.commit()
    locations = [cm['location'] for cm in CreatedModel.objects.all().values('location').distinct()]

    #Set instead of list: the directory listing is checked against it file by file
    path_whitelist = set()
    for loc in locations:
        success, latest_model = ml_grading_util.get_latest_created_model(loc)
        if success:
            path_whitelist.add(str(latest_model.model_relative_path))

    model_dir = settings.ML_MODEL_PATH
    onlyfiles = [f for f in os.listdir(model_dir) if os.path.isfile(os.path.join(model_dir, f))]
    files_to_delete = [f for f in onlyfiles if f not in path_whitelist]

    failed_count = 0
    for file_name in files_to_delete:
        try:
            os.remove(str(os.path.join(model_dir, file_name)))
        except Exception:
            #Best effort: record the failure with its traceback and continue.
            #Exception (not a bare except) lets KeyboardInterrupt / SystemExit propagate.
            failed_count += 1
            log.exception("Could not delete old ML model file %r", file_name)

    log.debug("Deleted {0} old ML models.  Could not delete {1}".format((
        len(files_to_delete) - failed_count), failed_count))
Example #3
0
def handle_single_location(location):
    """
    Creates or retrains the machine learning models for a single location.

    One model is handled per rubric location suffix.  A model is (re)trained when none
    exists yet, or when at least 5 more instructor graded essays are available than the
    latest model was built with.
    location - the location to build models for; it is concatenated with each suffix,
               so presumably a string
    Returns nothing.  Any exception is logged and reported to statsd instead of being raised.
    """
    try:
        transaction.commit()
        gc.collect()
        subs_graded_by_instructor = staff_grading_util.finished_submissions_graded_by_instructor(location)
        #Count once and reuse the value for the log line and the checks below
        graded_sub_count = subs_graded_by_instructor.count()
        log.debug("Checking location {0} to see if essay count {1} greater than min {2}".format(
            location,
            graded_sub_count,
            settings.MIN_TO_USE_ML,
        ))

        #check to see if there are enough instructor graded essays for location
        if graded_sub_count >= settings.MIN_TO_USE_ML:

            location_suffixes = ml_grading_util.generate_rubric_location_suffixes(subs_graded_by_instructor, grading=False)

            if settings.MAX_TO_USE_ML < graded_sub_count:
                graded_sub_count = settings.MAX_TO_USE_ML

            #Cap the submissions BEFORE collecting rubric scores.  Collecting them from the
            #uncapped set made the rubric score lists longer than the text list whenever
            #more than MAX_TO_USE_ML essays were graded.
            subs_graded_by_instructor = list(subs_graded_by_instructor[:settings.MAX_TO_USE_ML])

            sub_rubric_scores = []
            if len(location_suffixes) > 0:
                for sub in subs_graded_by_instructor:
                    success, scores = controller.rubric_functions.get_submission_rubric_instructor_scores(sub)
                    sub_rubric_scores.append(scores)

            for m, suffix in enumerate(location_suffixes):
                log.debug("Currently on location {0}.  Greater than zero is a rubric item.".format(m))
                #Get paths to ml model from database
                relative_model_path, full_model_path = ml_grading_util.get_model_path(location + suffix)
                #Get last created model for given location
                transaction.commit()
                success, latest_created_model = ml_grading_util.get_latest_created_model(location + suffix)

                if success:
                    sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
                else:
                    sub_count_diff = graded_sub_count

                #Retrain if no model exists, or every 5 graded essays.
                if not success or sub_count_diff >= 5:

                    text = [str(i.student_response.encode('ascii', 'ignore')) for i in subs_graded_by_instructor]
                    ids = [i.id for i in subs_graded_by_instructor]

                    #TODO: Make queries more efficient
                    #This is for the basic overall score
                    if m == 0:
                        scores = [z.get_last_grader().score for z in subs_graded_by_instructor]
                    else:
                        scores = [z[m-1] for z in sub_rubric_scores]

                    #Get the first graded submission, so that we can extract metadata like rubric, etc, from it
                    first_sub = subs_graded_by_instructor[0]

                    prompt = str(first_sub.prompt.encode('ascii', 'ignore'))
                    rubric = str(first_sub.rubric.encode('ascii', 'ignore'))

                    transaction.commit()

                    #Checks to see if another model creator process has started amodel for this location
                    success, model_started, created_model = ml_grading_util.check_if_model_started(location + suffix)

                    #Checks to see if model was started a long time ago, and removes and retries if it was.
                    if model_started:
                        now = timezone.now()
                        second_difference = (now - created_model.date_modified).total_seconds()
                        if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                            log.error("Model for location {0} started over {1} seconds ago, removing and re-attempting.".format(
                                location + suffix, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                            created_model.delete()
                            model_started = False

                    if not model_started:
                        #Saved before training starts, with creation_started set, so the
                        #check_if_model_started call above can see that work is in progress
                        created_model_dict_initial = {
                            'max_score' : first_sub.max_score,
                            'prompt' : prompt,
                            'rubric' : rubric,
                            'location' : location + suffix,
                            'course_id' : first_sub.course_id,
                            'submission_ids_used' : json.dumps(ids),
                            'problem_id' :  first_sub.problem_id,
                            'model_relative_path' : relative_model_path,
                            'model_full_path' : full_model_path,
                            'number_of_essays' : graded_sub_count,
                            'creation_succeeded': False,
                            'creation_started' : True,
                            'creation_finished' : False,
                            }
                        transaction.commit()
                        success, initial_id = ml_grading_util.save_created_model(created_model_dict_initial)
                        transaction.commit()

                        results = create.create(text, scores, prompt)

                        scores = [int(score_item) for score_item in scores]
                        #Add in needed stuff that ml creator does not pass back
                        results.update({'text' : text, 'score' : scores, 'model_path' : full_model_path,
                                        'relative_model_path' : relative_model_path, 'prompt' : prompt})

                        #Try to create model if ml model creator was successful
                        if results['success']:
                            try:
                                success, s3_public_url = save_model_file(results, settings.USE_S3_TO_STORE_MODELS)
                                results.update({'s3_public_url' : s3_public_url, 'success' : success})
                                if not success:
                                    results['errors'].append("Could not save model.")
                            except Exception:
                                results['errors'].append("Could not save model.")
                                results['s3_public_url'] = ""
                                #The model file was not stored, so the creation must not be recorded as succeeded
                                results['success'] = False
                                log.exception("Problem saving ML model.")

                        created_model_dict_final = {
                            'cv_kappa' : results['cv_kappa'],
                            'cv_mean_absolute_error' : results['cv_mean_absolute_error'],
                            'creation_succeeded': results['success'],
                            #s3_public_url is only set by the save step above; when model creation
                            #itself failed the key may be missing, so fall back to an empty url
                            's3_public_url' : results.get('s3_public_url', ""),
                            'model_stored_in_s3' : settings.USE_S3_TO_STORE_MODELS,
                            's3_bucketname' : str(settings.S3_BUCKETNAME),
                            'creation_finished' : True,
                            'model_relative_path' : relative_model_path,
                            'model_full_path' : full_model_path,
                            'location' : location + suffix,
                            }

                        transaction.commit()
                        #On failure the second value is logged as the error, see below
                        success, saved_id = ml_grading_util.save_created_model(created_model_dict_final, update_model=True, update_id=initial_id)

                        if not success:
                            log.error("ModelCreator creation failed.  Error: {0}".format(saved_id))
                            statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                                tags=["success:False", "location:{0}".format(location)])

                        log.debug("Location: {0} Creation Status: {1} Errors: {2}".format(
                            full_model_path,
                            results['success'],
                            results['errors'],
                        ))
                        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                            tags=["success:{0}".format(results['success']), "location:{0}".format(location)])
        util.log_connection_data()
    except Exception:
        #Top level boundary: log and count the failure, but let KeyboardInterrupt / SystemExit through
        log.exception("Problem creating model for location {0}".format(location))
        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
            tags=["success:Exception", "location:{0}".format(location)])
Example #4
0
def handle_single_problem(problem):
    """
    Creates a machine learning model for a given problem.
    problem - A Problem instance (django model)

    One model is considered per target in problem.max_target_scores.  A model is trained
    when none exists for the target, or when at least 10 more graded essays are available
    than the latest model was built with.
    Returns a tuple (success, message).  On the early exits (encoding failure, too few
    essays) success is False and message describes the problem.  Otherwise success is the
    outcome of the last model that was attempted (False if none was) and message is
    always "Creation succeeded.", even when success is False.
    """
    overall_success = False
    #This function is called by celery.  This ensures that the database is not stuck in an old transaction
    transaction.commit()
    #Get prompt and essays from problem (needed to train a model)
    prompt = problem.prompt
    essays = problem.essay_set.filter(essay_type="train")

    #Now, try to decode the grades from the essaygrade objects
    essay_text = []
    essay_grades = []
    for essay in essays:
        try:
            #Get an instructor score for a given essay (stored as a json string in DB) and convert to a list.  Looks like [1,1]
            #where each number denotes a score for a given target number
            grades = json.loads(essay.get_instructor_scored()[0].target_scores)
            #Read the text from the same essay object as the grade, so both lists stay in sync
            text = essay.essay_text
        except Exception:
            log.error("Could not get latest instructor scored for {0}".format(essay.id))
        else:
            #Only append once both values were found.  Both lists need to be in sync.
            essay_grades.append(grades)
            essay_text.append(text)

    try:
        #This is needed to remove stray characters that could break the machine learning code
        essay_text = [et.encode('ascii', 'ignore') for et in essay_text]
    except Exception:
        error_message = "Could not correctly encode some submissions: {0}".format(essay_text)
        log.error(error_message)
        transaction.commit()
        return False, error_message

    #Get the maximum target scores from the problem
    first_len = len(json.loads(problem.max_target_scores))
    bad_indices = set()
    for i, grades in enumerate(essay_grades):
        #All of the lists within the essay grade list (ie [[[1,1],[2,2]]) need to be the same length
        if len(grades) != first_len:
            error_message = "Problem with an instructor scored essay! {0}".format(essay_grades)
            log.info(error_message)
            bad_indices.add(i)

    essay_text = [text for t, text in enumerate(essay_text) if t not in bad_indices]
    essay_grades = [grades for t, grades in enumerate(essay_grades) if t not in bad_indices]

    #Too many essays can take a very long time to train and eat up system resources.  Enforce a max.
    # Accuracy increases logarithmically, anyways, so you dont lose much here.
    if len(essay_text) > MAX_ESSAYS_TO_TRAIN_WITH:
        essay_text = essay_text[:MAX_ESSAYS_TO_TRAIN_WITH]
        essay_grades = essay_grades[:MAX_ESSAYS_TO_TRAIN_WITH]

    graded_sub_count = len(essay_text)
    #If there are too few essays, then don't train a model.  Need a minimum to get any kind of accuracy.
    if graded_sub_count < MIN_ESSAYS_TO_TRAIN_WITH:
        error_message = "Too few too create a model for problem {0}  need {1} only have {2}".format(problem, MIN_ESSAYS_TO_TRAIN_WITH, graded_sub_count)
        log.error(error_message)
        transaction.commit()
        return False, error_message

    #Loops through each potential target
    for m in range(first_len):
        #Gets all of the scores for this particular target
        scores = [s[m] for s in essay_grades]
        max_score = max(scores)
        log.debug("Currently on location {0} in problem {1}".format(m, problem.id))
        #Get paths to ml model from database
        relative_model_path, full_model_path = ml_grading_util.get_model_path(problem, m)
        #Get last created model for given location
        transaction.commit()
        success, latest_created_model = ml_grading_util.get_latest_created_model(problem, m)

        if success:
            sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
        else:
            sub_count_diff = graded_sub_count

        #Retrain if no model exists, or every 10 graded essays.
        if not success or sub_count_diff >= 10:
            log.info("Starting to create a model because none exists or it is time to retrain.")
            #Checks to see if another model creator process has started amodel for this location
            success, model_started, created_model = ml_grading_util.check_if_model_started(problem)

            #Checks to see if model was started a long time ago, and removes and retries if it was.
            if model_started:
                log.info("A model was started previously.")
                now = timezone.now()
                second_difference = (now - created_model.modified).total_seconds()
                if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                    log.info("Model for problem {0} started over {1} seconds ago, removing and re-attempting.".format(
                        problem.id, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                    created_model.delete()
                    model_started = False
            #If a model has not been started, then initialize an entry in the database to prevent other threads from duplicating work
            if not model_started:
                created_model_dict_initial = {
                    'max_score' : max_score,
                    'prompt' : prompt,
                    'problem' : problem,
                    'model_relative_path' : relative_model_path,
                    'model_full_path' : full_model_path,
                    'number_of_essays' : graded_sub_count,
                    'creation_succeeded': False,
                    'creation_started' : True,
                    'target_number' : m,
                    }
                created_model = CreatedModel(**created_model_dict_initial)
                created_model.save()
                transaction.commit()

                if not isinstance(prompt, basestring):
                    try:
                        prompt = str(prompt)
                    except Exception:
                        prompt = ""
                prompt = prompt.encode('ascii', 'ignore')

                #Call on the ease repo to create a model
                results = create.create(essay_text, scores, prompt)

                scores = [int(score_item) for score_item in scores]
                #Add in needed stuff that ml creator does not pass back
                results.update({
                    'model_path' : full_model_path,
                    'relative_model_path' : relative_model_path
                })

                #Try to create model if ml model creator was successful
                if results['success']:
                    try:
                        success, s3_public_url = save_model_file(results, settings.USE_S3_TO_STORE_MODELS)
                        results.update({'s3_public_url' : s3_public_url, 'success' : success})
                        if not success:
                            results['errors'].append("Could not save model.")
                    except Exception:
                        results['errors'].append("Could not save model.")
                        results['s3_public_url'] = ""
                        #The model file was not stored, so the creation must not be recorded as succeeded
                        results['success'] = False
                        log.exception("Problem saving ML model.")

                #Read the flag only after the save attempt, so a failed save is reported to the caller
                overall_success = results['success']

                created_model_dict_final = {
                    'cv_kappa' : results['cv_kappa'],
                    'cv_mean_absolute_error' : results['cv_mean_absolute_error'],
                    'creation_succeeded': results['success'],
                    'creation_started' : False,
                    #s3_public_url is only set by the save step above; when model creation
                    #itself failed the key may be missing, so fall back to an empty url
                    's3_public_url' : results.get('s3_public_url', ""),
                    'model_stored_in_s3' : settings.USE_S3_TO_STORE_MODELS,
                    's3_bucketname' : str(settings.S3_BUCKETNAME),
                    'model_relative_path' : relative_model_path,
                    'model_full_path' : full_model_path,
                    }

                transaction.commit()
                try:
                    CreatedModel.objects.filter(pk=created_model.pk).update(**created_model_dict_final)
                except Exception as err:
                    #Report the actual error (the builtin id function used to be formatted here)
                    log.error("ModelCreator creation failed.  Error: {0}".format(err))

                log.debug("Location: {0} Creation Status: {1} Errors: {2}".format(
                    full_model_path,
                    results['success'],
                    results['errors'],
                ))
    transaction.commit()
    return overall_success, "Creation succeeded."
Example #5
0
def handle_single_item(controller_session):
    """
    Fetches one submission from the grading controller, grades it with the ML models
    and posts the result back to the controller.

    The first location suffix (m == 0) yields the overall score; every further suffix
    yields one rubric item score.
    controller_session - session object passed to get_item_from_controller and util._http_post
    Returns the success flag reported by get_item_from_controller.
    """
    sub_get_success, content = get_item_from_controller(controller_session)
    #Grade and handle here
    if sub_get_success:
        transaction.commit()
        sub = Submission.objects.get(id=int(content['submission_id']))
        sl = staff_grading_util.StaffLocation(sub.location)
        subs_graded_by_instructor = sl.graded()
        #NOTE(review): first_sub and parsed_rubric are not used below.  Kept because the lookup
        #and the parse may be relied on to fail early for bad data - confirm before removing.
        first_sub = subs_graded_by_instructor.order_by('date_created')[0]
        parsed_rubric = rubric_functions.parse_rubric(first_sub.rubric)

        #strip out unicode and other characters in student response
        #Needed, or grader may potentially fail
        #TODO: Handle unicode in student responses properly
        student_response = sub.student_response.encode('ascii', 'ignore')

        #Get the latest created model for the given location
        transaction.commit()

        location_suffixes = ml_grading_util.generate_rubric_location_suffixes(subs_graded_by_instructor, grading = True)

        #Failure defaults, so that an empty suffix list posts a failed grade instead of
        #raising NameError further down
        results = RESULT_FAILURE_DICT
        final_results = RESULT_FAILURE_DICT
        status = GraderStatus.failure
        rubric_scores_complete = True
        rubric_scores = []

        for m, suffix in enumerate(location_suffixes):
            model_found, created_model = ml_grading_util.get_latest_created_model(sub.location + suffix)

            if not model_found:
                log.error("Could not identify a valid created model!")
                if m == 0:
                    results = RESULT_FAILURE_DICT
                    status = GraderStatus.failure
                    statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
                        tags=["success:False"])

            else:

                #First attempt: load the model with use_full_path=False
                success, grader_data = load_model_file(created_model, use_full_path=False)
                if success:
                    results = grade.grade(grader_data, student_response)
                else:
                    results = RESULT_FAILURE_DICT

                #If the above fails, try using the full path in the created_model object
                if not results['success'] and not created_model.model_stored_in_s3:
                    try:
                        success, grader_data = load_model_file(created_model, use_full_path=True)
                        if success:
                            results = grade.grade(grader_data, student_response)
                        else:
                            results = RESULT_FAILURE_DICT
                    except Exception:
                        error_message = "Could not find a valid model file."
                        log.exception(error_message)
                        results = RESULT_FAILURE_DICT

                log.info("ML Grader:  Success: {0} Errors: {1}".format(results['success'], results['errors']))
                statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
                    tags=["success:{0}".format(results['success']), 'location:{0}'.format(sub.location)])

                #Set grader status according to success/fail
                if results['success']:
                    status = GraderStatus.success
                else:
                    status = GraderStatus.failure

            if m == 0:
                final_results = results
            elif not model_found or results['success'] == False:
                #When a rubric model is missing, results still holds the previous item's outcome.
                #Its score must not be counted again as the score of this rubric item.
                rubric_scores_complete = False
            else:
                rubric_scores.append(int(results['score']))
        if len(rubric_scores) == 0:
            rubric_scores_complete = False

        #The overall score comes from the first suffix; feedback, confidence and errors come from
        #the last graded item.  NOTE(review): confirm that mix is intended.
        grader_dict = {
            'score': int(final_results['score']),
            'feedback': json.dumps(results['feedback']),
            'status': status,
            'grader_id': 1,
            'grader_type': "ML",
            'confidence': results['confidence'],
            'submission_id': sub.id,
            'errors' : ' ' .join(results['errors']),
            'rubric_scores_complete' : rubric_scores_complete,
            'rubric_scores' : json.dumps(rubric_scores),
            }
        #Create grader object in controller by posting back results
        created, msg = util._http_post(
            controller_session,
            urlparse.urljoin(settings.GRADING_CONTROLLER_INTERFACE['url'],
                project_urls.ControllerURLs.put_result),
            grader_dict,
            settings.REQUESTS_TIMEOUT,
        )
    else:
        log.error("Error getting item from controller or no items to get.")
        statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
            tags=["success:False"])

    util.log_connection_data()
    return sub_get_success
Example #6
0
def handle_single_problem(problem):
    """
    Creates a machine learning model for a given problem.
    problem - A Problem instance (django model)

    One model is considered per target in problem.max_target_scores.  A model is trained
    when none exists for the target, or when at least 10 more graded essays are available
    than the latest model was built with.
    Returns a tuple (success, message).  On the early exits (encoding failure, too few
    essays) success is False and message describes the problem.  Otherwise success is the
    outcome of the last model that was attempted (False if none was) and message is
    always "Creation succeeded.", even when success is False.
    """
    overall_success = False
    #This function is called by celery.  This ensures that the database is not stuck in an old transaction
    transaction.commit_unless_managed()
    #Get prompt and essays from problem (needed to train a model)
    prompt = problem.prompt
    essays = problem.essay_set.filter(essay_type="train")

    #Now, try to decode the grades from the essaygrade objects
    essay_text = []
    essay_grades = []
    for essay in essays:
        try:
            #Get an instructor score for a given essay (stored as a json string in DB) and convert to a list.  Looks like [1,1]
            #where each number denotes a score for a given target number
            grades = json.loads(
                essay.get_instructor_scored()[0].target_scores)
            #Read the text from the same essay object as the grade, so both lists stay in sync
            text = essay.essay_text
        except Exception:
            log.exception(
                "Could not get latest instructor scored for {0}".format(
                    essay))
        else:
            #Only append once both values were found.  Both lists need to be in sync.
            essay_grades.append(grades)
            essay_text.append(text)

    try:
        #This is needed to remove stray characters that could break the machine learning code
        essay_text = [et.encode('ascii', 'ignore') for et in essay_text]
    except Exception:
        error_message = "Could not correctly encode some submissions: {0}".format(
            essay_text)
        log.exception(error_message)
        return False, error_message

    #Get the maximum target scores from the problem
    first_len = len(json.loads(problem.max_target_scores))
    bad_indices = set()
    for i, grades in enumerate(essay_grades):
        #All of the lists within the essay grade list (ie [[[1,1],[2,2]]) need to be the same length
        if len(grades) != first_len:
            error_message = "Problem with an instructor scored essay! {0}".format(
                essay_grades)
            log.info(error_message)
            bad_indices.add(i)

    essay_text = [
        text for t, text in enumerate(essay_text) if t not in bad_indices
    ]
    essay_grades = [
        grades for t, grades in enumerate(essay_grades)
        if t not in bad_indices
    ]

    #Too many essays can take a very long time to train and eat up system resources.  Enforce a max.
    # Accuracy increases logarithmically, anyways, so you dont lose much here.
    if len(essay_text) > MAX_ESSAYS_TO_TRAIN_WITH:
        essay_text = essay_text[:MAX_ESSAYS_TO_TRAIN_WITH]
        essay_grades = essay_grades[:MAX_ESSAYS_TO_TRAIN_WITH]

    graded_sub_count = len(essay_text)
    #If there are too few essays, then don't train a model.  Need a minimum to get any kind of accuracy.
    if graded_sub_count < MIN_ESSAYS_TO_TRAIN_WITH:
        error_message = "Too few too create a model for problem {0}  need {1} only have {2}".format(
            problem, MIN_ESSAYS_TO_TRAIN_WITH, graded_sub_count)
        log.error(error_message)
        return False, error_message

    #Loops through each potential target
    for m in range(first_len):
        #Gets all of the scores for this particular target
        scores = [s[m] for s in essay_grades]
        max_score = max(scores)
        log.debug("Currently on location {0} in problem {1}".format(
            m, problem.id))
        #Get paths to ml model from database
        relative_model_path, full_model_path = ml_grading_util.get_model_path(
            problem, m)
        #Get last created model for given location
        transaction.commit_unless_managed()
        success, latest_created_model = ml_grading_util.get_latest_created_model(
            problem, m)

        if success:
            sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
        else:
            sub_count_diff = graded_sub_count

        #Retrain if no model exists, or every 10 graded essays.
        if not success or sub_count_diff >= 10:
            log.info(
                "Starting to create a model because none exists or it is time to retrain."
            )
            #Checks to see if another model creator process has started amodel for this location
            success, model_started, created_model = ml_grading_util.check_if_model_started(
                problem)

            #Checks to see if model was started a long time ago, and removes and retries if it was.
            if model_started:
                log.info("A model was started previously.")
                now = timezone.now()
                second_difference = (now -
                                     created_model.modified).total_seconds()
                if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                    log.info(
                        "Model for problem {0} started over {1} seconds ago, removing and re-attempting."
                        .format(problem.id,
                                settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                    created_model.delete()
                    model_started = False
            #If a model has not been started, then initialize an entry in the database to prevent other threads from duplicating work
            if not model_started:
                created_model_dict_initial = {
                    'max_score': max_score,
                    'prompt': prompt,
                    'problem': problem,
                    'model_relative_path': relative_model_path,
                    'model_full_path': full_model_path,
                    'number_of_essays': graded_sub_count,
                    'creation_succeeded': False,
                    'creation_started': True,
                    'target_number': m,
                }
                created_model = CreatedModel(**created_model_dict_initial)
                created_model.save()
                transaction.commit_unless_managed()

                if not isinstance(prompt, basestring):
                    try:
                        prompt = str(prompt)
                    except Exception:
                        prompt = ""
                prompt = prompt.encode('ascii', 'ignore')

                #Call on the ease repo to create a model
                results = create.create(essay_text, scores, prompt)

                scores = [int(score_item) for score_item in scores]
                #Add in needed stuff that ml creator does not pass back
                results.update({
                    'model_path': full_model_path,
                    'relative_model_path': relative_model_path
                })

                #Try to create model if ml model creator was successful
                if results['success']:
                    try:
                        success, s3_public_url = save_model_file(
                            results, settings.USE_S3_TO_STORE_MODELS)
                        results.update({
                            's3_public_url': s3_public_url,
                            'success': success
                        })
                        if not success:
                            results['errors'].append("Could not save model.")
                    except Exception:
                        results['errors'].append("Could not save model.")
                        results['s3_public_url'] = ""
                        #The model file was not stored, so the creation must not be recorded as succeeded
                        results['success'] = False
                        log.exception("Problem saving ML model.")

                #Read the flag only after the save attempt, so a failed save is reported to the caller
                overall_success = results['success']

                created_model_dict_final = {
                    'cv_kappa': results['cv_kappa'],
                    'cv_mean_absolute_error':
                    results['cv_mean_absolute_error'],
                    'creation_succeeded': results['success'],
                    'creation_started': False,
                    #s3_public_url is only set by the save step above; when model creation
                    #itself failed the key may be missing, so fall back to an empty url
                    's3_public_url': results.get('s3_public_url', ""),
                    'model_stored_in_s3': settings.USE_S3_TO_STORE_MODELS,
                    's3_bucketname': str(settings.S3_BUCKETNAME),
                    'model_relative_path': relative_model_path,
                    'model_full_path': full_model_path,
                }

                transaction.commit_unless_managed()
                try:
                    CreatedModel.objects.filter(pk=created_model.pk).update(
                        **created_model_dict_final)
                except Exception as err:
                    #Report the actual error (the builtin id function used to be formatted here)
                    log.error(
                        "ModelCreator creation failed.  Error: {0}".format(err))

                log.debug(
                    "Location: {0} Creation Status: {1} Errors: {2}".format(
                        full_model_path,
                        results['success'],
                        results['errors'],
                    ))
    transaction.commit_unless_managed()
    return overall_success, "Creation succeeded."
Example #7
0
def handle_single_essay(essay):
    """
    Grade one essay against every target of its problem and save the result.

    For each target index the latest created model is looked up and loaded,
    and the essay text is scored with it.  When every target has been scored
    an EssayGrade is saved and the essay is flagged as machine graded.

    Args:
        essay: essay object to grade.  This function reads ``essay_text`` and
            ``problem.max_target_scores`` (a JSON encoded sequence) and sets
            ``has_been_ml_graded``.

    Returns:
        A ``(success, message)`` tuple.  ``success`` is False as soon as a
        model cannot be found or a target cannot be graded; nothing is saved
        in that case.
    """
    #Needed to ensure that the DB is not wrapped in a transaction and pulls old data
    transaction.commit_unless_managed()

    #strip out unicode and other characters in student response
    #Needed, or grader may potentially fail
    #TODO: Handle unicode in student responses properly
    student_response = essay.essay_text.encode('ascii', 'ignore')

    #Gets both the max scores for each target and the number of targets
    target_max_scores = json.loads(essay.problem.max_target_scores)
    target_counts = len(target_max_scores)

    #Without targets the loop below never runs and there would be no result
    #to build the grade from, so report a failure instead of crashing.
    if target_counts == 0:
        error_message = "Essay problem has no targets to grade."
        log.error(error_message)
        return False, error_message

    target_scores = []
    final_results = None
    for m in xrange(0, target_counts):
        #Gets latest model for a given problem and target
        success, created_model = ml_grading_util.get_latest_created_model(essay.problem, m)

        if not success:
            error_message = "Could not identify a valid created model!"
            log.error(error_message)
            return False, error_message

        #Try to load the model file through its relative path first
        success, grader_data = load_model_file(created_model, use_full_path=False)
        if success:
            #Send to ML grading algorithm to be graded
            results = grade.grade(grader_data, student_response)
        else:
            results = RESULT_FAILURE_DICT

        #If the above fails, try using the full path in the created_model object
        if not results['success'] and not created_model.model_stored_in_s3:
            try:
                success, grader_data = load_model_file(created_model, use_full_path=True)
                if success:
                    results = grade.grade(grader_data, student_response)
                else:
                    results = RESULT_FAILURE_DICT
            except Exception:
                error_message = "Could not find a valid model file."
                log.exception(error_message)
                results = RESULT_FAILURE_DICT

        #The result for the first target supplies success and confidence of the grade
        if m == 0:
            final_results = results
        if not results['success']:
            error_message = "Unsuccessful grading: {0}".format(results)
            #No exception is active here, so log.error is used rather than log.exception
            log.error(error_message)
            return False, error_message
        target_scores.append(int(results['score']))

    grader_dict = {
        'essay': essay,
        'target_scores': json.dumps(target_scores),
        'grader_type': GraderTypes.machine,
        'feedback': '',
        'annotated_text': '',
        'premium_feedback_scores': json.dumps([]),
        'success': final_results['success'],
        'confidence': final_results['confidence'],
    }

    # Create grader object in controller by posting back results
    essay_grade = EssayGrade(**grader_dict)
    essay_grade.save()
    #Update the essay so that it doesn't keep trying to re-grade
    essay.has_been_ml_graded = True
    essay.save()
    transaction.commit_unless_managed()
    return True, "Successfully scored!"
Example #8
0
def handle_single_item(controller_session):
    """
    Pull one submission from the grading controller, grade it, and post the result back.

    The submission is graded once per location suffix returned by
    ``ml_grading_util.generate_rubric_location_suffixes``.  The result for the
    first suffix provides the posted score; each later suffix that grades
    successfully adds one entry to ``rubric_scores``.  A suffix whose model is
    missing is handled like any other failed grading attempt.

    Args:
        controller_session: session handed to ``get_item_from_controller`` and
            ``util._http_post`` when talking to the grading controller.

    Returns:
        The success flag returned by ``get_item_from_controller``.  It only
        tells whether an item was fetched, not whether grading succeeded.
    """
    sub_get_success, content = get_item_from_controller(controller_session)
    #Grade and handle here
    if sub_get_success:
        transaction.commit()
        sub = Submission.objects.get(id=int(content['submission_id']))
        sl = staff_grading_util.StaffLocation(sub.location)
        subs_graded_by_instructor = sl.graded()
        first_sub = subs_graded_by_instructor.order_by('date_created')[0]
        #NOTE(review): the parsed rubric is not used below; the call is kept
        #in case parse_rubric is relied on for validation - confirm.
        parsed_rubric = rubric_functions.parse_rubric(first_sub.rubric)

        #strip out unicode and other characters in student response
        #Needed, or grader may potentially fail
        #TODO: Handle unicode in student responses properly
        student_response = sub.student_response.encode('ascii', 'ignore')

        #Get the latest created model for the given location
        transaction.commit()

        location_suffixes = ml_grading_util.generate_rubric_location_suffixes(
            subs_graded_by_instructor, grading=True)

        #Start from a failed state so that an empty suffix list still posts a
        #well-formed failure result instead of hitting unbound names.
        rubric_scores_complete = True
        rubric_scores = []
        results = RESULT_FAILURE_DICT
        final_results = results
        status = GraderStatus.failure

        for m, suffix in enumerate(location_suffixes):
            success, created_model = ml_grading_util.get_latest_created_model(
                sub.location + suffix)

            if not success:
                log.error("Could not identify a valid created model!")
                #Reset on every miss: otherwise the previous item's result
                #would be counted again as the score of this rubric item.
                results = RESULT_FAILURE_DICT
                status = GraderStatus.failure
                if m == 0:
                    statsd.increment(
                        "open_ended_assessment.grading_controller.call_ml_grader",
                        tags=["success:False"])

            else:
                #Try to load the model file through its relative path first
                success, grader_data = load_model_file(created_model,
                                                       use_full_path=False)
                if success:
                    results = grade.grade(grader_data, student_response)
                else:
                    results = RESULT_FAILURE_DICT

                #If the above fails, try using the full path in the created_model object
                if not results['success'] and not created_model.model_stored_in_s3:
                    try:
                        success, grader_data = load_model_file(
                            created_model, use_full_path=True)
                        if success:
                            results = grade.grade(grader_data,
                                                  student_response)
                        else:
                            results = RESULT_FAILURE_DICT
                    except Exception:
                        error_message = "Could not find a valid model file."
                        log.exception(error_message)
                        results = RESULT_FAILURE_DICT

                log.info("ML Grader:  Success: {0} Errors: {1}".format(
                    results['success'], results['errors']))
                statsd.increment(
                    "open_ended_assessment.grading_controller.call_ml_grader",
                    tags=[
                        "success:{0}".format(results['success']),
                        'location:{0}'.format(sub.location)
                    ])

                #Set grader status according to success/fail
                if results['success']:
                    status = GraderStatus.success
                else:
                    status = GraderStatus.failure

            #First suffix is the overall score; later ones are rubric items
            if m == 0:
                final_results = results
            elif not results['success']:
                rubric_scores_complete = False
            else:
                rubric_scores.append(int(results['score']))
        if not rubric_scores:
            rubric_scores_complete = False

        #NOTE(review): feedback, confidence and errors come from the last
        #graded item while the score comes from the first - confirm intended.
        grader_dict = {
            'score': int(final_results['score']),
            'feedback': json.dumps(results['feedback']),
            'status': status,
            'grader_id': 1,
            'grader_type': "ML",
            'confidence': results['confidence'],
            'submission_id': sub.id,
            'errors': ' '.join(results['errors']),
            'rubric_scores_complete': rubric_scores_complete,
            'rubric_scores': json.dumps(rubric_scores),
        }
        #Create grader object in controller by posting back results
        created, msg = util._http_post(
            controller_session,
            urlparse.urljoin(settings.GRADING_CONTROLLER_INTERFACE['url'],
                             project_urls.ControllerURLs.put_result),
            grader_dict,
            settings.REQUESTS_TIMEOUT,
        )
    else:
        log.error("Error getting item from controller or no items to get.")
        statsd.increment(
            "open_ended_assessment.grading_controller.call_ml_grader",
            tags=["success:False"])

    util.log_connection_data()
    return sub_get_success
Example #9
0
def handle_single_location(location):
    """
    Create or refresh the ML models for one location.

    Nothing is trained unless the location has at least
    ``settings.MIN_TO_USE_ML`` instructor graded submissions.  One model is
    handled per location suffix; a model is (re)trained when none exists yet
    or when at least 5 more graded essays are available than the latest model
    was built with.  A model that another process has already started is left
    alone unless it was started more than
    ``settings.TIME_BEFORE_REMOVING_STARTED_MODEL`` seconds ago.

    Args:
        location: location string of the problem; each suffix is appended to
            it to identify an individual model.

    Returns:
        None.  Any exception is caught, logged and reported to statsd.
    """
    try:
        transaction.commit()
        gc.collect()
        sl = staff_grading_util.StaffLocation(location)
        subs_graded_by_instructor = sl.graded()
        #Count once and reuse the value for the log line and the threshold check
        graded_sub_count = subs_graded_by_instructor.count()
        log.info("Checking location {0} to see if essay count {1} greater than min {2}".format(
            location,
            graded_sub_count,
            settings.MIN_TO_USE_ML,
        ))

        #check to see if there are enough instructor graded essays for location
        if graded_sub_count >= settings.MIN_TO_USE_ML:

            location_suffixes = ml_grading_util.generate_rubric_location_suffixes(subs_graded_by_instructor, grading=False)

            if settings.MAX_TO_USE_ML < graded_sub_count:
                graded_sub_count = settings.MAX_TO_USE_ML

            subs_graded_by_instructor = subs_graded_by_instructor[:settings.MAX_TO_USE_ML]

            sub_rubric_scores = []
            if len(location_suffixes) > 0:
                for sub in subs_graded_by_instructor:
                    rubric_success, rubric_scores = controller.rubric_functions.get_submission_rubric_instructor_scores(sub)
                    sub_rubric_scores.append(rubric_scores)

            for m, suffix in enumerate(location_suffixes):
                log.info("Currently on location {0}.  Greater than zero is a rubric item.".format(m))
                model_location = location + suffix
                #Get paths to ml model from database
                relative_model_path, full_model_path = ml_grading_util.get_model_path(model_location)
                #Get last created model for given location
                transaction.commit()
                model_exists, latest_created_model = ml_grading_util.get_latest_created_model(model_location)

                if model_exists:
                    sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
                else:
                    sub_count_diff = graded_sub_count

                #Retrain if no model exists, or every 5 graded essays.
                if model_exists and sub_count_diff < 5:
                    continue

                text = [str(i.student_response.encode('ascii', 'ignore')) for i in subs_graded_by_instructor]
                ids = [i.id for i in subs_graded_by_instructor]

                #TODO: Make queries more efficient
                #This is for the basic overall score
                if m == 0:
                    scores = [z.get_last_grader().score for z in subs_graded_by_instructor]
                else:
                    scores = [z[m - 1] for z in sub_rubric_scores]

                #Get the first graded submission, so that we can extract metadata like rubric, etc, from it
                first_sub = subs_graded_by_instructor[0]

                prompt = str(first_sub.prompt.encode('ascii', 'ignore'))
                rubric = str(first_sub.rubric.encode('ascii', 'ignore'))

                transaction.commit()

                #Checks to see if another model creator process has started a model for this location
                check_success, model_started, created_model = ml_grading_util.check_if_model_started(model_location)

                #Checks to see if model was started a long time ago, and removes and retries if it was.
                if model_started:
                    now = timezone.now()
                    second_difference = (now - created_model.date_modified).total_seconds()
                    if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                        log.error("Model for location {0} started over {1} seconds ago, removing and re-attempting.".format(
                            model_location, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                        created_model.delete()
                        model_started = False

                #Another process is still working on this model
                if model_started:
                    continue

                created_model_dict_initial = {
                    'max_score': first_sub.max_score,
                    'prompt': prompt,
                    'rubric': rubric,
                    'location': model_location,
                    'course_id': first_sub.course_id,
                    'submission_ids_used': json.dumps(ids),
                    'problem_id': first_sub.problem_id,
                    'model_relative_path': relative_model_path,
                    'model_full_path': full_model_path,
                    'number_of_essays': graded_sub_count,
                    'creation_succeeded': False,
                    'creation_started': True,
                    'creation_finished': False,
                }
                transaction.commit()
                #save_success/save_result always describe the most recent
                #save_created_model call, which is what gets reported below.
                save_success, save_result = ml_grading_util.save_created_model(created_model_dict_initial)
                initial_id = save_result
                transaction.commit()

                results = create.create(text, scores, prompt)

                scores = [int(score_item) for score_item in scores]
                #Add in needed stuff that ml creator does not pass back
                results.update({'text': text, 'score': scores, 'model_path': full_model_path,
                                'relative_model_path': relative_model_path, 'prompt': prompt})

                #Try to create model if ml model creator was successful
                if results['success']:
                    try:
                        file_saved, s3_public_url = save_model_file(results, settings.USE_S3_TO_STORE_MODELS)
                        results.update({'s3_public_url': s3_public_url, 'success': file_saved})
                        if not file_saved:
                            results['errors'].append("Could not save model.")
                    except Exception:
                        results['errors'].append("Could not save model.")
                        results['s3_public_url'] = ""
                        #The model file was not stored, so the record must not
                        #be marked as a successful creation.
                        results['success'] = False
                        log.exception("Problem saving ML model.")

                    created_model_dict_final = {
                        'cv_kappa': results['cv_kappa'],
                        'cv_mean_absolute_error': results['cv_mean_absolute_error'],
                        'creation_succeeded': results['success'],
                        's3_public_url': results['s3_public_url'],
                        'model_stored_in_s3': settings.USE_S3_TO_STORE_MODELS,
                        's3_bucketname': str(settings.S3_BUCKETNAME),
                        'creation_finished': True,
                        'model_relative_path': relative_model_path,
                        'model_full_path': full_model_path,
                        'location': model_location,
                    }

                    transaction.commit()
                    save_success, save_result = ml_grading_util.save_created_model(
                        created_model_dict_final, update_model=True, update_id=initial_id)
                else:
                    log.error("Could not create an ML model.  Have you installed all the needed requirements for ease?  This is for location {0} and rubric item {1}".format(location, m))

                if not save_success:
                    log.error("ModelCreator creation failed.  Error: {0}".format(save_result))
                    statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                        tags=["success:False", "location:{0}".format(location)])

                log.info("Location: {0} Creation Status: {1} Errors: {2}".format(
                    full_model_path,
                    results['success'],
                    results['errors'],
                ))
                statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                    tags=["success:{0}".format(results['success']), "location:{0}".format(location)])
        util.log_connection_data()
    except Exception:
        log.exception("Problem creating model for location {0}".format(location))
        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
            tags=["success:Exception", "location:{0}".format(location)])
Example #10
0
def handle_single_essay(essay):
    """
    Grade one essay against every target of its problem and save the result.

    For each target index the latest created model is looked up and loaded,
    and the essay text is scored with it.  When every target has been scored
    an EssayGrade is saved, the essay is flagged as machine graded and the
    essay's permissions are copied to the new grade.

    Args:
        essay: essay object to grade.  This function reads ``essay_text`` and
            ``problem.max_target_scores`` (a JSON encoded sequence) and sets
            ``has_been_ml_graded``.

    Returns:
        A ``(success, message)`` tuple.  ``success`` is False as soon as a
        model cannot be found or a target cannot be graded; nothing is saved
        in that case.
    """
    # Needed to ensure that the DB is not wrapped in a transaction and pulls old data
    transaction.commit()

    # strip out unicode and other characters in student response
    # Needed, or grader may potentially fail
    # TODO: Handle unicode in student responses properly
    student_response = essay.essay_text.encode('ascii', 'ignore')

    # Gets both the max scores for each target and the number of targets
    target_max_scores = json.loads(essay.problem.max_target_scores)
    target_counts = len(target_max_scores)

    # Without targets the loop below never runs and there would be no result
    # to build the grade from, so report a failure instead of crashing.
    if target_counts == 0:
        error_message = "Essay problem has no targets to grade."
        log.error(error_message)
        transaction.commit()
        return False, error_message

    target_scores = []
    final_results = None
    for m in xrange(0, target_counts):
        # Gets latest model for a given problem and target
        success, created_model = ml_grading_util.get_latest_created_model(
            essay.problem, m)

        if not success:
            formatted_feedback = "error"
            transaction.commit()
            return False, formatted_feedback

        # Try to load the model file through its relative path first
        success, grader_data = load_model_file(created_model,
                                               use_full_path=False)
        if success:
            # Send to ML grading algorithm to be graded
            results = grade.grade(grader_data, student_response)
        else:
            results = RESULT_FAILURE_DICT

        # If the above fails, try using the full path in the created_model object
        if not results['success'] and not created_model.model_stored_in_s3:
            try:
                success, grader_data = load_model_file(created_model,
                                                       use_full_path=True)
                if success:
                    results = grade.grade(grader_data, student_response)
                else:
                    results = RESULT_FAILURE_DICT
            except Exception:
                error_message = "Could not find a valid model file."
                log.exception(error_message)
                results = RESULT_FAILURE_DICT

        # The result for the first target supplies success and confidence of the grade
        if m == 0:
            final_results = results
        if not results['success']:
            error_message = "Unsuccessful grading: {0}".format(results)
            # No exception is active here, so log.error is used rather than log.exception
            log.error(error_message)
            transaction.commit()
            return False, error_message
        target_scores.append(int(results['score']))

    grader_dict = {
        'essay': essay,
        'target_scores': json.dumps(target_scores),
        'grader_type': GraderTypes.machine,
        'feedback': '',
        'annotated_text': '',
        'premium_feedback_scores': json.dumps([]),
        'success': final_results['success'],
        'confidence': final_results['confidence'],
    }

    # Create grader object in controller by posting back results
    essay_grade = EssayGrade(**grader_dict)
    essay_grade.save()
    # Update the essay so that it doesn't keep trying to re-grade
    essay.has_been_ml_graded = True
    essay.save()
    # copy permissions from the essay to the essaygrade
    helpers.copy_permissions(essay, Essay, essay_grade, EssayGrade)
    transaction.commit()
    return True, "Successfully scored!"