def remove_old_model_files():
    """
    Delete on-disk ML model files that no longer back the latest created
    model for any location.

    Builds a whitelist of the relative paths of the newest CreatedModel for
    each distinct location, then removes every other regular file found in
    settings.ML_MODEL_PATH.  Side effects only; returns None.
    """
    # Make sure we are not reading stale rows from an old transaction.
    transaction.commit()
    locations = [
        cm['location']
        for cm in CreatedModel.objects.all().values('location').distinct()
    ]
    # Set for O(1) membership tests below (was a list).
    path_whitelist = set()
    for loc in locations:
        success, latest_model = ml_grading_util.get_latest_created_model(loc)
        if success:
            path_whitelist.add(str(latest_model.model_relative_path))
    onlyfiles = [
        f for f in os.listdir(settings.ML_MODEL_PATH)
        if os.path.isfile(os.path.join(settings.ML_MODEL_PATH, f))
    ]
    files_to_delete = [f for f in onlyfiles if f not in path_whitelist]
    could_not_delete_list = []
    # Iterate filenames directly: the old loop shadowed the builtin 'file'
    # and tracked indexes it never used for anything but counting.
    for filename in files_to_delete:
        try:
            os.remove(str(os.path.join(settings.ML_MODEL_PATH, filename)))
        except OSError:
            # Narrowed from a bare except: only filesystem errors are expected.
            could_not_delete_list.append(filename)
    log.debug("Deleted {0} old ML models. Could not delete {1}".format(
        (len(files_to_delete) - len(could_not_delete_list)),
        len(could_not_delete_list)))
def remove_old_model_files():
    """
    Remove stale ML model files from settings.ML_MODEL_PATH.

    Any file that is not the latest created model for some location is
    deleted; deletion failures are counted rather than raised.
    Side effects only; returns None.
    """
    # End any stale transaction before querying.
    transaction.commit()
    locations = [
        cm['location']
        for cm in CreatedModel.objects.all().values('location').distinct()
    ]
    path_whitelist = []
    for loc in locations:
        success, latest_model = ml_grading_util.get_latest_created_model(loc)
        if success:
            path_whitelist.append(str(latest_model.model_relative_path))
    # Frozen set lookup instead of repeated list scans.
    keep = frozenset(path_whitelist)
    onlyfiles = [
        f for f in os.listdir(settings.ML_MODEL_PATH)
        if os.path.isfile(os.path.join(settings.ML_MODEL_PATH, f))
    ]
    files_to_delete = [f for f in onlyfiles if f not in keep]
    could_not_delete_list = []
    # 'file' previously shadowed the builtin; only OSError is expected here,
    # so the bare except has been narrowed.
    for filename in files_to_delete:
        try:
            os.remove(str(os.path.join(settings.ML_MODEL_PATH, filename)))
        except OSError:
            could_not_delete_list.append(filename)
    log.debug("Deleted {0} old ML models. Could not delete {1}".format(
        (len(files_to_delete) - len(could_not_delete_list)),
        len(could_not_delete_list)))
def handle_single_location(location):
    """
    Train (or retrain) ML grading models for every rubric target at a location.

    location - location string identifying the problem.

    Pulls instructor-graded submissions and, when at least
    settings.MIN_TO_USE_ML exist, trains one model per rubric location suffix,
    persisting CreatedModel records and the serialized model file.  All
    failures are logged and swallowed; nothing is returned.
    """
    try:
        # Fresh transaction so we do not read stale rows; free memory first.
        transaction.commit()
        gc.collect()
        subs_graded_by_instructor = staff_grading_util.finished_submissions_graded_by_instructor(location)
        log.debug("Checking location {0} to see if essay count {1} greater than min {2}".format(
            location,
            subs_graded_by_instructor.count(),
            settings.MIN_TO_USE_ML,
        ))
        graded_sub_count = subs_graded_by_instructor.count()
        #check to see if there are enough instructor graded essays for location
        if graded_sub_count >= settings.MIN_TO_USE_ML:
            location_suffixes = ml_grading_util.generate_rubric_location_suffixes(subs_graded_by_instructor, grading=False)
            # Collect instructor rubric scores once per submission; reused for
            # every rubric target below.
            sub_rubric_scores = []
            if len(location_suffixes) > 0:
                for sub in subs_graded_by_instructor:
                    success, scores = controller.rubric_functions.get_submission_rubric_instructor_scores(sub)
                    sub_rubric_scores.append(scores)
            # Cap training-set size; accuracy gains are roughly logarithmic.
            if settings.MAX_TO_USE_ML < graded_sub_count:
                graded_sub_count = settings.MAX_TO_USE_ML
                subs_graded_by_instructor = subs_graded_by_instructor[:settings.MAX_TO_USE_ML]
            for m in xrange(0, len(location_suffixes)):
                log.debug("Currently on location {0}. Greater than zero is a rubric item.".format(m))
                suffix = location_suffixes[m]
                #Get paths to ml model from database
                relative_model_path, full_model_path = ml_grading_util.get_model_path(location + suffix)
                #Get last created model for given location
                transaction.commit()
                success, latest_created_model = ml_grading_util.get_latest_created_model(location + suffix)
                if success:
                    sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
                else:
                    sub_count_diff = graded_sub_count
                #Retrain if no model exists, or every 5 graded essays.
                if not success or sub_count_diff >= 5:
                    text = [str(i.student_response.encode('ascii', 'ignore')) for i in subs_graded_by_instructor]
                    ids = [i.id for i in subs_graded_by_instructor]
                    #TODO: Make queries more efficient
                    #This is for the basic overall score
                    if m == 0:
                        scores = [z.get_last_grader().score for z in list(subs_graded_by_instructor)]
                    else:
                        scores = [z[m - 1] for z in sub_rubric_scores]
                    #Get the first graded submission, so that we can extract metadata like rubric, etc, from it
                    first_sub = subs_graded_by_instructor[0]
                    prompt = str(first_sub.prompt.encode('ascii', 'ignore'))
                    rubric = str(first_sub.rubric.encode('ascii', 'ignore'))
                    transaction.commit()
                    #Checks to see if another model creator process has started a model for this location
                    success, model_started, created_model = ml_grading_util.check_if_model_started(location + suffix)
                    #Checks to see if model was started a long time ago, and removes and retries if it was.
                    if model_started:
                        now = timezone.now()
                        second_difference = (now - created_model.date_modified).total_seconds()
                        if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                            log.error("Model for location {0} started over {1} seconds ago, removing and re-attempting.".format(
                                location + suffix, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                            created_model.delete()
                            model_started = False
                    if not model_started:
                        created_model_dict_initial = {
                            'max_score': first_sub.max_score,
                            'prompt': prompt,
                            'rubric': rubric,
                            'location': location + suffix,
                            'course_id': first_sub.course_id,
                            'submission_ids_used': json.dumps(ids),
                            'problem_id': first_sub.problem_id,
                            'model_relative_path': relative_model_path,
                            'model_full_path': full_model_path,
                            'number_of_essays': graded_sub_count,
                            'creation_succeeded': False,
                            'creation_started': True,
                            'creation_finished': False,
                        }
                        transaction.commit()
                        success, initial_id = ml_grading_util.save_created_model(created_model_dict_initial)
                        transaction.commit()
                        results = create.create(text, scores, prompt)
                        scores = [int(score_item) for score_item in scores]
                        #Add in needed stuff that ml creator does not pass back
                        results.update({
                            'text': text,
                            'score': scores,
                            'model_path': full_model_path,
                            'relative_model_path': relative_model_path,
                            'prompt': prompt,
                        })
                        #Try to save the model file if ml model creation succeeded
                        if results['success']:
                            try:
                                success, s3_public_url = save_model_file(results, settings.USE_S3_TO_STORE_MODELS)
                                results.update({'s3_public_url': s3_public_url, 'success': success})
                                if not success:
                                    results['errors'].append("Could not save model.")
                            except Exception:
                                # Narrowed from bare except; failure is recorded and logged.
                                results['errors'].append("Could not save model.")
                                results['s3_public_url'] = ""
                                log.exception("Problem saving ML model.")
                        created_model_dict_final = {
                            'cv_kappa': results['cv_kappa'],
                            'cv_mean_absolute_error': results['cv_mean_absolute_error'],
                            'creation_succeeded': results['success'],
                            's3_public_url': results['s3_public_url'],
                            'model_stored_in_s3': settings.USE_S3_TO_STORE_MODELS,
                            's3_bucketname': str(settings.S3_BUCKETNAME),
                            'creation_finished': True,
                            'model_relative_path': relative_model_path,
                            'model_full_path': full_model_path,
                            'location': location + suffix,
                        }
                        transaction.commit()
                        # final_id carries an error message on failure (it was
                        # previously named 'id', shadowing the builtin).
                        success, final_id = ml_grading_util.save_created_model(
                            created_model_dict_final, update_model=True, update_id=initial_id)
                        if not success:
                            log.error("ModelCreator creation failed. Error: {0}".format(final_id))
                            statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                                             tags=["success:False", "location:{0}".format(location)])
                        log.debug("Location: {0} Creation Status: {1} Errors: {2}".format(
                            full_model_path,
                            results['success'],
                            results['errors'],
                        ))
                        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                                         tags=["success:{0}".format(results['success']),
                                               "location:{0}".format(location)])
        util.log_connection_data()
    except Exception:
        # Narrowed from bare except so SystemExit/KeyboardInterrupt propagate.
        log.exception("Problem creating model for location {0}".format(location))
        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                         tags=["success:Exception", "location:{0}".format(location)])
def handle_single_problem(problem):
    """
    Creates a machine learning model for a given problem.

    problem - A Problem instance (django model)

    Returns (success (bool), message (str)).  One model is trained per
    scoring target defined by problem.max_target_scores.
    """
    overall_success = False
    #This function is called by celery.  This ensures that the database is not stuck in an old transaction
    transaction.commit()
    #Get prompt and essays from problem (needed to train a model)
    prompt = problem.prompt
    essays = problem.essay_set.filter(essay_type="train")
    #Now, try to decode the grades from the essaygrade objects
    essay_text = []
    essay_grades = []
    essay_text_vals = essays.values('essay_text')
    for i in xrange(0, len(essays)):
        try:
            #Get an instructor score for a given essay (stored as a json string in DB) and convert to a list.
            #Looks like [1,1] where each number denotes a score for a given target number
            essay_grades.append(json.loads(essays[i].get_instructor_scored()[0].target_scores))
            #If a grade could successfully be found, then add the essay text.  Both lists need to be in sync.
            essay_text.append(essay_text_vals[i]['essay_text'])
        except Exception:
            # Narrowed from bare except; an essay without an instructor score
            # is simply skipped.
            log.error("Could not get latest instructor scored for {0}".format(essays[i].id))
    try:
        #This is needed to remove stray characters that could break the machine learning code
        essay_text = [et.encode('ascii', 'ignore') for et in essay_text]
    except Exception:
        error_message = "Could not correctly encode some submissions: {0}".format(essay_text)
        log.error(error_message)
        transaction.commit()
        return False, error_message
    #Get the maximum target scores from the problem
    first_len = len(json.loads(problem.max_target_scores))
    bad_list = []
    for i in xrange(0, len(essay_grades)):
        #All of the lists within the essay grade list (ie [[1,1],[2,2]]) need to be the same length
        if len(essay_grades[i]) != first_len:
            error_message = "Problem with an instructor scored essay! {0}".format(essay_grades)
            log.info(error_message)
            bad_list.append(i)
    essay_text = [essay_text[t] for t in xrange(0, len(essay_text)) if t not in bad_list]
    essay_grades = [essay_grades[t] for t in xrange(0, len(essay_grades)) if t not in bad_list]
    #Too many essays can take a very long time to train and eat up system resources.  Enforce a max.
    #Accuracy increases logarithmically, anyways, so you dont lose much here.
    if len(essay_text) > MAX_ESSAYS_TO_TRAIN_WITH:
        essay_text = essay_text[:MAX_ESSAYS_TO_TRAIN_WITH]
        essay_grades = essay_grades[:MAX_ESSAYS_TO_TRAIN_WITH]
    graded_sub_count = len(essay_text)
    #If there are too few essays, then don't train a model.  Need a minimum to get any kind of accuracy.
    if graded_sub_count < MIN_ESSAYS_TO_TRAIN_WITH:
        error_message = "Too few too create a model for problem {0} need {1} only have {2}".format(
            problem, MIN_ESSAYS_TO_TRAIN_WITH, graded_sub_count)
        log.error(error_message)
        transaction.commit()
        return False, error_message
    #Loops through each potential target
    for m in xrange(0, first_len):
        #Gets all of the scores for this particular target
        scores = [s[m] for s in essay_grades]
        max_score = max(scores)
        log.debug("Currently on location {0} in problem {1}".format(m, problem.id))
        #Get paths to ml model from database
        relative_model_path, full_model_path = ml_grading_util.get_model_path(problem, m)
        #Get last created model for given location
        transaction.commit()
        success, latest_created_model = ml_grading_util.get_latest_created_model(problem, m)
        if success:
            sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
        else:
            sub_count_diff = graded_sub_count
        #Retrain if no model exists, or every 10 graded essays.
        if not success or sub_count_diff >= 10:
            log.info("Starting to create a model because none exists or it is time to retrain.")
            #Checks to see if another model creator process has started a model for this location
            success, model_started, created_model = ml_grading_util.check_if_model_started(problem)
            #Checks to see if model was started a long time ago, and removes and retries if it was.
            if model_started:
                log.info("A model was started previously.")
                now = timezone.now()
                second_difference = (now - created_model.modified).total_seconds()
                if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                    log.info("Model for problem {0} started over {1} seconds ago, removing and re-attempting.".format(
                        problem.id, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                    created_model.delete()
                    model_started = False
            #If a model has not been started, then initialize an entry in the database
            #to prevent other threads from duplicating work
            if not model_started:
                created_model_dict_initial = {
                    'max_score': max_score,
                    'prompt': prompt,
                    'problem': problem,
                    'model_relative_path': relative_model_path,
                    'model_full_path': full_model_path,
                    'number_of_essays': graded_sub_count,
                    'creation_succeeded': False,
                    'creation_started': True,
                    'target_number': m,
                }
                created_model = CreatedModel(**created_model_dict_initial)
                created_model.save()
                transaction.commit()
                if not isinstance(prompt, basestring):
                    try:
                        prompt = str(prompt)
                    except Exception:
                        # Narrowed from bare except: fall back to an empty prompt.
                        prompt = ""
                prompt = prompt.encode('ascii', 'ignore')
                #Call on the ease repo to create a model
                results = create.create(essay_text, scores, prompt)
                scores = [int(score_item) for score_item in scores]
                #Add in needed stuff that ml creator does not pass back
                results.update({
                    'model_path': full_model_path,
                    'relative_model_path': relative_model_path,
                })
                #Try to save model file if ml model creator was successful
                overall_success = results['success']
                if results['success']:
                    try:
                        success, s3_public_url = save_model_file(results, settings.USE_S3_TO_STORE_MODELS)
                        results.update({'s3_public_url': s3_public_url, 'success': success})
                        if not success:
                            results['errors'].append("Could not save model.")
                    except Exception:
                        results['errors'].append("Could not save model.")
                        results['s3_public_url'] = ""
                        log.exception("Problem saving ML model.")
                created_model_dict_final = {
                    'cv_kappa': results['cv_kappa'],
                    'cv_mean_absolute_error': results['cv_mean_absolute_error'],
                    'creation_succeeded': results['success'],
                    'creation_started': False,
                    's3_public_url': results['s3_public_url'],
                    'model_stored_in_s3': settings.USE_S3_TO_STORE_MODELS,
                    's3_bucketname': str(settings.S3_BUCKETNAME),
                    'model_relative_path': relative_model_path,
                    'model_full_path': full_model_path,
                }
                transaction.commit()
                try:
                    CreatedModel.objects.filter(pk=created_model.pk).update(**created_model_dict_final)
                except Exception:
                    # The old handler logged "{0}".format(id), which printed the
                    # *builtin* id function instead of any error detail.  Log
                    # the real traceback instead.
                    log.exception("ModelCreator creation failed.")
                log.debug("Location: {0} Creation Status: {1} Errors: {2}".format(
                    full_model_path,
                    results['success'],
                    results['errors'],
                ))
    transaction.commit()
    return overall_success, "Creation succeeded."
def handle_single_item(controller_session):
    """
    Pull one submission from the grading controller, ML-grade it (overall
    score plus each rubric item), and POST the result back.

    controller_session - session object authenticated to the controller.
    Returns True if a submission was fetched from the controller, else False.
    """
    sub_get_success, content = get_item_from_controller(controller_session)
    #Grade and handle here
    if sub_get_success:
        transaction.commit()
        sub = Submission.objects.get(id=int(content['submission_id']))
        sl = staff_grading_util.StaffLocation(sub.location)
        subs_graded_by_instructor = sl.graded()
        first_sub = subs_graded_by_instructor.order_by('date_created')[0]
        parsed_rubric = rubric_functions.parse_rubric(first_sub.rubric)
        #strip out unicode and other characters in student response
        #Needed, or grader may potentially fail
        #TODO: Handle unicode in student responses properly
        student_response = sub.student_response.encode('ascii', 'ignore')
        #Get the latest created model for the given location
        transaction.commit()
        location_suffixes = ml_grading_util.generate_rubric_location_suffixes(
            subs_graded_by_instructor, grading=True)
        # Guard the whole grading/POST section: with no suffixes there is no
        # model to run and the result variables below would be undefined.
        if len(location_suffixes) > 0:
            rubric_scores_complete = True
            rubric_scores = []
            for m in xrange(0, len(location_suffixes)):
                suffix = location_suffixes[m]
                success, created_model = ml_grading_util.get_latest_created_model(sub.location + suffix)
                if not success:
                    log.error("Could not identify a valid created model!")
                    if m == 0:
                        results = RESULT_FAILURE_DICT
                        formatted_feedback = "error"
                        status = GraderStatus.failure
                        statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
                                         tags=["success:False"])
                else:
                    #Create grader path from location in submission
                    grader_path = os.path.join(settings.ML_MODEL_PATH, created_model.model_relative_path)
                    model_stored_in_s3 = created_model.model_stored_in_s3
                    success, grader_data = load_model_file(created_model, use_full_path=False)
                    if success:
                        results = grade.grade(grader_data, student_response)
                    else:
                        results = RESULT_FAILURE_DICT
                    #If the above fails, try using the full path in the created_model object
                    if not results['success'] and not created_model.model_stored_in_s3:
                        grader_path = created_model.model_full_path
                        try:
                            success, grader_data = load_model_file(created_model, use_full_path=True)
                            if success:
                                results = grade.grade(grader_data, student_response)
                            else:
                                results = RESULT_FAILURE_DICT
                        except Exception:
                            error_message = "Could not find a valid model file."
                            log.exception(error_message)
                            results = RESULT_FAILURE_DICT
                    log.info("ML Grader: Success: {0} Errors: {1}".format(results['success'], results['errors']))
                    statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
                                     tags=["success:{0}".format(results['success']),
                                           'location:{0}'.format(sub.location)])
                    #Set grader status according to success/fail
                    if results['success']:
                        status = GraderStatus.success
                    else:
                        status = GraderStatus.failure
                # Target 0 is the overall score; later targets are rubric items.
                if m == 0:
                    final_results = results
                elif results['success'] == False:
                    rubric_scores_complete = False
                else:
                    rubric_scores.append(int(results['score']))
            if len(rubric_scores) == 0:
                rubric_scores_complete = False
            grader_dict = {
                'score': int(final_results['score']),
                # NOTE(review): feedback/confidence/errors come from the *last*
                # target's results while score uses the overall final_results —
                # behavior preserved as-is; confirm the asymmetry is intended.
                'feedback': json.dumps(results['feedback']),
                'status': status,
                'grader_id': 1,
                'grader_type': "ML",
                'confidence': results['confidence'],
                'submission_id': sub.id,
                'errors': ' '.join(results['errors']),
                'rubric_scores_complete': rubric_scores_complete,
                'rubric_scores': json.dumps(rubric_scores),
            }
            #Create grader object in controller by posting back results
            created, msg = util._http_post(
                controller_session,
                urlparse.urljoin(settings.GRADING_CONTROLLER_INTERFACE['url'],
                                 project_urls.ControllerURLs.put_result),
                grader_dict,
                settings.REQUESTS_TIMEOUT,
            )
    else:
        log.error("Error getting item from controller or no items to get.")
        statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
                         tags=["success:False"])
    util.log_connection_data()
    return sub_get_success
def handle_single_problem(problem):
    """
    Creates a machine learning model for a given problem.

    problem - A Problem instance (django model)

    Returns (success (bool), message (str)).  Trains one model per scoring
    target in problem.max_target_scores.
    """
    overall_success = False
    #This function is called by celery.  This ensures that the database is not stuck in an old transaction
    transaction.commit_unless_managed()
    #Get prompt and essays from problem (needed to train a model)
    prompt = problem.prompt
    essays = problem.essay_set.filter(essay_type="train")
    #Now, try to decode the grades from the essaygrade objects
    essay_text = []
    essay_grades = []
    essay_text_vals = essays.values('essay_text')
    for i in xrange(0, len(essays)):
        try:
            #Get an instructor score for a given essay (stored as a json string in DB) and convert to a list.
            #Looks like [1,1] where each number denotes a score for a given target number
            essay_grades.append(json.loads(essays[i].get_instructor_scored()[0].target_scores))
            #If a grade could successfully be found, then add the essay text.  Both lists need to be in sync.
            essay_text.append(essay_text_vals[i]['essay_text'])
        except Exception:
            # Narrowed from bare except; the essay is skipped and the
            # traceback recorded.
            log.exception("Could not get latest instructor scored for {0}".format(essays[i]))
    try:
        #This is needed to remove stray characters that could break the machine learning code
        essay_text = [et.encode('ascii', 'ignore') for et in essay_text]
    except Exception:
        error_message = "Could not correctly encode some submissions: {0}".format(essay_text)
        log.exception(error_message)
        return False, error_message
    #Get the maximum target scores from the problem
    first_len = len(json.loads(problem.max_target_scores))
    bad_list = []
    for i in xrange(0, len(essay_grades)):
        #All of the lists within the essay grade list (ie [[1,1],[2,2]]) need to be the same length
        if len(essay_grades[i]) != first_len:
            error_message = "Problem with an instructor scored essay! {0}".format(essay_grades)
            log.info(error_message)
            bad_list.append(i)
    essay_text = [
        essay_text[t] for t in xrange(0, len(essay_text)) if t not in bad_list
    ]
    essay_grades = [
        essay_grades[t] for t in xrange(0, len(essay_grades)) if t not in bad_list
    ]
    #Too many essays can take a very long time to train and eat up system resources.  Enforce a max.
    #Accuracy increases logarithmically, anyways, so you dont lose much here.
    if len(essay_text) > MAX_ESSAYS_TO_TRAIN_WITH:
        essay_text = essay_text[:MAX_ESSAYS_TO_TRAIN_WITH]
        essay_grades = essay_grades[:MAX_ESSAYS_TO_TRAIN_WITH]
    graded_sub_count = len(essay_text)
    #If there are too few essays, then don't train a model.  Need a minimum to get any kind of accuracy.
    if graded_sub_count < MIN_ESSAYS_TO_TRAIN_WITH:
        error_message = "Too few too create a model for problem {0} need {1} only have {2}".format(
            problem, MIN_ESSAYS_TO_TRAIN_WITH, graded_sub_count)
        log.error(error_message)
        return False, error_message
    #Loops through each potential target
    for m in xrange(0, first_len):
        #Gets all of the scores for this particular target
        scores = [s[m] for s in essay_grades]
        max_score = max(scores)
        log.debug("Currently on location {0} in problem {1}".format(m, problem.id))
        #Get paths to ml model from database
        relative_model_path, full_model_path = ml_grading_util.get_model_path(problem, m)
        #Get last created model for given location
        transaction.commit_unless_managed()
        success, latest_created_model = ml_grading_util.get_latest_created_model(problem, m)
        if success:
            sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
        else:
            sub_count_diff = graded_sub_count
        #Retrain if no model exists, or every 10 graded essays.
        if not success or sub_count_diff >= 10:
            log.info("Starting to create a model because none exists or it is time to retrain.")
            #Checks to see if another model creator process has started a model for this location
            success, model_started, created_model = ml_grading_util.check_if_model_started(problem)
            #Checks to see if model was started a long time ago, and removes and retries if it was.
            if model_started:
                log.info("A model was started previously.")
                now = timezone.now()
                second_difference = (now - created_model.modified).total_seconds()
                if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                    log.info("Model for problem {0} started over {1} seconds ago, removing and re-attempting."
                             .format(problem.id, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                    created_model.delete()
                    model_started = False
            #If a model has not been started, then initialize an entry in the database
            #to prevent other threads from duplicating work
            if not model_started:
                created_model_dict_initial = {
                    'max_score': max_score,
                    'prompt': prompt,
                    'problem': problem,
                    'model_relative_path': relative_model_path,
                    'model_full_path': full_model_path,
                    'number_of_essays': graded_sub_count,
                    'creation_succeeded': False,
                    'creation_started': True,
                    'target_number': m,
                }
                created_model = CreatedModel(**created_model_dict_initial)
                created_model.save()
                transaction.commit_unless_managed()
                if not isinstance(prompt, basestring):
                    try:
                        prompt = str(prompt)
                    except Exception:
                        # Narrowed from bare except: fall back to an empty prompt.
                        prompt = ""
                prompt = prompt.encode('ascii', 'ignore')
                #Call on the ease repo to create a model
                results = create.create(essay_text, scores, prompt)
                scores = [int(score_item) for score_item in scores]
                #Add in needed stuff that ml creator does not pass back
                results.update({
                    'model_path': full_model_path,
                    'relative_model_path': relative_model_path
                })
                #Try to save the model file if ml model creator was successful
                overall_success = results['success']
                if results['success']:
                    try:
                        success, s3_public_url = save_model_file(results, settings.USE_S3_TO_STORE_MODELS)
                        results.update({
                            's3_public_url': s3_public_url,
                            'success': success
                        })
                        if not success:
                            results['errors'].append("Could not save model.")
                    except Exception:
                        results['errors'].append("Could not save model.")
                        results['s3_public_url'] = ""
                        log.exception("Problem saving ML model.")
                created_model_dict_final = {
                    'cv_kappa': results['cv_kappa'],
                    'cv_mean_absolute_error': results['cv_mean_absolute_error'],
                    'creation_succeeded': results['success'],
                    'creation_started': False,
                    's3_public_url': results['s3_public_url'],
                    'model_stored_in_s3': settings.USE_S3_TO_STORE_MODELS,
                    's3_bucketname': str(settings.S3_BUCKETNAME),
                    'model_relative_path': relative_model_path,
                    'model_full_path': full_model_path,
                }
                transaction.commit_unless_managed()
                try:
                    CreatedModel.objects.filter(pk=created_model.pk).update(**created_model_dict_final)
                except Exception:
                    # The old handler logged "{0}".format(id), which printed the
                    # *builtin* id function rather than any error detail.  Log
                    # the real traceback instead.
                    log.exception("ModelCreator creation failed.")
                log.debug("Location: {0} Creation Status: {1} Errors: {2}".format(
                    full_model_path,
                    results['success'],
                    results['errors'],
                ))
    transaction.commit_unless_managed()
    return overall_success, "Creation succeeded."
def handle_single_essay(essay):
    """
    Score a single essay with the latest ML model for each rubric target.

    essay - an Essay instance (django model) with essay_text and a problem.

    On success saves an EssayGrade, marks the essay ml-graded, and returns
    (True, message); on any failure returns (False, error_message).
    """
    #Needed to ensure that the DB is not wrapped in a transaction and pulls old data
    transaction.commit_unless_managed()

    #strip out unicode and other characters in student response
    #Needed, or grader may potentially fail
    #TODO: Handle unicode in student responses properly
    student_response = essay.essay_text.encode('ascii', 'ignore')

    #Gets both the max scores for each target and the number of targets
    target_max_scores = json.loads(essay.problem.max_target_scores)
    target_counts = len(target_max_scores)

    target_scores = []
    for m in xrange(0, target_counts):
        #Gets latest model for a given problem and target
        success, created_model = ml_grading_util.get_latest_created_model(essay.problem, m)

        if not success:
            # (Removed dead assignments to results/formatted_feedback that the
            # old code made right before this early return.)
            error_message = "Could not identify a valid created model!"
            log.error(error_message)
            return False, error_message

        #Try to load the model file (load_model_file resolves the path from
        #created_model itself; the old locals grader_path/model_stored_in_s3
        #were never read and have been dropped)
        success, grader_data = load_model_file(created_model, use_full_path=False)
        if success:
            #Send to ML grading algorithm to be graded
            results = grade.grade(grader_data, student_response)
        else:
            results = RESULT_FAILURE_DICT

        #If the above fails, try using the full path in the created_model object
        if not results['success'] and not created_model.model_stored_in_s3:
            try:
                success, grader_data = load_model_file(created_model, use_full_path=True)
                if success:
                    results = grade.grade(grader_data, student_response)
                else:
                    results = RESULT_FAILURE_DICT
            except Exception:
                # Narrowed from bare except.
                error_message = "Could not find a valid model file."
                log.exception(error_message)
                results = RESULT_FAILURE_DICT

        if m == 0:
            final_results = results

        if results['success'] == False:
            error_message = "Unsuccessful grading: {0}".format(results)
            # Was log.exception, but there is no active exception here —
            # that logged a spurious "NoneType: None" traceback.
            log.error(error_message)
            return False, error_message
        target_scores.append(int(results['score']))

    grader_dict = {
        'essay': essay,
        'target_scores': json.dumps(target_scores),
        'grader_type': GraderTypes.machine,
        'feedback': '',
        'annotated_text': '',
        'premium_feedback_scores': json.dumps([]),
        'success': final_results['success'],
        'confidence': final_results['confidence'],
    }
    #Create grader object in controller by posting back results
    essay_grade = EssayGrade(**grader_dict)
    essay_grade.save()
    #Update the essay so that it doesn't keep trying to re-grade
    essay.has_been_ml_graded = True
    essay.save()
    transaction.commit_unless_managed()
    return True, "Successfully scored!"
def handle_single_item(controller_session):
    """
    Fetch one submission from the grading controller, grade it with the ML
    models for its location (overall score plus rubric targets), and POST the
    grader results back to the controller.

    controller_session - session object authenticated to the controller.
    Returns True if a submission was fetched, else False.
    """
    sub_get_success, content = get_item_from_controller(controller_session)
    #Grade and handle here
    if sub_get_success:
        transaction.commit()
        sub = Submission.objects.get(id=int(content['submission_id']))
        sl = staff_grading_util.StaffLocation(sub.location)
        subs_graded_by_instructor = sl.graded()
        first_sub = subs_graded_by_instructor.order_by('date_created')[0]
        parsed_rubric = rubric_functions.parse_rubric(first_sub.rubric)
        #strip out unicode and other characters in student response
        #Needed, or grader may potentially fail
        #TODO: Handle unicode in student responses properly
        student_response = sub.student_response.encode('ascii', 'ignore')
        #Get the latest created model for the given location
        transaction.commit()
        location_suffixes = ml_grading_util.generate_rubric_location_suffixes(
            subs_graded_by_instructor, grading=True)
        # The whole grading/POST section requires at least one suffix; the
        # guard also prevents NameErrors on the result variables below.
        if len(location_suffixes) > 0:
            rubric_scores_complete = True
            rubric_scores = []
            for m in xrange(0, len(location_suffixes)):
                suffix = location_suffixes[m]
                success, created_model = ml_grading_util.get_latest_created_model(
                    sub.location + suffix)
                if not success:
                    log.error("Could not identify a valid created model!")
                    if m == 0:
                        results = RESULT_FAILURE_DICT
                        formatted_feedback = "error"
                        status = GraderStatus.failure
                        statsd.increment(
                            "open_ended_assessment.grading_controller.call_ml_grader",
                            tags=["success:False"])
                else:
                    #Create grader path from location in submission
                    grader_path = os.path.join(settings.ML_MODEL_PATH,
                                               created_model.model_relative_path)
                    model_stored_in_s3 = created_model.model_stored_in_s3
                    success, grader_data = load_model_file(created_model,
                                                           use_full_path=False)
                    if success:
                        results = grade.grade(grader_data, student_response)
                    else:
                        results = RESULT_FAILURE_DICT
                    #If the above fails, try using the full path in the created_model object
                    if not results['success'] and not created_model.model_stored_in_s3:
                        grader_path = created_model.model_full_path
                        try:
                            success, grader_data = load_model_file(
                                created_model, use_full_path=True)
                            if success:
                                results = grade.grade(grader_data, student_response)
                            else:
                                results = RESULT_FAILURE_DICT
                        except Exception:
                            error_message = "Could not find a valid model file."
                            log.exception(error_message)
                            results = RESULT_FAILURE_DICT
                    log.info("ML Grader: Success: {0} Errors: {1}".format(
                        results['success'], results['errors']))
                    statsd.increment(
                        "open_ended_assessment.grading_controller.call_ml_grader",
                        tags=[
                            "success:{0}".format(results['success']),
                            'location:{0}'.format(sub.location)
                        ])
                    #Set grader status according to success/fail
                    if results['success']:
                        status = GraderStatus.success
                    else:
                        status = GraderStatus.failure
                # Target 0 is the overall score; subsequent targets are rubric items.
                if m == 0:
                    final_results = results
                elif results['success'] == False:
                    rubric_scores_complete = False
                else:
                    rubric_scores.append(int(results['score']))
            if len(rubric_scores) == 0:
                rubric_scores_complete = False
            grader_dict = {
                'score': int(final_results['score']),
                # NOTE(review): feedback/confidence/errors use the *last*
                # target's results while score uses final_results; preserved
                # as-is — confirm the asymmetry is intended.
                'feedback': json.dumps(results['feedback']),
                'status': status,
                'grader_id': 1,
                'grader_type': "ML",
                'confidence': results['confidence'],
                'submission_id': sub.id,
                'errors': ' '.join(results['errors']),
                'rubric_scores_complete': rubric_scores_complete,
                'rubric_scores': json.dumps(rubric_scores),
            }
            #Create grader object in controller by posting back results
            created, msg = util._http_post(
                controller_session,
                urlparse.urljoin(settings.GRADING_CONTROLLER_INTERFACE['url'],
                                 project_urls.ControllerURLs.put_result),
                grader_dict,
                settings.REQUESTS_TIMEOUT,
            )
    else:
        log.error("Error getting item from controller or no items to get.")
        statsd.increment(
            "open_ended_assessment.grading_controller.call_ml_grader",
            tags=["success:False"])
    util.log_connection_data()
    return sub_get_success
def handle_single_location(location):
    """
    (Re)train ML grading models for one problem location if enough
    instructor-graded essays have accumulated.

    Trains one model for the overall score (rubric index 0) and one per
    rubric item suffix. All failures are logged; the whole body is wrapped
    in a broad try/except so a bad location never kills the worker loop.

    location -- problem location string used to look up graded submissions
        and to key the created models.
    Returns None.
    """
    try:
        # Commit so counts/queries below see fresh data; free memory from any
        # previous location's training run before starting another.
        transaction.commit()
        gc.collect()
        sl = staff_grading_util.StaffLocation(location)
        subs_graded_by_instructor = sl.graded()
        log.info("Checking location {0} to see if essay count {1} greater than min {2}".format(
            location,
            subs_graded_by_instructor.count(),
            settings.MIN_TO_USE_ML,
        ))
        graded_sub_count = subs_graded_by_instructor.count()

        #check to see if there are enough instructor graded essays for location
        if graded_sub_count >= settings.MIN_TO_USE_ML:
            location_suffixes = ml_grading_util.generate_rubric_location_suffixes(subs_graded_by_instructor, grading=False)

            # Cap the training set size so model creation stays tractable.
            if settings.MAX_TO_USE_ML < graded_sub_count:
                graded_sub_count = settings.MAX_TO_USE_ML
                subs_graded_by_instructor = subs_graded_by_instructor[:settings.MAX_TO_USE_ML]

            # Pre-fetch per-submission instructor rubric scores once; reused
            # for every rubric-item model below.
            sub_rubric_scores = []
            if len(location_suffixes) > 0:
                for sub in subs_graded_by_instructor:
                    success, scores = controller.rubric_functions.get_submission_rubric_instructor_scores(sub)
                    sub_rubric_scores.append(scores)

            for m in xrange(0, len(location_suffixes)):
                log.info("Currently on location {0}. Greater than zero is a rubric item.".format(m))
                suffix = location_suffixes[m]

                #Get paths to ml model from database
                relative_model_path, full_model_path = ml_grading_util.get_model_path(location + suffix)

                #Get last created model for given location
                transaction.commit()
                success, latest_created_model = ml_grading_util.get_latest_created_model(location + suffix)

                if success:
                    sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
                else:
                    sub_count_diff = graded_sub_count

                #Retrain if no model exists, or every 5 graded essays.
                if not success or sub_count_diff >= 5:
                    text = [str(i.student_response.encode('ascii', 'ignore')) for i in subs_graded_by_instructor]
                    ids = [i.id for i in subs_graded_by_instructor]

                    #TODO: Make queries more efficient
                    #This is for the basic overall score
                    if m == 0:
                        scores = [z.get_last_grader().score for z in list(subs_graded_by_instructor)]
                    else:
                        # Rubric item m corresponds to column m-1 of the
                        # pre-fetched per-submission rubric scores.
                        scores = [z[m - 1] for z in sub_rubric_scores]

                    #Get the first graded submission, so that we can extract metadata like rubric, etc, from it
                    first_sub = subs_graded_by_instructor[0]
                    prompt = str(first_sub.prompt.encode('ascii', 'ignore'))
                    rubric = str(first_sub.rubric.encode('ascii', 'ignore'))
                    transaction.commit()

                    #Checks to see if another model creator process has started a model for this location
                    success, model_started, created_model = ml_grading_util.check_if_model_started(location + suffix)

                    #Checks to see if model was started a long time ago, and removes and retries if it was.
                    if model_started:
                        now = timezone.now()
                        second_difference = (now - created_model.date_modified).total_seconds()
                        if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                            log.error("Model for location {0} started over {1} seconds ago, removing and re-attempting.".format(
                                location + suffix, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                            created_model.delete()
                            model_started = False

                    if not model_started:
                        # Record a "creation started" row first so other workers
                        # see this location as claimed (see check_if_model_started).
                        created_model_dict_initial = {
                            'max_score': first_sub.max_score,
                            'prompt': prompt,
                            'rubric': rubric,
                            'location': location + suffix,
                            'course_id': first_sub.course_id,
                            'submission_ids_used': json.dumps(ids),
                            'problem_id': first_sub.problem_id,
                            'model_relative_path': relative_model_path,
                            'model_full_path': full_model_path,
                            'number_of_essays': graded_sub_count,
                            'creation_succeeded': False,
                            'creation_started': True,
                            'creation_finished': False,
                        }
                        transaction.commit()
                        success, initial_id = ml_grading_util.save_created_model(created_model_dict_initial)
                        transaction.commit()

                        # The actual (long-running) model training call.
                        results = create.create(text, scores, prompt)

                        scores = [int(score_item) for score_item in scores]

                        #Add in needed stuff that ml creator does not pass back
                        results.update({'text': text,
                                        'score': scores,
                                        'model_path': full_model_path,
                                        'relative_model_path': relative_model_path,
                                        'prompt': prompt})

                        #Try to create model if ml model creator was successful
                        if results['success']:
                            try:
                                success, s3_public_url = save_model_file(results, settings.USE_S3_TO_STORE_MODELS)
                                results.update({'s3_public_url': s3_public_url, 'success': success})
                                if not success:
                                    results['errors'].append("Could not save model.")
                            except Exception:
                                results['errors'].append("Could not save model.")
                                results['s3_public_url'] = ""
                                log.exception("Problem saving ML model.")

                            # Finalize the bookkeeping row created above.
                            created_model_dict_final = {
                                'cv_kappa': results['cv_kappa'],
                                'cv_mean_absolute_error': results['cv_mean_absolute_error'],
                                'creation_succeeded': results['success'],
                                's3_public_url': results['s3_public_url'],
                                'model_stored_in_s3': settings.USE_S3_TO_STORE_MODELS,
                                's3_bucketname': str(settings.S3_BUCKETNAME),
                                'creation_finished': True,
                                'model_relative_path': relative_model_path,
                                'model_full_path': full_model_path,
                                'location': location + suffix,
                            }
                            transaction.commit()
                            # NOTE(review): on failure save_created_model seems to
                            # return an error message as `id` (logged below) —
                            # confirm against ml_grading_util.
                            success, id = ml_grading_util.save_created_model(created_model_dict_final, update_model=True, update_id=initial_id)
                        else:
                            log.error("Could not create an ML model. Have you installed all the needed requirements for ease? This is for location {0} and rubric item {1}".format(location, m))

                        if not success:
                            log.error("ModelCreator creation failed. Error: {0}".format(id))
                            statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                                             tags=["success:False", "location:{0}".format(location)])

                        # NOTE(review): if model_started stayed True, `results` is
                        # never bound and this raises NameError, silently absorbed
                        # by the outer except — confirm whether that is intended.
                        log.info("Location: {0} Creation Status: {1} Errors: {2}".format(
                            full_model_path,
                            results['success'],
                            results['errors'],
                        ))
                        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                                         tags=["success:{0}".format(results['success']), "location:{0}".format(location)])
        util.log_connection_data()
    except Exception:
        # Broad catch: one bad location must not crash the model-creator loop.
        log.exception("Problem creating model for location {0}".format(location))
        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                         tags=["success:Exception", "location:{0}".format(location)])
def handle_single_essay(essay):
    """
    ML-grade one essay against the latest created model for each target of
    its problem, then persist an EssayGrade and mark the essay as graded.

    essay -- Essay model instance; reads essay.essay_text and
        essay.problem.max_target_scores, writes essay.has_been_ml_graded.

    Returns:
        (bool, str) -- (True, "Successfully scored!") on success, or
        (False, <error message>) when a model is missing or grading fails.
    """
    # Needed to ensure that the DB is not wrapped in a transaction and pulls old data
    transaction.commit()

    # strip out unicode and other characters in student response
    # Needed, or grader may potentially fail
    # TODO: Handle unicode in student responses properly
    student_response = essay.essay_text.encode('ascii', 'ignore')

    # Gets both the max scores for each target and the number of targets
    target_max_scores = json.loads(essay.problem.max_target_scores)
    target_counts = len(target_max_scores)

    target_scores = []
    for m in xrange(0, target_counts):
        # Gets latest model for a given problem and target
        success, created_model = ml_grading_util.get_latest_created_model(
            essay.problem, m)

        if not success:
            # No valid model for this target: abort without scoring.
            # (Dead `results = RESULT_FAILURE_DICT` assignment removed here —
            # it was never read before this early return.)
            formatted_feedback = "error"
            transaction.commit()
            return False, formatted_feedback

        # Try to load the model file via the relative path first
        success, grader_data = load_model_file(created_model,
                                               use_full_path=False)
        if success:
            # Send to ML grading algorithm to be graded
            results = grade.grade(grader_data, student_response)
        else:
            results = RESULT_FAILURE_DICT

        # If the above fails, try using the full path in the created_model object
        if not results['success'] and not created_model.model_stored_in_s3:
            try:
                success, grader_data = load_model_file(created_model,
                                                       use_full_path=True)
                if success:
                    results = grade.grade(grader_data, student_response)
                else:
                    results = RESULT_FAILURE_DICT
            except Exception:
                # Was a bare `except:`; narrowed so SystemExit and
                # KeyboardInterrupt are no longer swallowed.
                error_message = "Could not find a valid model file."
                log.exception(error_message)
                results = RESULT_FAILURE_DICT

        if m == 0:
            # Target 0 (overall) supplies the success/confidence fields below.
            # NOTE(review): if target_counts == 0 this never binds and the
            # grader_dict below raises NameError — confirm callers guarantee
            # at least one target.
            final_results = results

        if results['success'] == False:
            error_message = "Unsuccessful grading: {0}".format(results)
            log.exception(error_message)
            transaction.commit()
            return False, error_message
        target_scores.append(int(results['score']))

    grader_dict = {
        'essay': essay,
        'target_scores': json.dumps(target_scores),
        'grader_type': GraderTypes.machine,
        'feedback': '',
        'annotated_text': '',
        'premium_feedback_scores': json.dumps([]),
        'success': final_results['success'],
        'confidence': final_results['confidence'],
    }
    # Create grader object in controller by posting back results
    essay_grade = EssayGrade(**grader_dict)
    essay_grade.save()
    # Update the essay so that it doesn't keep trying to re-grade
    essay.has_been_ml_graded = True
    essay.save()
    # copy permissions from the essay to the essaygrade
    helpers.copy_permissions(essay, Essay, essay_grade, EssayGrade)
    transaction.commit()
    return True, "Successfully scored!"