def handle_single_location(location):
    """
    Train (or retrain) ML grading models for a single problem location.

    If enough instructor-graded essays exist for ``location``, trains one
    model for the overall score (suffix index 0) and one per rubric item,
    persisting model metadata through ``ml_grading_util.save_created_model``.
    All outcomes are reported via log and statsd; nothing is returned.

    location -- problem location string identifying the essays to train on.
    """
    try:
        transaction.commit()
        gc.collect()
        subs_graded_by_instructor = staff_grading_util.finished_submissions_graded_by_instructor(location)
        log.debug("Checking location {0} to see if essay count {1} greater than min {2}".format(
            location,
            subs_graded_by_instructor.count(),
            settings.MIN_TO_USE_ML,
        ))
        graded_sub_count = subs_graded_by_instructor.count()

        # Check to see if there are enough instructor graded essays for location.
        if graded_sub_count >= settings.MIN_TO_USE_ML:
            # One suffix per model to build: index 0 is the overall score,
            # each later index is a rubric item.
            location_suffixes = ml_grading_util.generate_rubric_location_suffixes(subs_graded_by_instructor, grading=False)

            # Collect per-submission rubric scores once, up front, so every
            # rubric model below can reuse them.
            sub_rubric_scores = []
            if len(location_suffixes) > 0:
                for sub in subs_graded_by_instructor:
                    success, scores = controller.rubric_functions.get_submission_rubric_instructor_scores(sub)
                    sub_rubric_scores.append(scores)

            # Cap the size of the training set.
            if settings.MAX_TO_USE_ML < graded_sub_count:
                graded_sub_count = settings.MAX_TO_USE_ML
                subs_graded_by_instructor = subs_graded_by_instructor[:settings.MAX_TO_USE_ML]

            for m in xrange(0, len(location_suffixes)):
                log.debug("Currently on location {0}. Greater than zero is a rubric item.".format(m))
                suffix = location_suffixes[m]

                # Get paths to ml model from database.
                relative_model_path, full_model_path = ml_grading_util.get_model_path(location + suffix)

                # Get last created model for given location.
                transaction.commit()
                success, latest_created_model = ml_grading_util.get_latest_created_model(location + suffix)

                if success:
                    sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
                else:
                    sub_count_diff = graded_sub_count

                # Retrain if no model exists, or every 5 graded essays.
                if not success or sub_count_diff >= 5:
                    text = [str(i.student_response.encode('ascii', 'ignore')) for i in subs_graded_by_instructor]
                    ids = [i.id for i in subs_graded_by_instructor]

                    # TODO: Make queries more efficient
                    # Index 0 is the basic overall score; later indexes pull the
                    # matching rubric-item score collected above.
                    if m == 0:
                        scores = [z.get_last_grader().score for z in list(subs_graded_by_instructor)]
                    else:
                        scores = [z[m - 1] for z in sub_rubric_scores]

                    # Get the first graded submission, so that we can extract
                    # metadata like rubric, etc, from it.
                    first_sub = subs_graded_by_instructor[0]
                    prompt = str(first_sub.prompt.encode('ascii', 'ignore'))
                    rubric = str(first_sub.rubric.encode('ascii', 'ignore'))
                    transaction.commit()

                    # Checks to see if another model creator process has started
                    # a model for this location.
                    success, model_started, created_model = ml_grading_util.check_if_model_started(location + suffix)

                    # Checks to see if model was started a long time ago, and
                    # removes and retries if it was.
                    if model_started:
                        now = timezone.now()
                        second_difference = (now - created_model.date_modified).total_seconds()
                        if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                            log.error("Model for location {0} started over {1} seconds ago, removing and re-attempting.".format(
                                location + suffix, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                            created_model.delete()
                            model_started = False

                    if not model_started:
                        # Record the attempt before training so concurrent
                        # workers see it as started.
                        created_model_dict_initial = {
                            'max_score': first_sub.max_score,
                            'prompt': prompt,
                            'rubric': rubric,
                            'location': location + suffix,
                            'course_id': first_sub.course_id,
                            'submission_ids_used': json.dumps(ids),
                            'problem_id': first_sub.problem_id,
                            'model_relative_path': relative_model_path,
                            'model_full_path': full_model_path,
                            'number_of_essays': graded_sub_count,
                            'creation_succeeded': False,
                            'creation_started': True,
                            'creation_finished': False,
                        }
                        transaction.commit()
                        success, initial_id = ml_grading_util.save_created_model(created_model_dict_initial)
                        transaction.commit()

                        results = create.create(text, scores, prompt)
                        scores = [int(score_item) for score_item in scores]

                        # Add in needed stuff that ml creator does not pass back.
                        results.update({
                            'text': text,
                            'score': scores,
                            'model_path': full_model_path,
                            'relative_model_path': relative_model_path,
                            'prompt': prompt,
                        })

                        # Try to save the model file if the ml model creator
                        # was successful.
                        if results['success']:
                            try:
                                success, s3_public_url = save_model_file(results, settings.USE_S3_TO_STORE_MODELS)
                                results.update({'s3_public_url': s3_public_url, 'success': success})
                                if not success:
                                    results['errors'].append("Could not save model.")
                            # BUGFIX: was a bare ``except:``, which also traps
                            # KeyboardInterrupt/SystemExit; narrowed to Exception.
                            except Exception:
                                results['errors'].append("Could not save model.")
                                results['s3_public_url'] = ""
                                log.exception("Problem saving ML model.")

                        # The final record is written whether or not creation
                        # succeeded, so the failure is visible in the DB.
                        created_model_dict_final = {
                            'cv_kappa': results['cv_kappa'],
                            'cv_mean_absolute_error': results['cv_mean_absolute_error'],
                            'creation_succeeded': results['success'],
                            # BUGFIX: 's3_public_url' is only written into
                            # ``results`` on the success path above; use .get so
                            # a failed creation is still recorded instead of
                            # raising KeyError into the outer handler.
                            's3_public_url': results.get('s3_public_url', ""),
                            'model_stored_in_s3': settings.USE_S3_TO_STORE_MODELS,
                            's3_bucketname': str(settings.S3_BUCKETNAME),
                            'creation_finished': True,
                            'model_relative_path': relative_model_path,
                            'model_full_path': full_model_path,
                            'location': location + suffix,
                        }
                        transaction.commit()
                        # On failure, the second return value is an error
                        # message rather than a record id.
                        success, final_id = ml_grading_util.save_created_model(created_model_dict_final, update_model=True, update_id=initial_id)

                        if not success:
                            log.error("ModelCreator creation failed. Error: {0}".format(final_id))
                            statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                                tags=["success:False", "location:{0}".format(location)])

                        log.debug("Location: {0} Creation Status: {1} Errors: {2}".format(
                            full_model_path,
                            results['success'],
                            results['errors'],
                        ))
                        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                            tags=["success:{0}".format(results['success']), "location:{0}".format(location)])
        util.log_connection_data()
    # BUGFIX: was a bare ``except:``; narrowed so process-control exceptions
    # (KeyboardInterrupt/SystemExit) still propagate out of the worker.
    except Exception:
        log.exception("Problem creating model for location {0}".format(location))
        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
            tags=["success:Exception", "location:{0}".format(location)])
def handle_single_item(controller_session):
    """
    Fetch one submission from the grading controller, ML-grade it (overall
    score plus any rubric items), and POST the grader results back.

    controller_session -- session object passed through to
        ``get_item_from_controller`` and ``util._http_post``.

    Returns sub_get_success (whether a submission was fetched at all), so the
    caller can decide whether to poll again.
    """
    sub_get_success, content = get_item_from_controller(controller_session)
    #Grade and handle here
    if sub_get_success:
        transaction.commit()
        sub = Submission.objects.get(id=int(content['submission_id']))
        sl = staff_grading_util.StaffLocation(sub.location)
        subs_graded_by_instructor = sl.graded()
        first_sub = subs_graded_by_instructor.order_by('date_created')[0]
        # NOTE(review): parsed_rubric is never read below — dead assignment?
        parsed_rubric = rubric_functions.parse_rubric(first_sub.rubric)
        #strip out unicode and other characters in student response
        #Needed, or grader may potentially fail
        #TODO: Handle unicode in student responses properly
        student_response = sub.student_response.encode('ascii', 'ignore')
        #Get the latest created model for the given location
        transaction.commit()
        # Suffix 0 is the overall-score model; later suffixes are rubric items.
        location_suffixes = ml_grading_util.generate_rubric_location_suffixes(
            subs_graded_by_instructor, grading=True)
        if len(location_suffixes) > 0:
            rubric_scores_complete = True
            rubric_scores = []
        # NOTE(review): if location_suffixes is empty, rubric_scores /
        # final_results / status are never bound and the code below would raise
        # NameError — presumably grading=True always yields >= 1 suffix; verify.
        for m in xrange(0, len(location_suffixes)):
            suffix = location_suffixes[m]
            success, created_model = ml_grading_util.get_latest_created_model(
                sub.location + suffix)
            if not success:
                log.error("Could not identify a valid created model!")
                # Only a missing overall-score model (m == 0) is a hard
                # failure; a missing rubric model just marks rubric scoring
                # incomplete further below.
                if m == 0:
                    results = RESULT_FAILURE_DICT
                    # NOTE(review): formatted_feedback is never used afterwards.
                    formatted_feedback = "error"
                    status = GraderStatus.failure
                    statsd.increment(
                        "open_ended_assessment.grading_controller.call_ml_grader",
                        tags=["success:False"])
            else:
                #Create grader path from location in submission
                # NOTE(review): grader_path and model_stored_in_s3 are assigned
                # but load_model_file is handed created_model directly — these
                # two locals look vestigial.
                grader_path = os.path.join(settings.ML_MODEL_PATH,
                    created_model.model_relative_path)
                model_stored_in_s3 = created_model.model_stored_in_s3
                success, grader_data = load_model_file(created_model,
                    use_full_path=False)
                if success:
                    results = grade.grade(grader_data, student_response)
                else:
                    results = RESULT_FAILURE_DICT
                #If the above fails, try using the full path in the created_model object
                if not results['success'] and not created_model.model_stored_in_s3:
                    grader_path = created_model.model_full_path
                    try:
                        success, grader_data = load_model_file(
                            created_model, use_full_path=True)
                        if success:
                            results = grade.grade(grader_data, student_response)
                        else:
                            results = RESULT_FAILURE_DICT
                    except Exception:
                        error_message = "Could not find a valid model file."
                        log.exception(error_message)
                        results = RESULT_FAILURE_DICT
                log.info("ML Grader: Success: {0} Errors: {1}".format(
                    results['success'], results['errors']))
                statsd.increment(
                    "open_ended_assessment.grading_controller.call_ml_grader",
                    tags=[
                        "success:{0}".format(results['success']),
                        'location:{0}'.format(sub.location)
                    ])
                #Set grader status according to success/fail
                if results['success']:
                    status = GraderStatus.success
                else:
                    status = GraderStatus.failure
            # First iteration is the overall score; the rest accumulate rubric
            # scores, and any per-rubric failure marks the set incomplete.
            if m == 0:
                final_results = results
            elif results['success'] == False:
                rubric_scores_complete = False
            else:
                rubric_scores.append(int(results['score']))
        if len(rubric_scores) == 0:
            rubric_scores_complete = False
        # Overall score comes from final_results (m == 0); feedback/confidence/
        # errors come from the last iteration's results.
        grader_dict = {
            'score': int(final_results['score']),
            'feedback': json.dumps(results['feedback']),
            'status': status,
            'grader_id': 1,
            'grader_type': "ML",
            'confidence': results['confidence'],
            'submission_id': sub.id,
            'errors': ' '.join(results['errors']),
            'rubric_scores_complete': rubric_scores_complete,
            'rubric_scores': json.dumps(rubric_scores),
        }
        #Create grader object in controller by posting back results
        created, msg = util._http_post(
            controller_session,
            urlparse.urljoin(settings.GRADING_CONTROLLER_INTERFACE['url'],
                project_urls.ControllerURLs.put_result),
            grader_dict,
            settings.REQUESTS_TIMEOUT,
        )
    else:
        log.error("Error getting item from controller or no items to get.")
        statsd.increment(
            "open_ended_assessment.grading_controller.call_ml_grader",
            tags=["success:False"])
    util.log_connection_data()
    return sub_get_success
def handle_single_item(controller_session):
    """
    Fetch one submission from the grading controller, ML-grade it (overall
    score plus any rubric items), and POST the grader results back.

    NOTE(review): this is a second, behaviorally identical definition of
    ``handle_single_item`` in the same file (only whitespace differs from the
    earlier one); at import time the later definition wins. Looks like an
    unresolved merge duplicate — confirm and deduplicate.

    controller_session -- session object passed through to
        ``get_item_from_controller`` and ``util._http_post``.

    Returns sub_get_success (whether a submission was fetched at all).
    """
    sub_get_success, content = get_item_from_controller(controller_session)
    #Grade and handle here
    if sub_get_success:
        transaction.commit()
        sub = Submission.objects.get(id=int(content['submission_id']))
        sl = staff_grading_util.StaffLocation(sub.location)
        subs_graded_by_instructor = sl.graded()
        first_sub = subs_graded_by_instructor.order_by('date_created')[0]
        # NOTE(review): parsed_rubric is never read below — dead assignment?
        parsed_rubric = rubric_functions.parse_rubric(first_sub.rubric)
        #strip out unicode and other characters in student response
        #Needed, or grader may potentially fail
        #TODO: Handle unicode in student responses properly
        student_response = sub.student_response.encode('ascii', 'ignore')
        #Get the latest created model for the given location
        transaction.commit()
        # Suffix 0 is the overall-score model; later suffixes are rubric items.
        location_suffixes = ml_grading_util.generate_rubric_location_suffixes(subs_graded_by_instructor, grading=True)
        if len(location_suffixes) > 0:
            rubric_scores_complete = True
            rubric_scores = []
        # NOTE(review): if location_suffixes is empty, rubric_scores /
        # final_results / status never get bound and the code after the loop
        # would raise NameError — verify grading=True always yields a suffix.
        for m in xrange(0, len(location_suffixes)):
            suffix = location_suffixes[m]
            success, created_model = ml_grading_util.get_latest_created_model(sub.location + suffix)
            if not success:
                log.error("Could not identify a valid created model!")
                # Only a missing overall-score model (m == 0) hard-fails the
                # grade; missing rubric models mark the rubric incomplete below.
                if m == 0:
                    results = RESULT_FAILURE_DICT
                    # NOTE(review): formatted_feedback is never used afterwards.
                    formatted_feedback = "error"
                    status = GraderStatus.failure
                    statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
                        tags=["success:False"])
            else:
                #Create grader path from location in submission
                # NOTE(review): grader_path / model_stored_in_s3 are assigned
                # but load_model_file gets created_model directly — vestigial?
                grader_path = os.path.join(settings.ML_MODEL_PATH, created_model.model_relative_path)
                model_stored_in_s3 = created_model.model_stored_in_s3
                success, grader_data = load_model_file(created_model, use_full_path=False)
                if success:
                    results = grade.grade(grader_data, student_response)
                else:
                    results = RESULT_FAILURE_DICT
                #If the above fails, try using the full path in the created_model object
                if not results['success'] and not created_model.model_stored_in_s3:
                    grader_path = created_model.model_full_path
                    try:
                        success, grader_data = load_model_file(created_model, use_full_path=True)
                        if success:
                            results = grade.grade(grader_data, student_response)
                        else:
                            results = RESULT_FAILURE_DICT
                    except Exception:
                        error_message = "Could not find a valid model file."
                        log.exception(error_message)
                        results = RESULT_FAILURE_DICT
                log.info("ML Grader: Success: {0} Errors: {1}".format(results['success'], results['errors']))
                statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
                    tags=["success:{0}".format(results['success']), 'location:{0}'.format(sub.location)])
                #Set grader status according to success/fail
                if results['success']:
                    status = GraderStatus.success
                else:
                    status = GraderStatus.failure
            # First iteration is the overall score; the rest accumulate rubric
            # scores, and any per-rubric failure marks the set incomplete.
            if m == 0:
                final_results = results
            elif results['success'] == False:
                rubric_scores_complete = False
            else:
                rubric_scores.append(int(results['score']))
        if len(rubric_scores) == 0:
            rubric_scores_complete = False
        # Overall score from final_results (m == 0); feedback/confidence/errors
        # reflect the last iteration's results.
        grader_dict = {
            'score': int(final_results['score']),
            'feedback': json.dumps(results['feedback']),
            'status': status,
            'grader_id': 1,
            'grader_type': "ML",
            'confidence': results['confidence'],
            'submission_id': sub.id,
            'errors': ' '.join(results['errors']),
            'rubric_scores_complete': rubric_scores_complete,
            'rubric_scores': json.dumps(rubric_scores),
        }
        #Create grader object in controller by posting back results
        created, msg = util._http_post(
            controller_session,
            urlparse.urljoin(settings.GRADING_CONTROLLER_INTERFACE['url'],
                project_urls.ControllerURLs.put_result),
            grader_dict,
            settings.REQUESTS_TIMEOUT,
        )
    else:
        log.error("Error getting item from controller or no items to get.")
        statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
            tags=["success:False"])
    util.log_connection_data()
    return sub_get_success
def handle_single_location(location):
    """
    Train (or retrain) ML grading models for a single problem location.

    NOTE(review): this is a second definition of ``handle_single_location`` in
    the same file (newer variant: StaffLocation, log.info, narrowed excepts);
    the later definition wins at import time — confirm and deduplicate.

    If enough instructor-graded essays exist for ``location``, trains one model
    for the overall score (suffix index 0) and one per rubric item, persisting
    model metadata through ``ml_grading_util.save_created_model``.  Outcomes
    are reported via log and statsd; nothing is returned.

    location -- problem location string identifying the essays to train on.
    """
    try:
        transaction.commit()
        gc.collect()
        sl = staff_grading_util.StaffLocation(location)
        subs_graded_by_instructor = sl.graded()
        log.info("Checking location {0} to see if essay count {1} greater than min {2}".format(
            location,
            subs_graded_by_instructor.count(),
            settings.MIN_TO_USE_ML,
        ))
        graded_sub_count = subs_graded_by_instructor.count()
        #check to see if there are enough instructor graded essays for location
        if graded_sub_count >= settings.MIN_TO_USE_ML:
            # One suffix per model: index 0 is the overall score, each later
            # index is a rubric item.
            location_suffixes = ml_grading_util.generate_rubric_location_suffixes(subs_graded_by_instructor, grading=False)
            # Cap the size of the training set.
            if settings.MAX_TO_USE_ML < graded_sub_count:
                graded_sub_count = settings.MAX_TO_USE_ML
                subs_graded_by_instructor = subs_graded_by_instructor[:settings.MAX_TO_USE_ML]
            # Collect per-submission rubric scores once, for reuse by every
            # rubric model below.
            sub_rubric_scores = []
            if len(location_suffixes) > 0:
                for sub in subs_graded_by_instructor:
                    success, scores = controller.rubric_functions.get_submission_rubric_instructor_scores(sub)
                    sub_rubric_scores.append(scores)
            for m in xrange(0, len(location_suffixes)):
                log.info("Currently on location {0}. Greater than zero is a rubric item.".format(m))
                suffix = location_suffixes[m]
                #Get paths to ml model from database
                relative_model_path, full_model_path = ml_grading_util.get_model_path(location + suffix)
                #Get last created model for given location
                transaction.commit()
                success, latest_created_model = ml_grading_util.get_latest_created_model(location + suffix)
                if success:
                    sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
                else:
                    sub_count_diff = graded_sub_count
                #Retrain if no model exists, or every 5 graded essays.
                if not success or sub_count_diff >= 5:
                    text = [str(i.student_response.encode('ascii', 'ignore')) for i in subs_graded_by_instructor]
                    ids = [i.id for i in subs_graded_by_instructor]
                    #TODO: Make queries more efficient
                    #This is for the basic overall score
                    if m == 0:
                        scores = [z.get_last_grader().score for z in list(subs_graded_by_instructor)]
                    else:
                        scores = [z[m - 1] for z in sub_rubric_scores]
                    #Get the first graded submission, so that we can extract metadata like rubric, etc, from it
                    first_sub = subs_graded_by_instructor[0]
                    prompt = str(first_sub.prompt.encode('ascii', 'ignore'))
                    rubric = str(first_sub.rubric.encode('ascii', 'ignore'))
                    transaction.commit()
                    #Checks to see if another model creator process has started a model for this location
                    success, model_started, created_model = ml_grading_util.check_if_model_started(location + suffix)
                    #Checks to see if model was started a long time ago, and removes and retries if it was.
                    if model_started:
                        now = timezone.now()
                        second_difference = (now - created_model.date_modified).total_seconds()
                        if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                            log.error("Model for location {0} started over {1} seconds ago, removing and re-attempting.".format(
                                location + suffix, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                            created_model.delete()
                            model_started = False
                    if not model_started:
                        # Record the attempt before training so concurrent
                        # workers see it as started.
                        created_model_dict_initial = {
                            'max_score': first_sub.max_score,
                            'prompt': prompt,
                            'rubric': rubric,
                            'location': location + suffix,
                            'course_id': first_sub.course_id,
                            'submission_ids_used': json.dumps(ids),
                            'problem_id': first_sub.problem_id,
                            'model_relative_path': relative_model_path,
                            'model_full_path': full_model_path,
                            'number_of_essays': graded_sub_count,
                            'creation_succeeded': False,
                            'creation_started': True,
                            'creation_finished': False,
                        }
                        transaction.commit()
                        success, initial_id = ml_grading_util.save_created_model(created_model_dict_initial)
                        transaction.commit()
                        results = create.create(text, scores, prompt)
                        scores = [int(score_item) for score_item in scores]
                        #Add in needed stuff that ml creator does not pass back
                        results.update({
                            'text': text,
                            'score': scores,
                            'model_path': full_model_path,
                            'relative_model_path': relative_model_path,
                            'prompt': prompt,
                        })
                        #Try to create model if ml model creator was successful
                        if results['success']:
                            try:
                                success, s3_public_url = save_model_file(results, settings.USE_S3_TO_STORE_MODELS)
                                results.update({'s3_public_url': s3_public_url, 'success': success})
                                if not success:
                                    results['errors'].append("Could not save model.")
                            except Exception:
                                results['errors'].append("Could not save model.")
                                results['s3_public_url'] = ""
                                log.exception("Problem saving ML model.")
                            # Final record is only written on the success path;
                            # on failure the else branch below just logs.
                            created_model_dict_final = {
                                'cv_kappa': results['cv_kappa'],
                                'cv_mean_absolute_error': results['cv_mean_absolute_error'],
                                'creation_succeeded': results['success'],
                                's3_public_url': results['s3_public_url'],
                                'model_stored_in_s3': settings.USE_S3_TO_STORE_MODELS,
                                's3_bucketname': str(settings.S3_BUCKETNAME),
                                'creation_finished': True,
                                'model_relative_path': relative_model_path,
                                'model_full_path': full_model_path,
                                'location': location + suffix,
                            }
                            transaction.commit()
                            # NOTE(review): ``id`` shadows the builtin; on
                            # failure it holds an error message, not a record id.
                            success, id = ml_grading_util.save_created_model(created_model_dict_final, update_model=True, update_id=initial_id)
                        else:
                            log.error("Could not create an ML model. Have you installed all the needed requirements for ease? This is for location {0} and rubric item {1}".format(location, m))
                        if not success:
                            log.error("ModelCreator creation failed. Error: {0}".format(id))
                            statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                                tags=["success:False", "location:{0}".format(location)])
                        log.info("Location: {0} Creation Status: {1} Errors: {2}".format(
                            full_model_path,
                            results['success'],
                            results['errors'],
                        ))
                        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                            tags=["success:{0}".format(results['success']), "location:{0}".format(location)])
        util.log_connection_data()
    except Exception:
        log.exception("Problem creating model for location {0}".format(location))
        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
            tags=["success:Exception", "location:{0}".format(location)])