def get_submission_ml(request):
    """
    Gets a submission for the ML grader.

    Walks every distinct submission location and returns the first waiting
    submission whose location has enough instructor-graded essays
    (settings.MIN_TO_USE_ML) and a fully trained model/rubric.

    Input:
        Get request with no parameters
    Returns:
        util._success_response with {"submission_id": id} when a submission is
        found (the submission is marked being_graded first), otherwise
        util._error_response("Nothing to grade.").
    """
    unique_locations = [x["location"] for x in list(Submission.objects.values("location").distinct())]
    for location in unique_locations:
        subs_graded_by_instructor = staff_grading_util.finished_submissions_graded_by_instructor(location).count()
        success = ml_grading_util.check_for_all_model_and_rubric_success(location)
        # Only hand out work where ML grading is viable: enough training
        # essays AND a successfully created model for every rubric item.
        if subs_graded_by_instructor >= settings.MIN_TO_USE_ML and success:
            # Fetch at most one candidate with a single LIMIT 1 query instead
            # of the previous count() followed by [0] (two queries, and
            # race-prone between them).  The old "is not None" check after
            # indexing was dead code -- indexing never returns None.
            candidates = list(Submission.objects.filter(
                location=location,
                state=SubmissionState.waiting_to_be_graded,
                next_grader_type="ML"
            )[:1])
            if candidates:
                to_be_graded = candidates[0]
                # Claim the submission so concurrent graders skip it.
                to_be_graded.state = SubmissionState.being_graded
                to_be_graded.save()
                # Insert timing initialization code
                initialize_timing(to_be_graded)
                return util._success_response({"submission_id": to_be_graded.id}, _INTERFACE_VERSION)
    util.log_connection_data()
    return util._error_response("Nothing to grade.", _INTERFACE_VERSION)
def get_submission_ml(request):
    """
    Gets a submission for the ML grader.

    Scans each distinct location; the first location that has both enough
    instructor-graded essays (settings.MIN_TO_USE_ML) and a complete set of
    trained models yields its first waiting ML submission.

    Input:
        Get request with no parameters
    Returns:
        Success response carrying {'submission_id': ...} if a submission was
        claimed, else an error response "Nothing to grade.".
    """
    locations = [row['location'] for row in list(Submission.objects.values('location').distinct())]
    for location in locations:
        graded_count = staff_grading_util.finished_submissions_graded_by_instructor(location).count()
        models_ready = ml_grading_util.check_for_all_model_and_rubric_success(location)
        if graded_count >= settings.MIN_TO_USE_ML and models_ready:
            # Single LIMIT 1 query replaces the old count() + [0] pair, which
            # hit the database twice and could race between the two queries.
            # The follow-up "is not None" test was unreachable and is dropped.
            waiting = list(Submission.objects.filter(
                location=location,
                state=SubmissionState.waiting_to_be_graded,
                next_grader_type="ML",
            )[:1])
            if waiting:
                to_be_graded = waiting[0]
                # Mark as being graded so no other grader picks it up.
                to_be_graded.state = SubmissionState.being_graded
                to_be_graded.save()
                #Insert timing initialization code
                initialize_timing(to_be_graded)
                return util._success_response({'submission_id' : to_be_graded.id}, _INTERFACE_VERSION)
    util.log_connection_data()
    return util._error_response("Nothing to grade.", _INTERFACE_VERSION)
def handle_single_location(location):
    """
    Train (or re-train) ML grading models for one submission location.

    Trains one model per rubric location suffix when the location has at
    least settings.MIN_TO_USE_ML instructor-graded essays and either no model
    exists yet or 5+ new essays arrived since the last model.  Results are
    persisted via ml_grading_util.save_created_model; nothing is returned.
    """
    try:
        transaction.commit()
        gc.collect()
        subs_graded_by_instructor = staff_grading_util.finished_submissions_graded_by_instructor(location)
        log.debug("Checking location {0} to see if essay count {1} greater than min {2}".format(
            location,
            subs_graded_by_instructor.count(),
            settings.MIN_TO_USE_ML,
        ))
        graded_sub_count=subs_graded_by_instructor.count()
        #check to see if there are enough instructor graded essays for location
        if graded_sub_count >= settings.MIN_TO_USE_ML:
            location_suffixes=ml_grading_util.generate_rubric_location_suffixes(subs_graded_by_instructor, grading=False)
            # Collect per-essay instructor rubric scores; index m-1 of each
            # entry is used below when training rubric-item models (m > 0).
            sub_rubric_scores=[]
            if len(location_suffixes)>0:
                for sub in subs_graded_by_instructor:
                    success, scores = controller.rubric_functions.get_submission_rubric_instructor_scores(sub)
                    sub_rubric_scores.append(scores)
            # Cap the training set size at MAX_TO_USE_ML.
            if settings.MAX_TO_USE_ML<graded_sub_count:
                graded_sub_count = settings.MAX_TO_USE_ML
                subs_graded_by_instructor = subs_graded_by_instructor[:settings.MAX_TO_USE_ML]
            # m == 0 trains the overall-score model; m > 0 trains one model
            # per rubric item (hence the suffix appended to the location).
            for m in xrange(0,len(location_suffixes)):
                log.debug("Currently on location {0}. Greater than zero is a rubric item.".format(m))
                suffix=location_suffixes[m]
                #Get paths to ml model from database
                relative_model_path, full_model_path= ml_grading_util.get_model_path(location + suffix)
                #Get last created model for given location
                transaction.commit()
                success, latest_created_model=ml_grading_util.get_latest_created_model(location + suffix)
                if success:
                    sub_count_diff=graded_sub_count-latest_created_model.number_of_essays
                else:
                    sub_count_diff = graded_sub_count
                #Retrain if no model exists, or every 5 graded essays.
                if not success or sub_count_diff>=5:
                    text = [str(i.student_response.encode('ascii', 'ignore')) for i in subs_graded_by_instructor]
                    ids=[i.id for i in subs_graded_by_instructor]
                    #TODO: Make queries more efficient
                    #This is for the basic overall score
                    if m==0:
                        scores = [z.get_last_grader().score for z in list(subs_graded_by_instructor)]
                    else:
                        scores=[z[m-1] for z in sub_rubric_scores]
                    #Get the first graded submission, so that we can extract metadata like rubric, etc, from it
                    first_sub=subs_graded_by_instructor[0]
                    prompt = str(first_sub.prompt.encode('ascii', 'ignore'))
                    rubric = str(first_sub.rubric.encode('ascii', 'ignore'))
                    transaction.commit()
                    #Checks to see if another model creator process has started amodel for this location
                    success, model_started, created_model = ml_grading_util.check_if_model_started(location + suffix)
                    #Checks to see if model was started a long time ago, and removes and retries if it was.
                    if model_started:
                        now = timezone.now()
                        second_difference = (now - created_model.date_modified).total_seconds()
                        if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                            log.error("Model for location {0} started over {1} seconds ago, removing and re-attempting.".format(
                                location + suffix, settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                            created_model.delete()
                            model_started = False
                    if not model_started:
                        # Record a placeholder row first so concurrent workers
                        # see this location as "started".
                        created_model_dict_initial={
                            'max_score' : first_sub.max_score,
                            'prompt' : prompt,
                            'rubric' : rubric,
                            'location' : location + suffix,
                            'course_id' : first_sub.course_id,
                            'submission_ids_used' : json.dumps(ids),
                            'problem_id' : first_sub.problem_id,
                            'model_relative_path' : relative_model_path,
                            'model_full_path' : full_model_path,
                            'number_of_essays' : graded_sub_count,
                            'creation_succeeded': False,
                            'creation_started' : True,
                            'creation_finished' : False,
                        }
                        transaction.commit()
                        success, initial_id = ml_grading_util.save_created_model(created_model_dict_initial)
                        transaction.commit()
                        results = create.create(text, scores, prompt)
                        # NOTE(review): `scores` is rebound here after being
                        # passed to create.create above -- order matters.
                        scores = [int(score_item) for score_item in scores]
                        #Add in needed stuff that ml creator does not pass back
                        results.update({'text' : text, 'score' : scores, 'model_path' : full_model_path,
                                        'relative_model_path' : relative_model_path, 'prompt' : prompt})
                        #Try to create model if ml model creator was successful
                        if results['success']:
                            try:
                                success, s3_public_url = save_model_file(results,settings.USE_S3_TO_STORE_MODELS)
                                results.update({'s3_public_url' : s3_public_url, 'success' : success})
                                if not success:
                                    results['errors'].append("Could not save model.")
                            # NOTE(review): bare except -- swallows all save
                            # errors (logged below); model row still finalized.
                            except:
                                results['errors'].append("Could not save model.")
                                results['s3_public_url'] = ""
                                log.exception("Problem saving ML model.")
                        created_model_dict_final={
                            'cv_kappa' : results['cv_kappa'],
                            'cv_mean_absolute_error' : results['cv_mean_absolute_error'],
                            'creation_succeeded': results['success'],
                            's3_public_url' : results['s3_public_url'],
                            'model_stored_in_s3' : settings.USE_S3_TO_STORE_MODELS,
                            's3_bucketname' : str(settings.S3_BUCKETNAME),
                            'creation_finished' : True,
                            'model_relative_path' : relative_model_path,
                            'model_full_path' : full_model_path,
                            'location' : location + suffix,
                        }
                        transaction.commit()
                        # NOTE(review): `id` shadows the builtin; on failure it
                        # carries the error payload from save_created_model.
                        success, id = ml_grading_util.save_created_model(created_model_dict_final,update_model=True,update_id=initial_id)
                        if not success:
                            log.error("ModelCreator creation failed. Error: {0}".format(id))
                            statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                                tags=["success:False", "location:{0}".format(location)])
                        log.debug("Location: {0} Creation Status: {1} Errors: {2}".format(
                            full_model_path,
                            results['success'],
                            results['errors'],
                        ))
                        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
                            tags=["success:{0}".format(results['success']), "location:{0}".format(location)])
        util.log_connection_data()
    # NOTE(review): bare except -- any error for this location is logged and
    # recorded in statsd but otherwise suppressed.
    except:
        log.exception("Problem creating model for location {0}".format(location))
        statsd.increment("open_ended_assessment.grading_controller.call_ml_creator",
            tags=["success:Exception", "location:{0}".format(location)])
def handle_single_item(controller_session):
    """
    Fetch one submission from the grading controller, grade it with the
    latest ML model(s) for its location, and post the result back.

    controller_session - session used for controller HTTP calls.
    Returns sub_get_success: whether an item was obtained from the controller.
    """
    sub_get_success, content = get_item_from_controller(controller_session)
    #Grade and handle here
    if sub_get_success:
        # presumably ends the current transaction so fresh rows are read -- TODO confirm
        transaction.commit()
        sub = Submission.objects.get(id=int(content['submission_id']))
        subs_graded_by_instructor = staff_grading_util.finished_submissions_graded_by_instructor(sub.location)
        first_sub = subs_graded_by_instructor.order_by('date_created')[0]
        # NOTE(review): parsed_rubric appears unused below -- verify before removing.
        parsed_rubric=rubric_functions.parse_rubric(first_sub.rubric)
        #strip out unicode and other characters in student response
        #Needed, or grader may potentially fail
        #TODO: Handle unicode in student responses properly
        student_response = sub.student_response.encode('ascii', 'ignore')
        #Get the latest created model for the given location
        transaction.commit()
        location_suffixes=ml_grading_util.generate_rubric_location_suffixes(subs_graded_by_instructor, grading = True)
        # NOTE(review): rubric_scores / rubric_scores_complete (and
        # final_results/status below) are only bound when location_suffixes is
        # non-empty; an empty list would raise NameError further down.
        if len(location_suffixes)>0:
            rubric_scores_complete=True
            rubric_scores=[]
        # m == 0 grades the overall score; m > 0 grades individual rubric items.
        for m in xrange(0,len(location_suffixes)):
            suffix = location_suffixes[m]
            success, created_model=ml_grading_util.get_latest_created_model(sub.location + suffix)
            if not success:
                log.error("Could not identify a valid created model!")
                if m==0:
                    results= RESULT_FAILURE_DICT
                    formatted_feedback="error"
                    status=GraderStatus.failure
                    statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
                        tags=["success:False"])
            else:
                #Create grader path from location in submission
                grader_path = os.path.join(settings.ML_MODEL_PATH,created_model.model_relative_path)
                model_stored_in_s3=created_model.model_stored_in_s3
                success, grader_data=load_model_file(created_model,use_full_path=False)
                if success:
                    results = grade.grade(grader_data, student_response)
                else:
                    results=RESULT_FAILURE_DICT
                #If the above fails, try using the full path in the created_model object
                if not results['success'] and not created_model.model_stored_in_s3:
                    grader_path=created_model.model_full_path
                    try:
                        success, grader_data=load_model_file(created_model,use_full_path=True)
                        if success:
                            results = grade.grade(grader_data, student_response)
                        else:
                            results=RESULT_FAILURE_DICT
                    except Exception:
                        error_message="Could not find a valid model file."
                        log.exception(error_message)
                        results=RESULT_FAILURE_DICT
                log.info("ML Grader: Success: {0} Errors: {1}".format(results['success'], results['errors']))
                statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
                    tags=["success:{0}".format(results['success']), 'location:{0}'.format(sub.location)])
                #Set grader status according to success/fail
                if results['success']:
                    status = GraderStatus.success
                else:
                    status = GraderStatus.failure
            if m==0:
                final_results=results
            elif results['success']==False:
                rubric_scores_complete = False
            else:
                rubric_scores.append(int(results['score']))
        if len(rubric_scores)==0:
            rubric_scores_complete=False
        # Payload posted back to the controller; overall score comes from the
        # m == 0 results, per-rubric-item scores from rubric_scores.
        grader_dict = {
            'score': int(final_results['score']),
            'feedback': json.dumps(results['feedback']),
            'status': status,
            'grader_id': 1,
            'grader_type': "ML",
            'confidence': results['confidence'],
            'submission_id': sub.id,
            'errors' : ' '.join(results['errors']),
            'rubric_scores_complete' : rubric_scores_complete,
            'rubric_scores' : json.dumps(rubric_scores),
        }
        #Create grader object in controller by posting back results
        created, msg = util._http_post(
            controller_session,
            urlparse.urljoin(settings.GRADING_CONTROLLER_INTERFACE['url'],
                project_urls.ControllerURLs.put_result),
            grader_dict,
            settings.REQUESTS_TIMEOUT,
        )
    else:
        log.error("Error getting item from controller or no items to get.")
        statsd.increment("open_ended_assessment.grading_controller.call_ml_grader",
            tags=["success:False"])
    util.log_connection_data()
    return sub_get_success
def handle_single_location(location):
    """
    Create or refresh the ML grading models for a single location.

    A model is (re)trained per rubric suffix when enough instructor-graded
    essays exist (>= settings.MIN_TO_USE_ML) and either no model exists or at
    least 5 new graded essays have accumulated.  Model rows are saved through
    ml_grading_util.save_created_model; no value is returned.
    """
    try:
        transaction.commit()
        gc.collect()
        subs_graded_by_instructor = staff_grading_util.finished_submissions_graded_by_instructor(
            location)
        log.debug(
            "Checking location {0} to see if essay count {1} greater than min {2}"
            .format(
                location,
                subs_graded_by_instructor.count(),
                settings.MIN_TO_USE_ML,
            ))
        graded_sub_count = subs_graded_by_instructor.count()
        #check to see if there are enough instructor graded essays for location
        if graded_sub_count >= settings.MIN_TO_USE_ML:
            location_suffixes = ml_grading_util.generate_rubric_location_suffixes(
                subs_graded_by_instructor, grading=False)
            # One score list per essay; entry m-1 feeds the rubric-item model m.
            sub_rubric_scores = []
            if len(location_suffixes) > 0:
                for sub in subs_graded_by_instructor:
                    success, scores = controller.rubric_functions.get_submission_rubric_instructor_scores(
                        sub)
                    sub_rubric_scores.append(scores)
            # Limit training data to at most MAX_TO_USE_ML essays.
            if settings.MAX_TO_USE_ML < graded_sub_count:
                graded_sub_count = settings.MAX_TO_USE_ML
                subs_graded_by_instructor = subs_graded_by_instructor[:settings.
                                                                     MAX_TO_USE_ML]
            # Suffix 0 = overall score model; later suffixes = rubric items.
            for m in xrange(0, len(location_suffixes)):
                log.debug(
                    "Currently on location {0}. Greater than zero is a rubric item."
                    .format(m))
                suffix = location_suffixes[m]
                #Get paths to ml model from database
                relative_model_path, full_model_path = ml_grading_util.get_model_path(
                    location + suffix)
                #Get last created model for given location
                transaction.commit()
                success, latest_created_model = ml_grading_util.get_latest_created_model(
                    location + suffix)
                if success:
                    sub_count_diff = graded_sub_count - latest_created_model.number_of_essays
                else:
                    sub_count_diff = graded_sub_count
                #Retrain if no model exists, or every 5 graded essays.
                if not success or sub_count_diff >= 5:
                    text = [
                        str(i.student_response.encode('ascii', 'ignore'))
                        for i in subs_graded_by_instructor
                    ]
                    ids = [i.id for i in subs_graded_by_instructor]
                    #TODO: Make queries more efficient
                    #This is for the basic overall score
                    if m == 0:
                        scores = [
                            z.get_last_grader().score
                            for z in list(subs_graded_by_instructor)
                        ]
                    else:
                        scores = [z[m - 1] for z in sub_rubric_scores]
                    #Get the first graded submission, so that we can extract metadata like rubric, etc, from it
                    first_sub = subs_graded_by_instructor[0]
                    prompt = str(first_sub.prompt.encode('ascii', 'ignore'))
                    rubric = str(first_sub.rubric.encode('ascii', 'ignore'))
                    transaction.commit()
                    #Checks to see if another model creator process has started amodel for this location
                    success, model_started, created_model = ml_grading_util.check_if_model_started(
                        location + suffix)
                    #Checks to see if model was started a long time ago, and removes and retries if it was.
                    if model_started:
                        now = timezone.now()
                        second_difference = (
                            now - created_model.date_modified).total_seconds()
                        if second_difference > settings.TIME_BEFORE_REMOVING_STARTED_MODEL:
                            log.error(
                                "Model for location {0} started over {1} seconds ago, removing and re-attempting."
                                .format(
                                    location + suffix,
                                    settings.TIME_BEFORE_REMOVING_STARTED_MODEL))
                            created_model.delete()
                            model_started = False
                    if not model_started:
                        # Placeholder row marks this location as in-progress
                        # for other model-creator workers.
                        created_model_dict_initial = {
                            'max_score': first_sub.max_score,
                            'prompt': prompt,
                            'rubric': rubric,
                            'location': location + suffix,
                            'course_id': first_sub.course_id,
                            'submission_ids_used': json.dumps(ids),
                            'problem_id': first_sub.problem_id,
                            'model_relative_path': relative_model_path,
                            'model_full_path': full_model_path,
                            'number_of_essays': graded_sub_count,
                            'creation_succeeded': False,
                            'creation_started': True,
                            'creation_finished': False,
                        }
                        transaction.commit()
                        success, initial_id = ml_grading_util.save_created_model(
                            created_model_dict_initial)
                        transaction.commit()
                        results = create.create(text, scores, prompt)
                        # NOTE(review): `scores` is deliberately rebound only
                        # after create.create has consumed the original values.
                        scores = [int(score_item) for score_item in scores]
                        #Add in needed stuff that ml creator does not pass back
                        results.update({
                            'text': text,
                            'score': scores,
                            'model_path': full_model_path,
                            'relative_model_path': relative_model_path,
                            'prompt': prompt
                        })
                        #Try to create model if ml model creator was successful
                        if results['success']:
                            try:
                                success, s3_public_url = save_model_file(
                                    results, settings.USE_S3_TO_STORE_MODELS)
                                results.update({
                                    's3_public_url': s3_public_url,
                                    'success': success
                                })
                                if not success:
                                    results['errors'].append(
                                        "Could not save model.")
                            # NOTE(review): bare except; save failures are
                            # logged and folded into results['errors'].
                            except:
                                results['errors'].append(
                                    "Could not save model.")
                                results['s3_public_url'] = ""
                                log.exception("Problem saving ML model.")
                        created_model_dict_final = {
                            'cv_kappa': results['cv_kappa'],
                            'cv_mean_absolute_error':
                            results['cv_mean_absolute_error'],
                            'creation_succeeded': results['success'],
                            's3_public_url': results['s3_public_url'],
                            'model_stored_in_s3':
                            settings.USE_S3_TO_STORE_MODELS,
                            's3_bucketname': str(settings.S3_BUCKETNAME),
                            'creation_finished': True,
                            'model_relative_path': relative_model_path,
                            'model_full_path': full_model_path,
                            'location': location + suffix,
                        }
                        transaction.commit()
                        # NOTE(review): `id` shadows the builtin; on failure it
                        # holds the error value from save_created_model.
                        success, id = ml_grading_util.save_created_model(
                            created_model_dict_final,
                            update_model=True,
                            update_id=initial_id)
                        if not success:
                            log.error(
                                "ModelCreator creation failed. Error: {0}".
                                format(id))
                            statsd.increment(
                                "open_ended_assessment.grading_controller.call_ml_creator",
                                tags=[
                                    "success:False",
                                    "location:{0}".format(location)
                                ])
                        log.debug(
                            "Location: {0} Creation Status: {1} Errors: {2}".
                            format(
                                full_model_path,
                                results['success'],
                                results['errors'],
                            ))
                        statsd.increment(
                            "open_ended_assessment.grading_controller.call_ml_creator",
                            tags=[
                                "success:{0}".format(results['success']),
                                "location:{0}".format(location)
                            ])
        util.log_connection_data()
    # NOTE(review): bare except -- all per-location failures are logged and
    # counted in statsd but otherwise swallowed.
    except:
        log.exception(
            "Problem creating model for location {0}".format(location))
        statsd.increment(
            "open_ended_assessment.grading_controller.call_ml_creator",
            tags=["success:Exception", "location:{0}".format(location)])
def handle_single_item(controller_session):
    """
    Grade one controller-supplied submission with the ML grader and post the
    result back to the grading controller.

    controller_session - session object used for controller HTTP requests.
    Returns sub_get_success: True when an item was fetched and processed.
    """
    sub_get_success, content = get_item_from_controller(controller_session)
    log.debug(content)
    #Grade and handle here
    if sub_get_success:
        # presumably closes the open transaction so the Submission read below
        # sees committed data -- TODO confirm
        transaction.commit()
        sub = Submission.objects.get(id=int(content['submission_id']))
        subs_graded_by_instructor = staff_grading_util.finished_submissions_graded_by_instructor(
            sub.location)
        first_sub = subs_graded_by_instructor.order_by('date_created')[0]
        # NOTE(review): parsed_rubric is not referenced afterwards -- confirm
        # before deleting.
        parsed_rubric = rubric_functions.parse_rubric(first_sub.rubric)
        #strip out unicode and other characters in student response
        #Needed, or grader may potentially fail
        #TODO: Handle unicode in student responses properly
        student_response = sub.student_response.encode('ascii', 'ignore')
        #Get the latest created model for the given location
        transaction.commit()
        location_suffixes = ml_grading_util.generate_rubric_location_suffixes(
            subs_graded_by_instructor, grading=True)
        # NOTE(review): these (and final_results/status in the loop) stay
        # unbound when location_suffixes is empty, which would raise NameError
        # at grader_dict construction below.
        if len(location_suffixes) > 0:
            rubric_scores_complete = True
            rubric_scores = []
        # Iteration 0 produces the overall score; later iterations grade
        # individual rubric items.
        for m in xrange(0, len(location_suffixes)):
            suffix = location_suffixes[m]
            success, created_model = ml_grading_util.get_latest_created_model(
                sub.location + suffix)
            if not success:
                log.debug("Could not identify a valid created model!")
                if m == 0:
                    results = RESULT_FAILURE_DICT
                    formatted_feedback = "error"
                    status = GraderStatus.failure
                    statsd.increment(
                        "open_ended_assessment.grading_controller.call_ml_grader",
                        tags=["success:False"])
            else:
                #Create grader path from location in submission
                grader_path = os.path.join(settings.ML_MODEL_PATH,
                                           created_model.model_relative_path)
                model_stored_in_s3 = created_model.model_stored_in_s3
                success, grader_data = load_model_file(created_model,
                                                       use_full_path=False)
                if success:
                    results = grade.grade(grader_data, student_response)
                else:
                    results = RESULT_FAILURE_DICT
                #If the above fails, try using the full path in the created_model object
                if not results[
                        'success'] and not created_model.model_stored_in_s3:
                    grader_path = created_model.model_full_path
                    try:
                        success, grader_data = load_model_file(
                            created_model, use_full_path=True)
                        if success:
                            results = grade.grade(grader_data,
                                                  student_response)
                        else:
                            results = RESULT_FAILURE_DICT
                    # NOTE(review): bare except -- any load/grade failure is
                    # logged and converted into a failure result.
                    except:
                        error_message = "Could not find a valid model file."
                        log.exception(error_message)
                        results = RESULT_FAILURE_DICT
                log.debug("ML Grader: Success: {0} Errors: {1}".format(
                    results['success'], results['errors']))
                statsd.increment(
                    "open_ended_assessment.grading_controller.call_ml_grader",
                    tags=[
                        "success:{0}".format(results['success']),
                        'location:{0}'.format(sub.location)
                    ])
                #Set grader status according to success/fail
                if results['success']:
                    status = GraderStatus.success
                else:
                    status = GraderStatus.failure
            if m == 0:
                final_results = results
            elif results['success'] == False:
                rubric_scores_complete = False
            else:
                rubric_scores.append(int(results['score']))
        if len(rubric_scores) == 0:
            rubric_scores_complete = False
        # Payload returned to the controller: overall score from iteration 0,
        # rubric-item scores accumulated in rubric_scores.
        grader_dict = {
            'score': int(final_results['score']),
            'feedback': json.dumps(results['feedback']),
            'status': status,
            'grader_id': 1,
            'grader_type': "ML",
            'confidence': results['confidence'],
            'submission_id': sub.id,
            'errors': ' '.join(results['errors']),
            'rubric_scores_complete': rubric_scores_complete,
            'rubric_scores': json.dumps(rubric_scores),
        }
        #Create grader object in controller by posting back results
        created, msg = util._http_post(
            controller_session,
            urlparse.urljoin(settings.GRADING_CONTROLLER_INTERFACE['url'],
                             project_urls.ControllerURLs.put_result),
            grader_dict,
            settings.REQUESTS_TIMEOUT,
        )
        log.debug("Got response of {0} from server, message: {1}".format(
            created, msg))
    else:
        log.info("Error getting item from controller or no items to get.")
        statsd.increment(
            "open_ended_assessment.grading_controller.call_ml_grader",
            tags=["success:False"])
    util.log_connection_data()
    return sub_get_success