def grade_generic(grader_data, numeric_features, textual_features):
    """
    Grades a set of numeric and textual features using a generic model

    grader_data -- dictionary containing:
    {
        'algorithm' - Type of algorithm to use to score
    }
    (grader_data is also handed to get_classifier_and_ext to obtain the model
    and feature extractor; any further keys that helper needs are not visible
    here -- confirm against its definition.)
    numeric_features - list of numeric features to predict on
    textual_features - list of textual feature to predict on

    Returns a dictionary with the keys:
        'errors'     - list of error messages collected while grading
        'tests'      - always an empty list in this function
        'score'      - first element of model.predict(...), or 0 if scoring failed
        'success'    - True only when no error was recorded
        'confidence' - value from get_confidence_value, or 0 if it was not
                       (or could not be) computed

    Errors raised while building the predictor set object or loading the
    model/extractor are not caught here and propagate to the caller.
    """
    results = {
        'errors': [],
        'tests': [],
        'score': 0,
        'success': False,
        'confidence': 0,
    }

    has_error = False

    # Build the (empty) predictor set and load the model and feature extractor.
    # These calls are deliberately left outside a try block, as in the
    # original: a failure here is raised to the caller.
    grader_set = predictor_set.PredictorSet(essaytype="test")
    model, extractor = get_classifier_and_ext(grader_data)

    # Try to add the submitted features to the predictor set object
    try:
        grader_set.add_row(numeric_features, textual_features, 0)
    except Exception:
        error_msg = "Row could not be added to predictor set:{0} {1}".format(
            numeric_features, textual_features)
        log.exception(error_msg)
        results['errors'].append(error_msg)
        has_error = True

    # Try to extract features from submission and assign score via the model
    grader_feats = None
    scored = False
    try:
        grader_feats = extractor.gen_feats(grader_set)
        results['score'] = model.predict(grader_feats)[0]
        scored = True
    except Exception:
        error_msg = "Could not extract features and score essay."
        log.exception(error_msg)
        results['errors'].append(error_msg)
        has_error = True

    # Only attempt a confidence value when a score was actually produced.
    # Otherwise the features may not exist and the score is just the
    # placeholder 0, so any confidence computed would be meaningless and the
    # logged failure would hide the real cause.
    if scored:
        try:
            results['confidence'] = get_confidence_value(
                grader_data['algorithm'], model, grader_feats,
                results['score'])
        except Exception:
            # If there is an error getting confidence, it is not a
            # show-stopper, so just log
            log.exception("Problem generating confidence value")

    results['success'] = not has_error

    return results
def create_generic(numeric_values, textual_values, target, algorithm=util_functions.AlgorithmTypes.regression):
    """
    Creates a model from a generic list numeric values and text values

    numeric_values - A list of lists that are the predictors
    textual_values - A list of lists that are the predictors
    (each item in textual_values corresponds to the similarly indexed counterpart in numeric_values)
    target - The variable that we are trying to predict.  A list of integers.
    algorithm - the type of algorithm that will be used.  NOTE: the value
        passed in is currently ignored; the algorithm is always chosen by
        select_algorithm(target).  The parameter is kept so existing callers
        keep working.

    Returns a dictionary with the keys:
        'errors'                 - list of error messages (empty on success)
        'success'                - True only if the model was created
        'cv_kappa'               - cv_error_results['kappa'] from model creation, else 0
        'cv_mean_absolute_error' - cv_error_results['mae'] from model creation, else 0
        'feature_ext'            - the feature extractor, or "" on failure
        'classifier'             - the trained classifier, or "" on failure
        'algorithm'              - the algorithm selected from the target
    """
    # The caller-supplied algorithm is overridden by one derived from target.
    algorithm = select_algorithm(target)

    # Initialize a result dictionary to return.
    results = {
        'errors': [],
        'success': False,
        'cv_kappa': 0,
        'cv_mean_absolute_error': 0,
        'feature_ext': "",
        'classifier': "",
        'algorithm': algorithm,
    }

    if len(numeric_values) != len(textual_values) or len(numeric_values) != len(target):
        msg = "Target, numeric features, and text features must all be the same length."
        results['errors'].append(msg)
        # No exception is active here, so log.exception would attach a bogus
        # "NoneType: None" traceback; log a plain error instead.
        log.error(msg)
        return results

    try:
        # Initialize a predictor set object that encapsulates all of the text and numeric predictors
        pset = predictor_set.PredictorSet(essaytype="train")
        for numeric_row, textual_row, target_value in zip(
                numeric_values, textual_values, target):
            pset.add_row(numeric_row, textual_row, target_value)
    except Exception:
        msg = "predictor set creation failed."
        results['errors'].append(msg)
        log.exception(msg)
        # Without a complete predictor set there is nothing valid to train
        # on; stop here rather than training on missing or partial data.
        return results

    try:
        # Extract all features and then train a classifier with the features
        feature_ext, classifier, cv_error_results = model_creator.extract_features_and_generate_model_predictors(
            pset, algorithm)
        results['cv_kappa'] = cv_error_results['kappa']
        results['cv_mean_absolute_error'] = cv_error_results['mae']
        results['feature_ext'] = feature_ext
        results['classifier'] = classifier
        results['success'] = True
    except Exception:
        msg = "feature extraction and model creation failed."
        results['errors'].append(msg)
        log.exception(msg)

    return results