예제 #1
0
def grade_generic(grader_data, numeric_features, textual_features):
    """
    Grades a set of numeric and textual features using a generic model.

    grader_data -- dictionary containing:
    {
        'algorithm' - Type of algorithm to use to score
    }
    The whole dictionary is also handed to get_classifier_and_ext to obtain
    the model and the feature extractor.
    numeric_features - list of numeric features to predict on
    textual_features - list of textual features to predict on

    Returns a dictionary with the keys:
        'errors' - list of error messages collected while grading
        'tests' - always an empty list in this function
        'score' - first element of model.predict(...), or 0 if scoring failed
        'success' - True only when no error was recorded
        'confidence' - result of get_confidence_value, or 0 if it could not
                       be computed

    Failures while adding the row or while scoring are logged and reported
    through 'errors'; they are not raised. An exception raised by
    get_classifier_and_ext or by the PredictorSet constructor is not caught
    here and propagates to the caller.
    """
    results = {
        'errors': [],
        'tests': [],
        'score': 0,
        'success': False,
        'confidence': 0
    }

    has_error = False

    # Build the single-row predictor set and load the model / extractor.
    grader_set = predictor_set.PredictorSet(essaytype="test")

    model, extractor = get_classifier_and_ext(grader_data)

    # Try to add the submission to the predictor set.
    try:
        grader_set.add_row(numeric_features, textual_features, 0)
    except Exception:
        error_msg = "Row could not be added to predictor set:{0} {1}".format(
            numeric_features, textual_features)
        log.exception(error_msg)
        results['errors'].append(error_msg)
        has_error = True

    # Try to extract features from the submission and score it with the model.
    # feats_extracted records whether grader_feats holds a real value, so the
    # confidence step below never touches an unbound name.
    grader_feats = None
    feats_extracted = False
    try:
        grader_feats = extractor.gen_feats(grader_set)
        feats_extracted = True
        results['score'] = model.predict(grader_feats)[0]
    except Exception:
        error_msg = "Could not extract features and score essay."
        log.exception(error_msg)
        results['errors'].append(error_msg)
        has_error = True

    # Try to determine the confidence level. Without extracted features there
    # is nothing to compute it from; that failure has already been logged
    # above, so the step is skipped and 'confidence' keeps its default of 0.
    if feats_extracted:
        try:
            results['confidence'] = get_confidence_value(
                grader_data['algorithm'], model, grader_feats,
                results['score'])
        except Exception:
            # A confidence failure is not a show-stopper, so just log it.
            log.exception("Problem generating confidence value")

    if not has_error:
        results['success'] = True

    return results
예제 #2
0
def create_generic(numeric_values,
                   textual_values,
                   target,
                   algorithm=util_functions.AlgorithmTypes.regression):
    """
    Creates a model from a generic list of numeric values and text values.

    numeric_values - A list of lists that are the predictors
    textual_values - A list of lists that are the predictors
    (each item in textual_values corresponds to the similarly indexed counterpart in numeric_values)
    target - The variable that we are trying to predict.  A list of integers.
    algorithm - the type of algorithm that will be used (currently ignored,
                see the note at the top of the body)

    Returns a dictionary with the keys:
        'errors' - list of error messages; empty on success
        'success' - True only when the model was created without errors
        'cv_kappa' - cv_error_results['kappa'] from model creation, else 0
        'cv_mean_absolute_error' - cv_error_results['mae'], else 0
        'feature_ext' - the feature extractor returned by model creation, else ""
        'classifier' - the classifier returned by model creation, else ""
        'algorithm' - the algorithm chosen by select_algorithm(target)

    Failures are logged and reported through 'errors'; they are not raised.
    The function returns as soon as a step fails, so 'success' is never True
    while 'errors' is non-empty.
    """

    # NOTE(review): the algorithm argument is overwritten unconditionally, so
    # the caller-supplied value never takes effect. Kept as-is because
    # existing callers may depend on the automatic selection -- confirm intent.
    algorithm = select_algorithm(target)

    # Initialize a result dictionary to return.
    results = {
        'errors': [],
        'success': False,
        'cv_kappa': 0,
        'cv_mean_absolute_error': 0,
        'feature_ext': "",
        'classifier': "",
        'algorithm': algorithm
    }

    if not (len(numeric_values) == len(textual_values) == len(target)):
        msg = "Target, numeric features, and text features must all be the same length."
        results['errors'].append(msg)
        # No exception is active here, so log a plain error rather than
        # log.exception (which would attach an empty traceback).
        log.error(msg)
        return results

    try:
        # Initialize a predictor set object that encapsulates all of the text and numeric predictors.
        pset = predictor_set.PredictorSet(essaytype="train")
        # The three inputs were verified above to have equal length.
        for numeric_row, textual_row, row_target in zip(
                numeric_values, textual_values, target):
            pset.add_row(numeric_row, textual_row, row_target)
    except Exception:
        msg = "predictor set creation failed."
        results['errors'].append(msg)
        log.exception(msg)
        # Do not train on a missing or partially filled predictor set.
        return results

    try:
        # Extract all features and then train a classifier with the features.
        feature_ext, classifier, cv_error_results = model_creator.extract_features_and_generate_model_predictors(
            pset, algorithm)
        results['cv_kappa'] = cv_error_results['kappa']
        results['cv_mean_absolute_error'] = cv_error_results['mae']
        results['feature_ext'] = feature_ext
        results['classifier'] = classifier
        results['success'] = True
    except Exception:
        msg = "feature extraction and model creation failed."
        results['errors'].append(msg)
        log.exception(msg)

    return results