def get_indexer(options):
    """Generate an indexer describing the locations of fields within data."""
    if options.data_format == 'simple':
        idx_pl = FieldIndexer(FieldIndexer.simple_fields)
    else:
        idx_pl = FieldIndexer(FieldIndexer.plog_fields)
    return idx_pl
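
FieldIndexer itself does not appear in these examples, only its use. Below is a minimal sketch of what such a class might look like, assuming it simply maps field names to column positions in the comma-separated input; only the names simple_fields, plog_fields, get_for_slug, user, exercise, and correct come from the snippets on this page, and the concrete column orders are illustrative assumptions, not the library's actual layouts.

class FieldIndexer(object):
    """Illustrative sketch: map field names to column positions in a line.

    The column orders below are assumptions for demonstration only.
    """
    simple_fields = ['user', 'exercise', 'time_taken', 'correct']
    plog_fields = ['user', 'time_taken', 'exercise', 'correct']

    def __init__(self, field_names):
        # Expose each field's column position as an attribute, e.g.
        # indexer.user == 0 for the layouts above.
        for position, name in enumerate(field_names):
            setattr(self, name, position)

    @classmethod
    def get_for_slug(cls, slug):
        # Mirrors the 'simple' vs. 'plog' branch in get_indexer above.
        fields = cls.simple_fields if slug == 'simple' else cls.plog_fields
        return cls(fields)
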
Example #2
def get_student_responses(students_filepath, data_format='simple'):
    """Given a set of student histories in a file, convert to item responses.

    The students file should be indexable by an indexer.
    """
    history = []
    current_user = ''
    indexer = FieldIndexer.get_for_slug(data_format)
    with open(students_filepath, 'r') as infile:
        for line in infile:
            fields = line.strip().split(',')
            user = fields[indexer.user]
            correct = fields[indexer.correct]
            exercise = fields[indexer.exercise]
            # A new user id marks the end of the previous user's history,
            # so emit that history before starting a fresh one.
            if history and user != current_user:
                yield current_user, history
                history = []
            current_user = user
            response = mirt.engine.ItemResponse.new(correct=correct,
                                                    exercise=exercise)
            history.append(response)
    # The loop only yields when a new user appears, so flush the final
    # user's history explicitly.
    if history:
        yield current_user, history
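
A short usage sketch for the generator above, assuming a 'simple'-format CSV; the file name and the sample rows are illustrative, not taken from the library.

# students.csv (hypothetical), one response per line, e.g.:
#   alice,addition_1,17.2,true
#   alice,addition_2,9.5,false
#   bob,addition_1,21.0,true
for user, responses in get_student_responses('students.csv',
                                              data_format='simple'):
    # Each user is yielded once, together with their full response history.
    print('%s answered %d items' % (user, len(responses)))
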
Example #3
def load_and_simulate_assessment(json_filepath,
                                 roc_filepath,
                                 test_filepath,
                                 data_format='simple',
                                 evaluation_item_index=None):
    """Loads a json mirt file and a test file with assessments to evaluate.

    Some questions are marked as evaluation items. These items are held out
    from training the model they are associated with; instead, the model is
    trained on all non-evaluation items and its predictions for the held-out
    items are recorded, so that the model's accuracy can be evaluated.

    Those predictions and the ground truth are written to the file at
    roc_filepath, and are used to evaluate the accuracy of the algorithm,
    typically with a ROC curve.

    Arguments:
        json_filepath: The complete filepath to the mirt file. This is the
            format that is generated by mirt_npz_to_json.py. This file will be
            read.
        roc_filepath: The complete filepath to the file we will write
            predictions to. This file will be written in a format parseable
            by plot_roc_curves.py (in particular, <response,prediction> where
            response is 0 or 1 depending on whether the student answered
            correctly, and prediction is the prediction output by the model).
        test_filepath: The file containing test data. This is a
            comma-separated file whose field layout is determined by the
            data_format argument.

        The field indexes below refer to the positions within each line of
        the test data at which various values are located. Apart from
        evaluation_item_index, they are supplied by the FieldIndexer selected
        via data_format rather than passed as explicit arguments.

        user_index: The index at which the user id lives. This user id is used
            to detect when one assessment ends and the next begins.
        exercise_index: The index at which the exercise id lives. This index is
            used to store the slug of the exercise, which should be the same
            here as it is in the json model.
        time_index: The index at which the amount of time taken to solve the
            problem in seconds is stored.
        correct_index: The index at which whether the student answered
            correctly or not is stored. When true, this value should be
            stored as 'True' or 'true' (without the quotes).
        evaluation_item_index: The index of the flag used to indicate whether
            this response should be used to generate the ROC curve.
            If the response should be held out, this value should be 'true' or
            'True'.  If there is no such value, keep a random item.
    """
    # Load the parameters from the json parameter file.
    # with open(json_filepath, 'r') as json_file:
    #     params = json.load(json_file)['params']
    #     params['theta_flat'] = numpy.array(params['theta_flat'])
    params = mirt.mirt_util.json_to_data(json_filepath)

    # Load the indexer for the data
    indexer = FieldIndexer.get_for_slug(data_format)

    datapoints = []

    # Iterate through each user's data, writing out a datapoint for
    # each user.
    with open(roc_filepath, 'w') as outfile, \
            open(test_filepath, 'r') as test_data:

        user = ''
        model = mirt.mirt_engine.MIRTEngine(params)
        history = []
        evaluation_indexes = []
        model.only_live_exercises = False

        for line in test_data:
            # Read in the next line
            new_user, ex, time, correct, is_evaluation = parse_line(
                line, indexer, evaluation_item_index)

            # When we see a new user, reset the model and calculate
            # predictions for the user whose history just ended.
            if user != new_user:
                # Generate the datapoint for the existing user history
                if user:
                    datapoints.extend(
                        write_roc_datapoint(history, evaluation_indexes, model,
                                            outfile))

                # Reset all of the variables.
                user = new_user
                model = mirt.mirt_engine.MIRTEngine(params)
                history = []
                evaluation_indexes = []

            # Finally, append the response to the history
            response = mirt.engine.ItemResponse.new(correct=correct,
                                                    exercise=ex,
                                                    time_taken=time)
            history.append(response.data)

            # Save the indexes of the evaluation items for use when generating
            # points for the ROC curve.
            if is_evaluation:
                evaluation_indexes.append(len(history) - 1)
        # The loop above only writes a user's history when the next user
        # appears, so write out the datapoint for the final user here.
        if user:
            datapoints.extend(
                write_roc_datapoint(history, evaluation_indexes, model,
                                    outfile))
    return datapoints
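
The helpers parse_line and write_roc_datapoint are called above but not included in these examples. Below is a hedged sketch of what parse_line might look like, inferred only from how its return values are used; the attribute name time_taken, the type conversions, and the handling of a missing evaluation flag are assumptions, and the real implementation may differ.

def parse_line(line, indexer, evaluation_item_index=None):
    """Illustrative sketch: split one CSV line into the fields used above.

    Returns (user, exercise, time_taken, correct, is_evaluation). The type
    conversions here are assumptions for demonstration only.
    """
    fields = line.strip().split(',')
    user = fields[indexer.user]
    exercise = fields[indexer.exercise]
    # Assumed attribute name for the time column on the indexer.
    time_taken = float(fields[indexer.time_taken])
    correct = fields[indexer.correct] in ('true', 'True')
    # When no evaluation flag column is given, nothing is marked here; the
    # caller would then need to hold out an item some other way.
    is_evaluation = (evaluation_item_index is not None and
                     fields[evaluation_item_index] in ('true', 'True'))
    return user, exercise, time_taken, correct, is_evaluation
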