import os


def create_similar_token(read_file_affix, method, find_nearest_comparison,
                         remediation_sample_number):
    root_path = os.path.split(os.getcwd())[0]
    print('root path: ' + root_path)
    print('read file: ' + read_file_affix)
    print('method: ' + method)
    print('sample_number: ' + str(remediation_sample_number))
    print('nearest comparison: ' + find_nearest_comparison)
    # [TODO] incorporate window and embedding into read_file_affix
    output_path = root_path + '/' + 'cahl_output' + '/'
    response_vectors = read_embedding_vectors(
        create_file_path(output_path, 'embed_vectors_', read_file_affix))
    response_tokens = read_tokens(
        create_file_path(output_path, 'embed_index_', read_file_affix))
    learning_state_vectors = read_embedding_vectors(
        create_file_path(output_path, 'learning_state_vectors_',
                         read_file_affix))
    learning_state_tokens = read_tokens(
        create_file_path(output_path, 'learning_state_tokens_',
                         read_file_affix))
    similarity_instance = CreateSimilarityToken(response_vectors,
                                                response_tokens,
                                                learning_state_vectors,
                                                learning_state_tokens)

    # find matches between learning state and response tokens
    remediation_match_tokens = similarity_instance.generate_similarity_match(
        find_nearest_comparison=find_nearest_comparison,
        method=method,
        sample_number=remediation_sample_number)

    # find the nearest response tokens for each response token
    response_similar_tokens = similarity_instance.generate_similarity_match(
        find_nearest_comparison='response-response',
        method=method,
        sample_number=1)

    print('***CREATE RESPONSE TOKENS***')
    path_affix = create_path_affix(method, find_nearest_comparison,
                                   read_file_affix, remediation_sample_number)

    write_output(similarity=similarity_instance,
                 root_path=root_path,
                 path_affix=path_affix,
                 remediation_match_tokens=remediation_match_tokens,
                 response_similar_tokens=response_similar_tokens)
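CreateSimilarityToken is project code that is not shown in these snippets. As a rough sketch of what a cosine-based generate_similarity_match might do internally, the following self-contained function finds the nearest response tokens for a set of query vectors (the function name and the top_k parameter are illustrative assumptions, not from the source):

import numpy as np

def nearest_response_tokens(query_vectors, response_vectors,
                            response_tokens, top_k=1):
    # normalize rows so that dot products equal cosine similarities
    q = query_vectors / np.linalg.norm(query_vectors, axis=1, keepdims=True)
    r = response_vectors / np.linalg.norm(response_vectors, axis=1,
                                          keepdims=True)
    similarities = q @ r.T
    # take the indices of the top_k most similar response vectors per query
    top_indices = np.argsort(-similarities, axis=1)[:, :top_k]
    return [[response_tokens[i] for i in row] for row in top_indices]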
Example 2
def read_vectors_and_index(output_path, read_file_affix):
    # read the exercise embedding vectors
    # read the exercise embedding index
    vector_filepath = output_path + 'embed_vectors_' + read_file_affix
    index_filepath = output_path + 'embed_index_' + read_file_affix
    response_vectors = read_embedding_vectors(vector_filepath)
    response_tokens = read_tokens(index_filepath)
    prerequisites = read_prerequisite_data('prerequisites', is_json_file=False)
    return response_vectors, response_tokens, prerequisites
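read_embedding_vectors and read_tokens are helpers defined elsewhere in the project (util_functions). A minimal sketch of what they might look like, assuming the vector files store one whitespace-separated row of floats per line and the index files one token per line (the file format is an assumption):

import numpy as np

def read_embedding_vectors(filepath):
    # assumed format: one embedding vector per line, whitespace-separated
    return np.loadtxt(filepath)

def read_tokens(filepath):
    # assumed format: one token per line
    with open(filepath) as f:
        return [line.strip() for line in f]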
Example 3
def convert_embedding_to_tsne(read_file_affix, method,
                              find_nearest_comparison):
    # The read_file_affix needs to be in the form: fullw10e30
    # for remediation matches, we want: fullw10e30r1
    # Read embedding and token file
    # must run from the directory where the code lives for the root path to resolve
    root_path = os.path.split(os.getcwd())[0]
    print('root path: ' + root_path)
    print('read file: ' + read_file_affix)
    print('method: ' + method)
    print('nearest comparison: ' + find_nearest_comparison)
    output_path = root_path + '/' + 'cahl_output' + '/'
    response_vectors = read_embedding_vectors(
        create_file_path(output_path, 'embed_vectors_', read_file_affix))
    response_tokens = read_tokens(
        create_file_path(output_path, 'embed_index_', read_file_affix))
    learning_state_vectors = read_embedding_vectors(
        create_file_path(output_path, 'learning_state_vectors_',
                         read_file_affix))
    learning_state_tokens = read_tokens(
        create_file_path(output_path, 'learning_state_tokens_',
                         read_file_affix))
    # generate the t-SNE dataframe from the response and learning state vectors
    print('create TSNE')
    tsne_instance = CreateTSNE(response_vectors, learning_state_vectors,
                               response_tokens, learning_state_tokens)
    response_and_learning_tsne_df = (
        tsne_instance.join_response_and_learning_state_df())

    # read remediation matches
    parameters = method + '_' + find_nearest_comparison + '_' + read_file_affix + 'r1'
    analysis_path = root_path + '/' + 'cahl_analysis' + '/' + parameters + '/'
    remediation_matches = read_remediation_match(analysis_path +
                                                 'remediation_match_tf')
    response_and_remediation_tsne_df = tsne_instance.join_response_and_remediation_df(
        remediation_matches)
    return response_and_learning_tsne_df, response_and_remediation_tsne_df
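CreateTSNE is also project code; the dimensionality-reduction step it wraps presumably resembles scikit-learn's TSNE. A self-contained sketch of projecting embedding vectors down to 2-D coordinates (random data stands in for the real vectors):

import numpy as np
from sklearn.manifold import TSNE

vectors = np.random.rand(200, 30)            # stand-in for response vectors
tsne = TSNE(n_components=2, random_state=0)
coords = tsne.fit_transform(vectors)         # shape (200, 2): x, y per token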
Example 4
def create_learning_embedding(read_file_affix):
    '''
    Must be run from the directory where this file lives so that the
    root path resolves correctly.
    '''
    path = os.path.split(os.getcwd())
    root_path = path[0]
    print('root path: ' + root_path)
    print('read file: ' + read_file_affix)
    response_vectors = read_embedding_vectors(
        root_path + '/cahl_output/embed_vectors_' + read_file_affix)
    response_tokens = read_tokens(
        root_path + '/cahl_output/embed_index_' + read_file_affix)
    similarity_instance = CreateSimilarityToken(response_vectors, response_tokens)

    # create learning state tokens and write them out
    print('***CREATE LEARNING STATE TOKENS***')
    write_learning_state_output(similarity=similarity_instance,
                                root_path=root_path,
                                read_file_affix=read_file_affix)
Example 5
def main():
    root_path = os.path.split(os.getcwd())[0] + '/'
    output_path = root_path + 'cahl_output' + '/'
    read_file_affix = 'fullw10e30'

    response_vectors = read_embedding_vectors(root_path +
                                              'cahl_output/embed_vectors_' +
                                              read_file_affix)
    response_tokens = read_tokens(root_path + 'cahl_output/embed_index_' +
                                  read_file_affix)
    grouped_embeddings = group_token_and_vectors_by_exercise(
        response_tokens, response_vectors)
    exercise_tokens, exercise_vectors = average_vectors_by_exercise(
        grouped_embeddings)

    write_token_file(path=output_path,
                     file_name='embed_index_exercise',
                     tokens=exercise_tokens)
    write_vector_file(path=output_path,
                      file_name='embed_vectors_exercise',
                      vectors=exercise_vectors)
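group_token_and_vectors_by_exercise and average_vectors_by_exercise are defined elsewhere. Assuming each response token encodes an exercise name plus a suffix (the 'exercise|suffix' delimiter below is an assumption), a minimal sketch of grouping and averaging could look like this:

import numpy as np
from collections import defaultdict

def average_vectors_by_exercise(tokens, vectors):
    groups = defaultdict(list)
    for token, vector in zip(tokens, vectors):
        exercise = token.split('|')[0]  # assumed exercise/suffix delimiter
        groups[exercise].append(vector)
    exercise_tokens = sorted(groups)
    exercise_vectors = np.array(
        [np.mean(groups[token], axis=0) for token in exercise_tokens])
    return exercise_tokens, exercise_vectors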
Example 6
# TESTING: Change file names
# The file affix represents specs on the model that we will iterate
# W<Windowsize><TokenLevel><SimilarityApproach>, for example
# file_affix = 'W10ResponseCosine'
import os

from util_functions import read_embedding_vectors
from util_functions import read_tokens
# read_remediation_tokens is assumed to also live in util_functions
from util_functions import read_remediation_tokens

root_path = os.path.split(os.getcwd())[0] + '/'
code_path = root_path + 'cahl_remediation_research' + '/'
output_path = root_path + 'cahl_output' + '/'

analysis_path_affix = 'W10CosineResponseC1' + '/'
analysis_path = root_path + 'cahl_analysis' + '/' + analysis_path_affix

read_file_affix = 'full'
response_vectors = read_embedding_vectors(output_path + 'embed_vectors_' +
                                          read_file_affix)
response_tokens = read_tokens(output_path + 'embed_index_' + read_file_affix)

learning_vectors = read_embedding_vectors(analysis_path +
                                          'learning_state_vectors')
learning_tokens = read_tokens(analysis_path + 'learning_state_tokens')

# read the remediation match tokens
remediation_matches = read_remediation_tokens(analysis_path +
                                              'remediation_match_tokens')

selected_subjects = [
    'algebra',
    # 'algebra-basics',
    'algebra2',
    'ap-calculus-ab',