Exemple #1
0
def display_confmat_compare():
    """
    Print confusion matrices comparing automatic and manual task labels.

    The automatic labels (read from ``params.AUTO_TASK_STR``) are compared
    against the manual annotations (read from ``params.HAND_TASK_STR``).
    Two tables are printed through ``printTable``: first the matrix with each
    row divided by its row total, then the matrix of raw counts. The number
    of agreeing entries (the diagonal) and its share of all counted entries
    are printed beforehand.

    Layout of the matrix: rows follow the manual label, columns follow the
    automatic label. Only labels present in the manual annotations are used
    for both axes.

    Finally the duration densities are shown via ``displayDurationDensities``.
    """
    # Load the annotations and bring them into the shape the helpers expect.
    data = post_pro.addDurationFeature(ftools.readData(params.ANNOTATION_FILE))

    # NOTE(review): the first six characters of PATH_TITLE are dropped here;
    # presumably a relative-path prefix -- confirm against params.
    title_path = params.PATH_TITLE[6:] + params.NEW_DAT + params.TITLE_MAT
    data[params.WINDOW_STR] = [
        ftools.joinTitles(title) for title in ftools.load(title_path)
    ]

    auto_labels = np.array(data[params.AUTO_TASK_STR])
    hand_labels = np.array(data[params.HAND_TASK_STR])
    unique_tasks = np.unique(hand_labels)
    n_tasks = len(unique_tasks)

    # counts[row, col]: entries annotated by hand as unique_tasks[row] and
    # labelled automatically as unique_tasks[col].
    counts = np.zeros((n_tasks, n_tasks))
    for row, hand_task in enumerate(unique_tasks):
        hand_mask = hand_labels == hand_task
        for col, auto_task in enumerate(unique_tasks):
            counts[row, col] = np.nansum(
                np.logical_and(auto_labels == auto_task, hand_mask)
            )

    # Row-wise normalisation; the raw counts stay available in `counts`.
    row_totals = np.nansum(counts, axis=1)
    ratios = counts / row_totals[:, None]

    corrects = np.trace(counts)
    print('Correct ', corrects, corrects/np.sum(counts))

    printTable(unique_tasks, ratios)
    printTable(unique_tasks, counts)

    # Classifier runs, currently disabled:
    #   classifier_tools.kNN(data)
    #   classifier_tools.randomForest(data)
    #   classifier_tools.svm(data)

    displayDurationDensities(data)
Exemple #2
0
def example_q_quartiles():
    """
    Write the cumulative duration histogram used in the Q-quartiles example.

    Specifically, it is for reproducing the graph given in Appendix-I
    (Pre-processing of quantitative variables). The data points are saved
    into the file 'cdf_duration_dev.txt' (one ``x<TAB>y`` pair per line),
    which is later used in a gnuplot script.

    ``x`` is the left edge of each unit-wide histogram bin and ``y`` is the
    cumulative count of durations up to and including that bin (counts are
    not normalised).
    """

    data = ftools.readData(params.ANNOTATION_FILE)
    data = post_pro.addDurationFeature(data)  # Just putting the data into a 'good' shape
    # NOTE(review): the first six characters of PATH_TITLE are dropped here;
    # presumably a relative-path prefix -- confirm against params.
    titles = ftools.load(params.PATH_TITLE[6:] + params.NEW_DAT + params.TITLE_MAT)
    data[params.WINDOW_STR] = [ftools.joinTitles(t) for t in titles]
    durations = [int(d) for d in data[params.DURATION_STR]]

    # Bin edges are 1, 2, ..., max(durations), i.e. unit-wide bins.
    bin_edges = np.arange(np.max(durations), step=1) + 1
    counts, edges = np.histogram(durations, bins=bin_edges)
    left_edges = edges[:-1]
    cumulative = np.cumsum(counts)

    # The context manager guarantees the file is closed even if a write fails.
    with open('cdf_duration_dev.txt', 'w') as f:
        for x, y in zip(left_edges, cumulative):
            f.write(str(x) + '\t' + str(y) + '\n')
Exemple #3
0
        if params.STAGE == 1:
            """
            At stage-1, anything that is not Document or Test, needs to be renamed 
            as Others
            """
            for p in [params.HAND_TASK_STR, params.EST_1_STR, \
                      params.EST_2_STR, params.EST_3_STR]:

                data[p] = np.array(data[p])

                data[p][np.invert(np.logical_or(data[p] == params.TEST, \
                     data[p] == params.DOCUMENT))] = params.OTHER
        else:
            """
            At stage-2, we filter out the rows that are labeled Document or Test
            and consider only the others
            """
            boolean_matrix = (data[params.HAND_TASK_STR] == np.array(
                params.TASKS)[:, None])
            query_array = boolean_matrix.any(axis=0)  # Logical or between rows
            for k in data.keys():
                data[k] = np.array(data[k])[query_array]

    titles = ftools.load(params.PATH_TITLE + params.DAT_FILE_PREFIX +
                         params.TITLE_MAT)
    data[params.WINDOW_STR] = [ftools.joinTitles(t) for t in titles]

    ctools.kNN(data, multi=False)
    ctools.randomForest(data, multi=False)
    ctools.svm(data, multi=False)
Exemple #4
0
import tools_presentation as ptools

from prettytable import PrettyTable

from importlib import reload
import params

reload(params)

# Script entry point: loads the per-feature data matrices through ftools.load.
# Each file name is built by concatenating a path, a data-file prefix and a
# matrix name, all taken from params.
if __name__ == "__main__":

    # Wall-clock reference taken before any loading starts.
    start_time = time.time()
    """
    Load activity data and set the definitions
    """
    exes = ftools.load(params.PATH_EXE + params.DAT_FILE_PREFIX +
                       params.EXE_MAT)
    windows = ftools.load(params.PATH_TITLE + params.DAT_FILE_PREFIX +
                          params.TITLE_MAT)
    tasks = ftools.load(params.PATH_TASK + params.DAT_FILE_PREFIX +
                        params.TASK_MAT)
    keystrokes_quan = ftools.load(params.PATH_KSTROKES +
                                  params.DAT_FILE_PREFIX + params.KSTROKE_MAT)
    lunch = ftools.load(params.PATH_LUNCH + params.DAT_FILE_PREFIX +
                        params.LUNCH_MAT)
    # NOTE(review): the click and duration matrices use params.NEW_DAT as the
    # prefix, unlike the loads above which use params.DAT_FILE_PREFIX --
    # confirm this difference is intentional.
    l_clicks = ftools.load(params.PATH_CLICKS + params.NEW_DAT +
                           params.LCLICK_MAT)
    r_clicks = ftools.load(params.PATH_CLICKS + params.NEW_DAT +
                           params.RCLICK_MAT)
    duration = ftools.load(params.PATH_DURATION + params.NEW_DAT +
                           params.DURATION_MAT)
import tools_dic as dtools
import tools_cramersV as Vtools

import params
from importlib import reload
reload(params)

# Script entry point: fetches the name definitions from dtools, loads the
# coded data matrices, and loads the precomputed counts.
if __name__ == "__main__":

    # define_names() returns a 4-tuple, unpacked here into the names used below.
    (exe_names, title_names, time_names,
     level_of_assoc) = dtools.define_names()
    """
    Load all data
    """

    exe_code_mat = ftools.load(params.PATH_EXE + params.DAT_FILE_PREFIX +
                               params.EXE_MAT)
    title_code_mat = ftools.load(params.PATH_TITLE + params.DAT_FILE_PREFIX +
                                 params.TITLE_MAT)
    task_code_mat = ftools.load(params.PATH_TASK + params.DAT_FILE_PREFIX +
                                params.TASK_MAT)
    # The "time" matrix is read from the lunch path / lunch matrix name.
    time_code_mat = ftools.load(params.PATH_LUNCH + params.DAT_FILE_PREFIX +
                                params.LUNCH_MAT)
    """
    Note that currently there is no file 'count_exe_title_task' under the master 
    branch
    
    Please run this file after reproducing the below file by commenting out the
    last part in metadata.py
    """
    # The loaded object is unpacked into three values; the file name is
    # hard-coded rather than built from params.
    (count_exe, count_task_principal,
     count_title) = ftools.load('count_exe_title_task')
Exemple #6
0
from importlib import reload
import params
reload(params)

from prettytable import PrettyTable

import tools_file as ftools
import tools_dic as dtools

# Script entry point: loads the coded exe/title/task matrices and computes how
# many task entries are zero versus non-zero, as counts and as ratios.
if __name__ == "__main__":

    # define_names() returns a 4-tuple, unpacked here into the names used below.
    (exe_names, window_names, time_names,
     level_of_assoc) = dtools.define_names()

    exe_code_mat = ftools.load(params.PATH_EXE + params.DAT_FILE_PREFIX +
                               params.EXE_MAT)
    title_code_mat = ftools.load(params.PATH_TITLE + params.DAT_FILE_PREFIX +
                                 params.TITLE_MAT)
    task_code_mat = ftools.load(params.PATH_TASK + params.DAT_FILE_PREFIX +
                                params.TASK_MAT)

    # Entries equal to 0 are counted as "no task", all others as "single task"
    # (presumably code 0 marks an unlabelled line -- confirm against the
    # task coding).
    n_notask = np.sum(task_code_mat == 0)
    n_singletask = np.sum(task_code_mat != 0)

    # NOTE(review): np.sum above counts over every element, while len() gives
    # the size of the first axis only; the ratios are true fractions only if
    # task_code_mat is one-dimensional -- confirm.
    n_total_lines = len(task_code_mat)
    r_notask = n_notask / n_total_lines
    r_singletask = n_singletask / n_total_lines
    """
    Distribution of principal tasks
    """
    # Start from the dictionary produced by dtools.init_dic().
    count_task_principal = dtools.init_dic()