def get_p_window_given_task(tasks, windows, window_names, unique_tasks=TASKS):
    """
    Estimate the conditional probability of a window title given the task.

    This is the 1D case for window titles, used when exe name and window
    title are judged to be independent. The task is the quantity being
    estimated, so a distribution over titles is built for every candidate
    task.

    Parameters
    ----------
    tasks :
        Task label of each observation. It is compared element-wise with
        ``==`` (presumably a numpy array -- confirm against the caller).
    windows :
        Window title of each observation, aligned with ``tasks``. The empty
        string ``''`` marks an unknown title.
    window_names :
        Iterable of the known window titles.
    unique_tasks :
        Iterable of the task labels to condition on.

    Returns
    -------
    The table created by ``dtools.init_dic_matrix``, indexed as
    ``[task][title]`` (plus the extra ``''`` entry), where every task row has
    been passed through ``dtools.normalize`` exactly once.
    """
    p_window_given_task = dtools.init_dic_matrix(unique_tasks, window_names)

    for task in unique_tasks:
        task_mask = tasks == task

        # Extra class for unknown titles.
        p_window_given_task[task][''] = np.sum(
            np.logical_and(windows == '', task_mask))

        # Known titles.
        for w in window_names:
            p_window_given_task[task][w] = np.sum(
                np.logical_and(windows == w, task_mask))

        # Normalize only once the row holds all of its raw counts.
        # Normalizing while counts are still being written would mix
        # probabilities with raw counts, and normalizing outside this loop
        # would leave all but one task row unnormalized.
        p_window_given_task[task] = dtools.normalize(
            p_window_given_task[task])

    return p_window_given_task
def get_p_rclicks_given_task(tasks, rclicks, n_clicks, unique_tasks=TASKS):
    """
    Build the table of P(number of right clicks | task).

    For every task in ``unique_tasks`` and every click count in
    ``0 .. n_clicks - 1``, count the observations where both hold, then
    pass that task's row through ``dtools.normalize``. The task is still
    unknown at estimation time, hence one row per candidate task.

    ``tasks`` and ``rclicks`` are compared element-wise with ``==``
    (presumably aligned numpy arrays -- confirm against the caller).
    """
    table = dtools.init_dic_matrix(unique_tasks, np.arange(n_clicks))

    for task in unique_tasks:
        is_task = tasks == task
        for count in range(n_clicks):
            has_count = rclicks == count
            table[task][count] = np.sum(np.logical_and(has_count, is_task))
        # Row is complete for this task; normalize it.
        table[task] = dtools.normalize(table[task])

    return table
def computeCramers(data1, dataName1, data2, dataName2):
    """
    Print Cramer's V (plain and bias corrected) between two variables.

    A cross tabulation over the unique values of ``data1`` and ``data2`` is
    built and handed to ``cramersV`` and ``cramersV_bias_corrected``. In our
    case one variable is the task and the other a descriptor. Cramer's V is
    symmetric, so the order of the two variables does not matter.

    ``dataName1`` and ``dataName2`` are only used as labels in the printed
    report. Nothing is returned.
    """
    values1 = np.unique(data1)
    values2 = np.unique(data2)

    crosstab = dtools.init_dic_matrix(values1, values2)
    for val1 in values1:
        mask1 = data1 == val1
        for val2 in values2:
            mask2 = data2 == val2
            # Product of the two masks marks samples where both values occur.
            crosstab[val1][val2] = np.sum(np.multiply(mask1, mask2))

    # The chi-square statistics are returned alongside V but not reported.
    _x_sq, v = cramersV(crosstab)
    _x_sq_cor, v_cor = cramersV_bias_corrected(crosstab)

    print('******************************')
    print('Correlation between {} and {}\n'.format(dataName1, dataName2))
    print('Cramers V: {:.3f}'.format(v))
    print('Cramers V: {:.3f} (bias corrected)\n'.format(v_cor))
def get_p_keyst_given_task(tasks, keystrokes_quan, n_tot_keyst,
                           unique_tasks=TASKS):
    """
    Build the table of P(keystroke quantile | task).

    The task is what we want to estimate and is therefore unknown at this
    point, so the conditional distribution is computed for every candidate
    task. For each task and each quantile index in ``0 .. n_tot_keyst - 1``
    the co-occurrences are counted, and the task's row is then passed
    through ``dtools.normalize``.

    ``tasks`` and ``keystrokes_quan`` are compared element-wise with ``==``
    (presumably aligned numpy arrays -- confirm against the caller).
    """
    table = dtools.init_dic_matrix(unique_tasks, np.arange(n_tot_keyst))

    for task in unique_tasks:
        is_task = tasks == task
        for quantile in range(n_tot_keyst):
            in_quantile = keystrokes_quan == quantile
            table[task][quantile] = np.sum(
                np.logical_and(in_quantile, is_task))
        # Row is complete for this task; normalize it.
        table[task] = dtools.normalize(table[task])

    return table
def get_p_duration_given_task(tasks, duration, unique_tasks=TASKS):
    """
    Estimate the conditional probability of a duration bin given the task.

    Very similar to the 1D cases for exe name or window title. The task is
    the quantity being estimated, so one distribution over duration bins is
    built for every candidate task.

    Parameters
    ----------
    tasks :
        Task label of each observation. It is compared element-wise with
        ``==`` (presumably a numpy array -- confirm against the caller).
    duration :
        Duration bin index of each observation, aligned with ``tasks``.
        The number of bins is taken as the number of distinct values.
    unique_tasks :
        Iterable of the task labels to condition on.

    Returns
    -------
    The table created by ``dtools.init_dic_matrix``, indexed as
    ``[task][bin]`` for ``bin`` in ``0 .. n_bins`` inclusive, where every
    task row has been passed through ``dtools.normalize``.
    """
    n_bins = len(np.unique(duration))

    # The counting loop visits bin indices 0..n_bins inclusive, so the table
    # is initialised over that same index set. Initialising over only
    # 0..n_bins-1 would mean the last index is written without having been
    # set up.
    # NOTE(review): whether bins are numbered from 0 or from 1 is not
    # visible here; covering 0..n_bins handles both -- confirm upstream.
    n_indices = n_bins + 1
    p_duration_given_task = dtools.init_dic_matrix(unique_tasks,
                                                   np.arange(n_indices))

    for task in unique_tasks:
        task_mask = tasks == task
        for i in range(n_indices):
            p_duration_given_task[task][i] = np.sum(
                np.logical_and(duration == i, task_mask))
        # Row is complete for this task; normalize it.
        p_duration_given_task[task] = dtools.normalize(
            p_duration_given_task[task])

    return p_duration_given_task
def get_p_exe_given_task(tasks, exes, exe_names, unique_tasks=TASKS):
    """
    Estimate the conditional probability of an exe name given the task.

    This is the 1D case for application (i.e. exe) names: when exe name and
    window title are judged to be independent, each is treated individually.
    The task is the quantity being estimated, so a distribution over exe
    names is built for every candidate task.

    Parameters
    ----------
    tasks :
        Task label of each observation. It is compared element-wise with
        ``==`` (presumably a numpy array -- confirm against the caller).
    exes :
        Exe name (or code) of each observation, aligned with ``tasks``.
    exe_names :
        Iterable of the exe names to tabulate.
    unique_tasks :
        Iterable of the task labels to condition on.

    Returns
    -------
    The table created by ``dtools.init_dic_matrix``, indexed as
    ``[task][exe]``, where every task row has been passed through
    ``dtools.normalize`` exactly once.
    """
    p_exe_given_task = dtools.init_dic_matrix(unique_tasks, exe_names)

    for task in unique_tasks:
        task_mask = tasks == task
        for e in exe_names:
            p_exe_given_task[task][e] = np.sum(
                np.logical_and(exes == e, task_mask))

        # Normalize only once the row holds all of its raw counts.
        # Iterating exe-first and normalizing along the way would either
        # mix probabilities with raw counts or leave some task rows
        # unnormalized.
        p_exe_given_task[task] = dtools.normalize(p_exe_given_task[task])

    return p_exe_given_task
# '0' is included for the unknown task (not labeled by Shimizu).
n_task = params.N_TASKS
# All exe codes that occur in the data.
all_exe = np.unique(exe_code_mat)
# All window titles used in Shimizu's rules, +1 for the titles which do not
# include a keyword from his rules.
all_title = np.unique(title_code_mat)
# Two time classes: lunch break and not-lunch break.
n_all_time = 2

# Correlation between exe and task.
# Cramer's V is symmetric, so it does not matter which variable comes first.
cV = []
for exe in all_exe:
    # Two rows in the cross tabulation: one for this exe being present
    # ('Positive'), the other for it being absent ('Negative').
    crosstab_exeE_vs_task = dtools.init_dic_matrix(['Positive', 'Negative'])
    tempE = (exe_code_mat == exe)

    for task in params.TASKS:
        # exeE negative
        crosstab_exeE_vs_task['Negative'][task] = np.sum(
            np.logical_and(np.logical_not(tempE), task_code_mat == task))
        # exeE positive
        crosstab_exeE_vs_task['Positive'][task] = np.sum(
            np.logical_and(tempE, task_code_mat == task))

    (x_sq_task_vs_exe,
     v_task_vs_exe) = Vtools.cramersV(crosstab_exeE_vs_task)
    (x_sq_task_vs_exe_cor,
     v_task_vs_exe_cor) = Vtools.cramersV_bias_corrected(crosstab_exeE_vs_task)

    # Only the uncorrected V is collected, one value per exe.
    cV.append(v_task_vs_exe)