Ejemplo n.º 1
0
def compare_net_csp(df_net, df_csp, name, freq, dataset, with_csp_acc=False,
        with_std=False, with_std_error=False, max_n_p_vals=20):
    """Build a one-row summary comparing net and CSP test results.

    The two frames are merged on ``dataset_filename`` with the suffixes
    ``_net`` / ``_csp``; the merged ``test_net`` and ``test_csp`` columns
    are then compared pairwise.

    Parameters
    ----------
    df_net : pandas.DataFrame
        Net results. Must provide ``dataset_filename``, a ``test`` column
        (becomes ``test_net`` after the merge) and ``time``.
        NOTE(review): ``time`` is averaged and passed to
        ``pd.Timedelta.round``, so it presumably holds timedeltas -- confirm.
    df_csp : pandas.DataFrame
        CSP results. Must provide ``dataset_filename`` and a ``test`` column
        (becomes ``test_csp`` after the merge).
    name, freq, dataset
        Copied unchanged into the ``name``, ``freq`` and ``dataset`` columns.
    with_csp_acc : bool
        If true, add a ``test_csp`` column with the mean CSP value.
    with_std : bool
        If true, add ``std``: standard deviation of the paired differences.
    with_std_error : bool
        If true, add ``stderr``: ``std / sqrt(number of pairs)``.
    max_n_p_vals : int
        With more than this many pairs, ``perm_mean_diff_test`` is called
        with ``n_diffs=2**max_n_p_vals``; otherwise with ``n_diffs=None``
        (presumably a full enumeration -- confirm against that helper).

    Returns
    -------
    pandas.DataFrame
        One row with columns ``name``, ``freq``, ``dataset``, optionally
        ``test_csp``, then ``test_net``, ``diff``, optionally ``std`` and
        ``stderr``, then ``rand``, ``wilc``, ``sign`` and ``time_net``.

    Raises
    ------
    AssertionError
        If the inputs differ in length, or if the merge does not yield
        exactly one row per row of ``df_csp``.
    """
    assert len(df_net) == len(df_csp), (
        "Net ({:d}) and csp ({:d}) should have same length".format(
            len(df_net), len(df_csp)))
    df_merged = df_net.merge(df_csp, on='dataset_filename',
                             suffixes=('_net', '_csp'))
    # Check the merge right away: unmatched or duplicated filenames change
    # the number of rows, and every statistic below would then be computed
    # on inconsistent pairs (after a potentially long permutation test).
    assert len(df_merged) == len(df_csp), (
        "Merged ({:d}) and csp ({:d}) should have same length".format(
            len(df_merged), len(df_csp)))
    # not really necessary to sort, just to make sure
    df_merged = df_merged.sort_values(by='dataset_filename')

    test_acc_net = np.array(df_merged['test_net'])
    test_acc_csp = np.array(df_merged['test_csp'])
    # Cap the number of differences handed to the permutation test once
    # there are more pairs than max_n_p_vals.
    if len(test_acc_net) > max_n_p_vals:
        n_diffs = 2 ** max_n_p_vals
    else:
        n_diffs = None
    p_val = perm_mean_diff_test(test_acc_net, test_acc_csp, n_diffs=n_diffs)
    p_val_wilc = wilcoxon_signed_rank(test_acc_net, test_acc_csp)
    p_val_sign = sign_test(test_acc_net, test_acc_csp)
    diff_std = np.std(test_acc_net - test_acc_csp)

    df_out = pd.DataFrame()

    df_out['name'] = [name]
    df_out['freq'] = [freq]
    df_out['dataset'] = [dataset]
    if with_csp_acc:
        df_out['test_csp'] = [np.mean(test_acc_csp)]

    df_out['test_net'] = [np.mean(test_acc_net)]
    df_out['diff'] = [np.mean(test_acc_net) - np.mean(test_acc_csp)]
    if with_std:
        df_out['std'] = [diff_std]
    if with_std_error:
        df_out['stderr'] = [diff_std / np.sqrt(len(test_acc_net))]
    # Only the columns present at this point go through
    # round_numeric_columns; the p-values and the time are added afterwards
    # and are therefore left as computed.
    df_out = round_numeric_columns(df_out, 1)

    df_out['rand'] = [p_val]
    df_out['wilc'] = [p_val_wilc]
    df_out['sign'] = [p_val_sign]
    # Mean time is taken over df_net (not the merged frame), rounded to 's'.
    df_out['time_net'] = [pd.Timedelta.round(np.mean(df_net.time), 's')]

    return df_out
Ejemplo n.º 2
0
def pairwise_compare_frame(df, with_p_vals=False):
    """Compare the ``test`` column for every pair of values of each parameter.

    Every column of ``df`` except ``test``, ``time``, ``train``,
    ``test_sample`` and ``train_sample`` is treated as a parameter. For each
    parameter (other than ``dataset_filename``, ``test_filename`` and
    ``subject_id``) and each pair of its unique values, the rows holding the
    two values are merged on all remaining parameters, and the paired
    ``test`` values are summarised in one output row.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per experiment; must contain a ``test`` column.
    with_p_vals : bool
        If true, append a ``p_val`` column (value multiplied by 100). Up to
        18 pairs use ``perm_mean_diff_test`` with its default arguments, up
        to 62 pairs use it with ``n_diffs=2**17``, and larger samples use
        the second value returned by ``wilcoxon``.

    Returns
    -------
    pandas.DataFrame or None
        Indexed by parameter name, with columns ``n_exp``, ``val_1``,
        ``val_2``, ``acc_1``, ``acc_2``, ``diff``, ``std`` and optionally
        ``p_val``; the value with the higher mean is always ``val_1``.
        ``None`` when no pair of values shares any experiments.
    """
    ignored_keys = ('dataset_filename', 'test_filename', 'subject_id')
    param_keys = set(df.keys()) - {'test', 'time', 'train',
                                   'test_sample', 'train_sample'}
    rows = []
    row_labels = []
    for key in param_keys:
        if key in ignored_keys:
            continue
        values = df[key].unique()
        n_values = len(values)
        # Experiments are paired on every parameter except the current one.
        merge_keys = list(param_keys - {key})
        for pos_a in range(n_values - 1):
            val_a = values[pos_a]
            rows_a = df[df[key] == val_a]
            for pos_b in range(pos_a + 1, n_values):
                val_b = values[pos_b]
                rows_b = df[df[key] == val_b]
                paired = rows_a.merge(rows_b, on=merge_keys)
                if paired.size == 0:
                    continue
                acc_a = np.array(paired.test_x, dtype=np.float64)
                acc_b = np.array(paired.test_y, dtype=np.float64)
                mean_a = np.mean(acc_a)
                mean_b = np.mean(acc_b)
                # The value with the higher mean goes first in the table.
                if mean_a >= mean_b:
                    first = (val_a, acc_a, mean_a)
                    second = (val_b, acc_b, mean_b)
                else:
                    first = (val_b, acc_b, mean_b)
                    second = (val_a, acc_a, mean_a)
                val_1, acc_1, mean_1 = first
                val_2, acc_2, mean_2 = second

                # Differences are second minus first.
                deltas = acc_2 - acc_1
                delta_std = np.std(deltas)
                delta_mean = np.mean(deltas)
                row = [len(acc_1), str(val_1), str(val_2),
                       mean_1, mean_2, delta_mean, delta_std]
                if with_p_vals:
                    n_pairs = len(acc_1)
                    if n_pairs <= 18:
                        p_val = perm_mean_diff_test(acc_1, acc_2)
                    elif n_pairs <= 62:
                        p_val = perm_mean_diff_test(acc_1, acc_2,
                                                    n_diffs=2**17)
                    else:
                        _, p_val = wilcoxon(acc_1, acc_2)
                    row.append(p_val * 100)
                rows.append(row)
                row_labels.append(key)

    if not rows:
        return None
    headers = ['n_exp', 'val_1', 'val_2', 'acc_1', 'acc_2', 'diff', 'std']
    if with_p_vals:
        headers.append('p_val')
    # np.array turns the mixed rows into one homogeneous array; the helper
    # below is then applied to recover numeric columns where possible.
    result = pd.DataFrame(np.array(rows), columns=headers, index=row_labels)
    result = to_numeric_where_possible(result)
    return round_numeric_columns(result, 1)
Ejemplo n.º 3
0
def pairwise_compare_frame(df,
                           with_p_vals=False,
                           result_cols=('test', 'time', 'train', 'test_sample',
                                        'train_sample'),
                           compare_col='test'):
    """Compare ``compare_col`` for every pair of values of each parameter.

    Every column of ``df`` that is neither in ``result_cols`` nor equal to
    ``compare_col`` is treated as a parameter. For each parameter (other
    than ``dataset_filename``, ``test_filename`` and ``subject_id``) and
    each pair of its unique values, the rows holding the two values are
    merged on all remaining parameters and the paired ``compare_col`` values
    are summarised in one output row.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per experiment; must contain ``compare_col``.
    with_p_vals : bool
        If true, append a ``p_val`` column (value multiplied by 100). Up to
        18 pairs use ``perm_mean_diff_test`` with its default arguments, up
        to 62 pairs use it with ``n_diffs=2**17``, and larger samples use
        the second value returned by ``wilcoxon``.
    result_cols : iterable of str
        Columns holding results rather than parameters.
    compare_col : str
        Column whose values are compared. It is never treated as a
        parameter, even when it is missing from ``result_cols``.

    Returns
    -------
    pandas.DataFrame or None
        Indexed by parameter name, with columns ``n_exp``, ``val_1``,
        ``val_2``, ``acc_1``, ``acc_2``, ``diff``, ``std`` and optionally
        ``p_val``; the value with the higher mean is always ``val_1``.
        ``None`` when no pair of values shares any experiments.
    """
    table_vals = []
    table_indices = []
    # The compared column must not end up among the parameters: it would be
    # used as a merge key (so the '_x'/'_y' columns read below would not
    # exist) or be compared against itself. For callers that already list it
    # in result_cols this is the same set as before.
    non_param_cols = set(result_cols) | {compare_col}
    param_keys = set(df.keys()) - non_param_cols
    for key in param_keys:
        if key in ('dataset_filename', 'test_filename', 'subject_id'):
            continue
        possible_vals = df[key].unique()
        # Experiments are paired on every parameter except the current one.
        other_param_keys = list(param_keys - {key})
        for i_value_a in range(len(possible_vals) - 1):
            for i_value_b in range(i_value_a + 1, len(possible_vals)):
                val_a = possible_vals[i_value_a]
                val_b = possible_vals[i_value_b]
                frame_1 = df[df[key] == val_a]
                frame_2 = df[df[key] == val_b]
                joined_frame = frame_1.merge(frame_2, on=other_param_keys)
                if joined_frame.size == 0:
                    continue
                # pandas' default merge suffixes mark the two sides.
                accuracies_a = np.array(joined_frame[compare_col + '_x'],
                                        dtype=np.float64)
                accuracies_b = np.array(joined_frame[compare_col + '_y'],
                                        dtype=np.float64)
                mean_a = np.mean(accuracies_a)
                mean_b = np.mean(accuracies_b)
                # Always put better value first in table
                if mean_a >= mean_b:
                    accuracies_1 = accuracies_a
                    accuracies_2 = accuracies_b
                    mean_1 = mean_a
                    mean_2 = mean_b
                    val_1 = val_a
                    val_2 = val_b
                else:
                    accuracies_1 = accuracies_b
                    accuracies_2 = accuracies_a
                    mean_1 = mean_b
                    mean_2 = mean_a
                    val_1 = val_b
                    val_2 = val_a
                if with_p_vals:
                    if len(accuracies_1) <= 18:
                        diff_perm = perm_mean_diff_test(
                            accuracies_1, accuracies_2) * 100
                    elif len(accuracies_1) <= 62:
                        diff_perm = perm_mean_diff_test(
                            accuracies_1, accuracies_2, n_diffs=2**17) * 100
                    else:
                        _, diff_perm = wilcoxon(accuracies_1, accuracies_2)
                        diff_perm *= 100

                # Differences are second minus first.
                diffs = accuracies_2 - accuracies_1
                diff_std = np.std(diffs)
                diff_mean = np.mean(diffs)
                this_vals = [
                    len(accuracies_1),
                    str(val_1),
                    str(val_2), mean_1, mean_2, diff_mean, diff_std
                ]
                if with_p_vals:
                    this_vals.append(diff_perm)
                table_vals.append(this_vals)
                table_indices.append(key)

    if not table_vals:
        return None
    # np.array turns the mixed rows into one homogeneous array; the helper
    # below is then applied to recover numeric columns where possible.
    table_vals = np.array(table_vals)
    compare_headers = [
        'n_exp', 'val_1', 'val_2', 'acc_1', 'acc_2', 'diff', 'std'
    ]
    if with_p_vals:
        compare_headers.append('p_val')
    compare_frame = pd.DataFrame(table_vals,
                                 columns=compare_headers,
                                 index=table_indices)
    compare_frame = to_numeric_where_possible(compare_frame)
    compare_frame = round_numeric_columns(compare_frame, 1)
    return compare_frame