Example #1
0
def to_data_frame(file_names,
                  result_objs,
                  varying_params,
                  constant_params,
                  shorten_headers=True):
    """Collect experiment parameters and final accuracies into a data frame.

    Parameters
    ----------
    file_names : list of str
        Result file paths. For each path, the text between the last '/' and
        the first following '.' is parsed as an int and used as row index.
    result_objs : list
        Result objects handed to ``get_final_misclasses`` and
        ``get_training_times``. If the first one has a ``monitor_channels``
        attribute containing ``'test_sample_misclass'``, sample accuracies
        are added as extra columns.
    varying_params : iterable of dict
        Per-experiment parameters; each is merged with ``constant_params``
        via ``merge_dicts``.
        NOTE(review): presumably in the same order as ``file_names`` and
        ``result_objs`` -- confirm against callers.
    constant_params : dict
        Parameters shared by all experiments.
    shorten_headers : bool
        If True, parameter column names are passed through ``prettify_word``.

    Returns
    -------
    MetaDataFrame
        One row per experiment: the parameter columns (keys taken from the
        first merged parameter dict), followed by ``time``, ``test``,
        ``train`` (and ``test_sample`` / ``train_sample`` when available).
        Accuracies are ``(1 - misclass) * 100``; ``time`` is rounded and
        converted to a timedelta in seconds.
    """
    all_params = [merge_dicts(var, constant_params) for var in varying_params]
    file_numbers = [int(f.split('/')[-1].split('.')[0]) for f in file_names]

    # remove dollars
    for param_dict in all_params:
        # items() works on Python 2 and 3 (iteritems() is Python 2 only);
        # iterate over a snapshot since values are reassigned in the loop.
        for key, val in list(param_dict.items()):
            param_dict[key] = remove_dollar(val)

    # Materialize the keys: on Python 3 keys() is a view, which cannot be
    # concatenated with the list of result column names further below.
    param_keys = list(all_params[0].keys())
    param_vals = [[v[key] for key in param_keys] for v in all_params]
    # transform lists to tuples to make them hashable
    param_vals = [[to_tuple_if_list(v) for v in var_list]
                  for var_list in param_vals]
    # dtype=object keeps mixed-type parameter values (numbers, strings,
    # tuples) as-is instead of coercing them to a common type.
    param_vals = np.array(param_vals, dtype=object)
    test_accs = (1 - get_final_misclasses(result_objs, 'test')) * 100
    train_accs = (1 - get_final_misclasses(result_objs, 'train')) * 100
    training_times = get_training_times(result_objs)
    # try adding sample accuracies, might exist, might not
    # (only the first result object is inspected to decide this)
    sample_accs_exist = (hasattr(result_objs[0], 'monitor_channels')
                         and 'test_sample_misclass'
                         in result_objs[0].monitor_channels)
    if sample_accs_exist:
        test_sample_accs = (
            1 - get_final_misclasses(result_objs, 'test_sample')) * 100
        train_sample_accs = (
            1 - get_final_misclasses(result_objs, 'train_sample')) * 100
        result_keys = ['time', 'test', 'test_sample', 'train', 'train_sample']
        result_cols = [
            training_times, test_accs, test_sample_accs, train_accs,
            train_sample_accs
        ]
    else:
        result_keys = ['time', 'test', 'train']
        result_cols = [training_times, test_accs, train_accs]

    # Transpose so each result list becomes one column, then append those
    # columns to the right of the parameter columns.
    vals_and_misclasses = np.append(param_vals,
                                    np.array(result_cols).T,
                                    axis=1)
    if shorten_headers:
        param_keys = [prettify_word(key) for key in param_keys]

    all_keys = param_keys + result_keys

    data_frame = MetaDataFrame(vals_and_misclasses,
                               index=file_numbers,
                               columns=all_keys)
    data_frame = to_numeric_where_possible(data_frame)
    data_frame.time = pd.to_timedelta(np.round(data_frame.time), unit='s')
    return data_frame
Example #2
0
def print_stats(results, csp_results, n_diffs=None):
    """Print a comparison between the deep results and the CSP results.

    Printed, in order: mean accuracy of each (as ``100 * (1 - misclass)``),
    the accuracy difference, the standard deviation of the paired misclass
    differences, a one-sided permutation p-value, a one-sided Wilcoxon
    p-value, and the mean training time of each.

    Parameters
    ----------
    results :
        Results labelled "deep" in the output; passed to
        ``get_final_misclasses`` and ``get_training_times``.
    csp_results :
        Results labelled "csp" in the output; handled the same way.
        NOTE(review): the element-wise subtraction below assumes both
        misclass arrays are paired and equally long -- confirm.
    n_diffs : int or None
        If None, ``perm_mean_diffs`` is used; otherwise
        ``perm_mean_diffs_sampled`` is called with this ``n_diffs``.
    """
    deep_errs = get_final_misclasses(results)
    csp_errs = get_final_misclasses(csp_results)
    deep_times = get_training_times(results)
    csp_times = get_training_times(csp_results)

    # Order the pair so that `lower` has the smaller mean misclass; the
    # observed mean difference is then computed as lower - higher.
    if np.mean(deep_errs) < np.mean(csp_errs):
        lower, higher = deep_errs, csp_errs
    else:
        lower, higher = csp_errs, deep_errs
    observed_diff = np.mean(lower - higher)

    if n_diffs is not None:
        perm_diffs = perm_mean_diffs_sampled(lower, higher, n_diffs=n_diffs)
    else:
        perm_diffs = perm_mean_diffs(lower, higher)
    deep_minus_csp = np.mean(deep_errs - csp_errs)

    deep_acc = 100 * (1 - np.mean(deep_errs))
    print("deep accuracy:    {:.1f}".format(deep_acc))
    csp_acc = 100 * (1 - np.mean(csp_errs))
    print("csp  accuracy:    {:.1f}".format(csp_acc))
    # Sign is flipped because a lower misclass means a higher accuracy.
    print("diff accuracy:    {:.1f}".format(100 * -deep_minus_csp))
    paired_std = 100 * np.std(deep_errs - csp_errs)
    print("std          :    {:.1f}".format(paired_std))

    # Only one-sided p-values are reported.
    # Fraction of permutation differences at or below the observed one.
    perm_p = np.sum(perm_diffs <= observed_diff) / float(len(perm_diffs))
    print("one sided perm     {:.5f}".format(perm_p))
    # Element 1 of the wilcoxon result (the p-value) is halved.
    wilcoxon_p = scipy.stats.wilcoxon(deep_errs, csp_errs)[1] / 2
    print("one sided wilcoxon {:.5f}".format(wilcoxon_p))

    deep_duration = datetime.timedelta(seconds=round(np.mean(deep_times)))
    print("deep time:        {:s}".format(str(deep_duration)))
    csp_duration = datetime.timedelta(seconds=round(np.mean(csp_times)))
    print("csp time:         {:s}".format(str(csp_duration)))
Example #3
0
def print_stats(results, csp_results, n_diffs=None):
    """Print accuracy, significance and timing statistics for deep vs. CSP.

    Output lines, in order: deep accuracy, csp accuracy, accuracy
    difference, standard deviation of the paired misclass differences,
    one-sided permutation p-value, one-sided Wilcoxon p-value, deep mean
    training time, csp mean training time.

    Parameters
    ----------
    results :
        Results reported as "deep"; fed to ``get_final_misclasses`` and
        ``get_training_times``.
    csp_results :
        Results reported as "csp"; fed to the same helpers.
        NOTE(review): misclass arrays are subtracted element-wise, so they
        are assumed to be paired and of equal length -- confirm.
    n_diffs : int or None
        When given, ``perm_mean_diffs_sampled`` is called with this
        ``n_diffs``; when None, ``perm_mean_diffs`` is called instead.
    """
    misclass_deep = get_final_misclasses(results)
    misclass_csp = get_final_misclasses(csp_results)
    times_deep = get_training_times(results)
    times_csp = get_training_times(csp_results)

    # `smaller` is whichever misclass array has the lower mean.
    deep_is_better = np.mean(misclass_deep) < np.mean(misclass_csp)
    smaller = misclass_deep if deep_is_better else misclass_csp
    larger = misclass_csp if deep_is_better else misclass_deep

    mean_diff = np.mean(smaller - larger)

    if n_diffs is None:
        sampled_diffs = perm_mean_diffs(smaller, larger)
    else:
        sampled_diffs = perm_mean_diffs_sampled(smaller, larger,
                                                n_diffs=n_diffs)
    mean_deep_minus_csp = np.mean(misclass_deep - misclass_csp)

    # Accuracy in percent is 100 * (1 - mean misclass).
    for template, misclasses in (
            ("deep accuracy:    {:.1f}", misclass_deep),
            ("csp  accuracy:    {:.1f}", misclass_csp)):
        print(template.format(100 * (1 - np.mean(misclasses))))
    # Negated: lower misclass corresponds to higher accuracy.
    print("diff accuracy:    {:.1f}".format(100 * -mean_deep_minus_csp))
    print("std          :    {:.1f}".format(
        100 * np.std(misclass_deep - misclass_csp)))

    # Only one-sided p-values are printed.
    n_at_or_below = np.sum(sampled_diffs <= mean_diff)
    print("one sided perm     {:.5f}".format(
        n_at_or_below / float(len(sampled_diffs))))
    # The second element of the wilcoxon result (its p-value) is halved.
    wilcoxon_result = scipy.stats.wilcoxon(misclass_deep, misclass_csp)
    print("one sided wilcoxon {:.5f}".format(wilcoxon_result[1] / 2))

    # Mean training time, rounded to whole seconds and shown as a timedelta.
    for template, times in (
            ("deep time:        {:s}", times_deep),
            ("csp time:         {:s}", times_csp)):
        mean_seconds = round(np.mean(times))
        print(template.format(str(datetime.timedelta(seconds=mean_seconds))))
Example #4
0
def to_data_frame(file_names, result_objs, varying_params, constant_params,
        shorten_headers=True):
    """Build a MetaDataFrame of parameters and final accuracies per result.

    Parameters
    ----------
    file_names : list of str
        Result file paths; for each, the text between the last '/' and the
        first following '.' is parsed as an int and becomes the row index.
    result_objs : list
        Result objects passed to ``get_final_misclasses`` and
        ``get_training_times``. Sample accuracies are included when the
        first object has ``'test_sample_misclass'`` in its
        ``monitor_channels``.
    varying_params : iterable of dict
        Per-experiment parameters, each merged with ``constant_params``
        through ``merge_dicts``.
        NOTE(review): presumably ordered like ``file_names`` and
        ``result_objs`` -- confirm against callers.
    constant_params : dict
        Parameters common to all experiments.
    shorten_headers : bool
        If True, parameter column names go through ``prettify_word``.

    Returns
    -------
    MetaDataFrame
        Parameter columns (keys of the first merged parameter dict)
        followed by ``time``, ``test``, ``train`` and, when available,
        ``test_sample`` / ``train_sample``. Accuracies are
        ``(1 - misclass) * 100``; ``time`` is rounded and converted to a
        timedelta in seconds.
    """
    all_params = [merge_dicts(var, constant_params) for var in varying_params]
    file_numbers = [int(f.split('/')[-1].split('.')[0]) for f in file_names]

    # remove dollars
    for param_dict in all_params:
        # dict.iteritems() only exists on Python 2; items() is available on
        # both. A list snapshot is iterated because values are reassigned.
        for key, val in list(param_dict.items()):
            param_dict[key] = remove_dollar(val)

    # keys() returns a view on Python 3, which does not support `+` with a
    # list (needed for all_keys below), so convert it to a list up front.
    param_keys = list(all_params[0].keys())
    param_vals = [[v[key] for key in param_keys] for v in all_params]
    # transform lists to tuples to make them hashable
    param_vals = [[to_tuple_if_list(v) for v in var_list]
                  for var_list in param_vals]
    # object dtype keeps mixed-type parameter values uncoerced
    param_vals = np.array(param_vals, dtype=object)
    test_accs = (1 - get_final_misclasses(result_objs, 'test')) * 100
    train_accs = (1 - get_final_misclasses(result_objs, 'train')) * 100
    training_times = get_training_times(result_objs)
    # try adding sample accuracies, might exist, might not
    # (decided from the first result object only)
    sample_accs_exist = (hasattr(result_objs[0], 'monitor_channels') and
        'test_sample_misclass' in result_objs[0].monitor_channels)
    if sample_accs_exist:
        test_sample_accs = (
            1 - get_final_misclasses(result_objs, 'test_sample')) * 100
        train_sample_accs = (
            1 - get_final_misclasses(result_objs, 'train_sample')) * 100
        # transpose so that every result list becomes one column
        vals_and_misclasses = np.append(param_vals,
            np.array([training_times, test_accs, test_sample_accs,
                train_accs, train_sample_accs]).T,
            axis=1)
    else:
        vals_and_misclasses = np.append(param_vals,
            np.array([training_times, test_accs, train_accs]).T,
            axis=1)
    if shorten_headers:
        param_keys = [prettify_word(key) for key in param_keys]

    if sample_accs_exist:
        all_keys = param_keys + ['time', 'test', 'test_sample', 'train',
            'train_sample']
    else:
        all_keys = param_keys + ['time', 'test', 'train']

    data_frame = MetaDataFrame(vals_and_misclasses, index=file_numbers,
        columns=all_keys)
    data_frame = to_numeric_where_possible(data_frame)
    data_frame.time = pd.to_timedelta(np.round(data_frame.time), unit='s')
    return data_frame