Example #1
import numpy as np
import pandas as pd

import compressed_pickle  # project-local helper module


def best_test_perf_with_eval(filename, methods=None, org_names=None):
    """
    for each method report the best param based on the validation performance
    """
    ##FIXME the docs to the function 

    data = compressed_pickle.load(filename) 
    
    if methods is None:
        methods = list(data.keys())

    if org_names is None:
        org_names = list(data[methods[0]][0].keys())

    best_eval_perf = np.zeros((len(methods), len(org_names)))
    best_test_perf = np.zeros((len(methods), len(org_names)))

    for m_idx, m in enumerate(methods):
        for n_idx, n in enumerate(org_names):

            assert data[m][0][n].shape == data[m][1][n].shape
            num_splits, num_params = data[m][0][n].shape
            best_eval_param_idx_for_each_split = np.argmax(data[m][0][n], axis=1)
            assert len(best_eval_param_idx_for_each_split) == num_splits

            tmp_eval = []
            tmp_test = []
            for s_idx, p_idx in enumerate(best_eval_param_idx_for_each_split):
                tmp_eval.append(data[m][0][n][s_idx, p_idx])
                tmp_test.append(data[m][1][n][s_idx, p_idx])

            assert len(tmp_eval) == len(tmp_test) == num_splits

            best_eval_perf[m_idx, n_idx] = np.mean(tmp_eval)
            best_test_perf[m_idx, n_idx] = np.mean(tmp_test)
 
        print(m, best_eval_perf[m_idx].mean(), best_test_perf[m_idx].mean())

    df_eval = pd.DataFrame(best_eval_perf, columns=org_names, index=methods)
    df_test = pd.DataFrame(best_test_perf, columns=org_names, index=methods)

    return df_eval, df_test
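
All of these helpers assume the same pickled layout: a dict mapping each method name to a pair (eval_scores, test_scores), where each element maps an organism name to a (num_splits, num_params) score array. Below is a minimal sketch of that layout with made-up names and random placeholder scores; compressed_pickle is a project-local module, so the round-trip calls are only indicated:

import numpy as np

# Hypothetical stand-in for the pickled results structure
# (method names, organism names, and scores are made up):
# data[method] = (eval_scores, test_scores), each mapping
# org -> array of shape (num_splits, num_params)
rng = np.random.default_rng(0)
orgs = ["org_a", "org_b"]
data = {
    m: tuple({org: rng.random((5, 4)) for org in orgs} for _ in range(2))
    for m in ["individual", "union", "mtl", "mtmkl"]
}

# Assuming the helper exposes a matching dump():
# compressed_pickle.dump(data, "results.pkl")
# df_eval, df_test = best_test_perf_with_eval("results.pkl")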
Example #2
from collections import defaultdict

import numpy as np
import pandas as pd

import compressed_pickle  # project-local helper module


def best_org_param_idx(filename, diff_methods=None, org_names=None):
    """
    for each method and org, report best param
    """

    data = compressed_pickle.load(filename) 

    if diff_methods is None:
        diff_methods = list(data.keys())

    # pre-defined learning methods; must match those stored in the pickle
    methods = ['individual', 'union', 'mtl', 'mtmkl']
    assert set(methods) == set(diff_methods), "methods from pickle file %s != %s" % (diff_methods, methods)

    if org_names is None:
        org_names = list(data[methods[0]][0].keys())
    
    best_param_method_org = defaultdict(dict)  # param index per (method, org); currently not returned
    all_num_eval = np.zeros((len(methods), len(org_names)))
    all_num_test = np.zeros((len(methods), len(org_names)))
    
    for m_idx, m in enumerate(methods):
        for n_idx, n in enumerate(org_names):
            best_param_idx = np.argmax(data[m][0][n].mean(axis=0)) # argmax based on eval data 
            best_param_method_org[m][n] = best_param_idx
            all_num_eval[m_idx, n_idx] = data[m][0][n].mean(axis=0)[best_param_idx]
            all_num_test[m_idx, n_idx] = data[m][1][n].mean(axis=0)[best_param_idx]

        print(m, all_num_test[m_idx].mean(), all_num_eval[m_idx].mean())

    # create pandas frames 
    df_eval = pd.DataFrame(all_num_eval, columns=org_names, index=methods)
    df_test = pd.DataFrame(all_num_test, columns=org_names, index=methods)

    # TODO plotting with pandas add-on 
    # df_eval.plot(kind="bar")

    # TODO: unit test 
    return df_eval, df_test
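
Unlike best_test_perf_with_eval above, which re-selects a parameter for every split, this function averages over splits first and commits to one parameter per (method, organism). The selection step in isolation, on made-up scores:

import numpy as np

# (num_splits, num_params) eval scores for one method/organism pair
scores = np.array([[0.70, 0.80, 0.75],   # split 0
                   [0.72, 0.78, 0.79]])  # split 1
mean_over_splits = scores.mean(axis=0)        # [0.71, 0.79, 0.77]
best_param_idx = np.argmax(mean_over_splits)  # -> 1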
Example #3
import numpy as np

import compressed_pickle  # project-local helper module


def best_global_param_idx(filename, methods=None, org_names=None):
    """
    for each method, report best param (averaged over orgs) based on eval data
    """
    data = compressed_pickle.load(filename) 

    if methods is None:
        methods = list(data.keys())

    if org_names is None:
        org_names = list(data[methods[0]][0].keys())

    best_param_method = {}
    best_test_method = {}

    inner_shape = data[methods[0]][0][org_names[0]].shape
    assert inner_shape == data[methods[0]][1][org_names[0]].shape

    for m in methods:
        all_num = np.zeros((len(org_names), inner_shape[0], inner_shape[1]))
        all_num_test = np.zeros((len(org_names), inner_shape[0], inner_shape[1]))

        for i, n in enumerate(org_names):
            assert data[m][0][n].shape == inner_shape
            all_num[i] = data[m][0][n]
            all_num_test[i] = data[m][1][n]
 
        # average over splits (axis=1), then over organisms (axis=0)
        mean_perf = all_num.mean(axis=1).mean(axis=0)
        assert len(mean_perf) == inner_shape[1]
        best_param_idx_eval = np.argmax(mean_perf)
        best_param_method[m] = best_param_idx_eval
        
        best_test_method[m] = all_num_test.mean(axis=1).mean(axis=0)[best_param_idx_eval]

    return best_param_method, best_test_method
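
Here the averaging goes one level further: the per-organism (num_splits, num_params) arrays are stacked and reduced over both splits and organisms, so a single parameter index is chosen per method. A sketch of that reduction, with assumed shapes and random placeholder scores:

import numpy as np

# (num_orgs, num_splits, num_params) stack of eval scores
all_num = np.random.default_rng(0).random((3, 5, 4))
mean_perf = all_num.mean(axis=1).mean(axis=0)   # splits, then orgs -> shape (4,)
best_param_idx_eval = int(np.argmax(mean_perf))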
Example #4
import numpy as np
import pandas as pd

import compressed_pickle  # project-local helper module


def highest_org_param(filename, diff_methods=None, org_names=None):
    """
    highest C for an organism 
    """

    data = compressed_pickle.load(filename) 

    if diff_methods is None:
        diff_methods = list(data.keys())

    # pre-defined learning methods (mtmkl excluded here); must match the pickle
    methods = ['individual', 'union', 'mtl']

    assert set(methods) == set(diff_methods), "methods from pickle file %s != %s" % (diff_methods, methods)

    if org_names is None:
        org_names = list(data[methods[0]][0].keys())

    all_num_eval = np.zeros((len(methods), len(org_names)))
    all_num_test = np.zeros((len(methods), len(org_names)))

    for m_idx, m in enumerate(methods):
        for n_idx, n in enumerate(org_names):
            # np.amax yields the best split-averaged score, not the parameter value
            best_eval_param = np.amax(data[m][0][n].mean(axis=0))
            all_num_eval[m_idx, n_idx] = best_eval_param
            best_test_param = np.amax(data[m][1][n].mean(axis=0))
            all_num_test[m_idx, n_idx] = best_test_param

        print(m, all_num_test[m_idx].mean(), all_num_eval[m_idx].mean())

    # create pandas frames 
    df_eval = pd.DataFrame(all_num_eval, columns=org_names, index=methods)
    df_test = pd.DataFrame(all_num_test, columns=org_names, index=methods)

    # TODO: unit test 
    return df_eval, df_test
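
Note that np.amax returns the best split-averaged score itself, not the index or value of the winning parameter, so despite its name this function tabulates scores; best_org_param_idx above is the variant that records indices. The distinction on made-up numbers:

import numpy as np

mean_scores = np.array([0.71, 0.79, 0.77])  # split-averaged scores
print(np.amax(mean_scores))    # 0.79 -> what highest_org_param stores
print(np.argmax(mean_scores))  # 1    -> what best_org_param_idx stores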