Esempio n. 1
0
    "lg_XBA.pkl": 0,
    "lg_XBC.pkl": 0,
    "lg_XCC.pkl": 0,
    "lg_XBCCR.pkl": 0,
    "lg_XBCR.pkl": 0,
    "lg_XCCR.pkl": 0,
    "lg_XBCOV.pkl": 0,
    "lg_XB.pkl": 0,
    "lg_XC.pkl": 0
}
# Directory that holds the pickled logistic-regression results; every key of
# `respath` is the name of a file inside it.
path = "/storage/gablab001/data/genus/fs_cog/pred_diag/lg/results"

# Swap each placeholder value for the full path of its result file.  Only
# values are reassigned (no keys added or removed), so updating while
# iterating is safe.
for key in respath:
    respath[key] = os.path.join(path, key)

# One column per result file, named by the token between "_" and ".pkl"
# (e.g. "lg_XBA.pkl" -> "XBA").
auc_df = pd.DataFrame(
    columns=[key.split('_')[1].replace('.pkl', '') for key in respath])

for key, val in respath.items():
    col = key.split('_')[1].replace('.pkl', '')
    data = utils.read_pickle(val)['auc']
    # Element 1 of every entry is kept as the score
    # (presumably entries are (index, auc) pairs -- TODO confirm).
    auc_df[col] = [i[1] for i in data]

order = ['XBA', 'XBC', 'XBCCR', 'XBCOV', 'XBCR', 'XB', 'XCC', 'XCCR', 'XC']
# Hand the frame over as `data=`: in seaborn releases whose first positional
# parameter is `x` rather than `data`, a positional DataFrame is not treated
# as wide-form data.  The keyword form means one violin per column everywhere.
sns.violinplot(data=auc_df, order=order)
plt.title("Distributions of AUC")
fig = plt.gcf()
fig.set_size_inches(12, 7)
fig.savefig('lg_auc_dist.png', dpi=400, bbox_inches="tight")
plt.close()
Esempio n. 2
0
# Positional command-line inputs, in order: result file name, header file
# name, and a third value stored as `save_name`.
result_name = sys.argv[1]
header_name = sys.argv[2]
save_name = sys.argv[3]

brain = Brain(
    "fsaverage",
    "split",
    "inflated",
    views=['lat', 'med'],
    background="white"
)

# Directories holding the column-header files and the pickled results.
hp = "/storage/gablab001/data/genus/fs_cog/pred_diag/column_headers"
rp = "/storage/gablab001/data/genus/fs_cog/pred_diag/lg/results"

# One header per line; leading/trailing newlines are trimmed before splitting.
with open(os.path.join(hp, header_name), "r") as tmp:
    headers = np.array(tmp.read().strip("\n").split("\n"))

coefs = utils.read_pickle(os.path.join(rp, result_name))['coef']


def nz(x):
    """Return the first index array produced by ``np.nonzero(x)``."""
    return np.nonzero(x)[0]


# Accumulators filled by the loop that follows.
features_selected_lh = []
features_selected_rh = []
features_all = []

from collections import Counter
import pandas as pd

for coef in coefs:
    idx = nz(coef[1])
    features_all.extend(headers[idx].tolist())
    for col in headers[idx].tolist():
        if 'lh' in col:
            features_selected_lh.append(col)
        elif 'rh' in col:
Esempio n. 3
0
# Second and third positional command-line inputs.
header_name = sys.argv[2]
save_name = sys.argv[3]

brain = Brain("fsaverage", "split", "inflated",
              views=['lat', 'med'], background="white")

# Directories holding the column-header files and the pickled results.
hp = "/storage/gablab001/data/genus/fs_cog/pred_diag/column_headers"
rp = "/storage/gablab001/data/genus/fs_cog/pred_diag/lg/results"

# One header per line; leading/trailing newlines are trimmed before splitting.
with open(os.path.join(hp, header_name), "r") as tmp:
    headers = np.array(tmp.read().strip("\n").split("\n"))

coefs = utils.read_pickle(os.path.join(rp, result_name))['coef']


def nz(x):
    """Return the first index array produced by ``np.nonzero(x)``."""
    return np.nonzero(x)[0]


# Accumulators filled by the loop that follows.
features_selected_lh = []
features_selected_rh = []
features_all = []

from collections import Counter
import pandas as pd

for coef in coefs:
    idx = nz(coef[1])
    features_all.extend(headers[idx].tolist())
    for col in headers[idx].tolist():
        if 'lh' in col:
            features_selected_lh.append(col)
        elif 'rh' in col:
Esempio n. 4
0
    "et_XBC.pkl": 0,
    "et_XCC.pkl": 0,
    "et_XBCCR.pkl": 0,
    "et_XBCR.pkl": 0,
    "et_XCCR.pkl": 0,
    "et_XBCOV.pkl": 0,
    "et_XB.pkl": 0,
    "et_XC.pkl": 0
}
# Directory that holds the pickled extra-trees results; every key of
# `respath` is the name of a file inside it.
path = "/storage/gablab001/data/genus/fs_cog/pred_diag/extra_trees/results/"

# Swap each placeholder value for the full path of its result file.  Only
# values are reassigned (no keys added or removed), so updating while
# iterating is safe.
for key in respath:
    respath[key] = os.path.join(path, key)

# One column per result file, named by the token between "_" and ".pkl"
# (e.g. "et_XBC.pkl" -> "XBC").
auc_df = pd.DataFrame(
    columns=[key.split('_')[1].replace(".pkl", "") for key in respath])

for key, val in respath.items():
    col = key.split('_')[1].replace(".pkl", "")
    data = utils.read_pickle(val)
    # Element 1 of every 'auc' entry is kept as the score
    # (presumably entries are (index, auc) pairs -- TODO confirm).
    auc_df[col] = [a[1] for a in data['auc']]

order = ['XBA', 'XBC', 'XBCCR', 'XBCOV', 'XBCR', 'XB', 'XCC', 'XCCR', 'XC']
# Hand the frame over as `data=`: in seaborn releases whose first positional
# parameter is `x` rather than `data`, a positional DataFrame is not treated
# as wide-form data.  The keyword form means one violin per column everywhere.
sns.violinplot(data=auc_df, order=order)
plt.title("Distributions of AUC")
fig = plt.gcf()
fig.set_size_inches(12, 7)
fig.savefig('et_auc_dist.png', dpi=400, bbox_inches="tight")
plt.close()
Esempio n. 5
0
#def extract(res, k2):
#    k1 = k2[-1]
#    tmp = [(int(key.split(k1)[1]), val) for
#           key, val in res.items() if k2 in key]
#    return sorted(tmp, key=lambda tup: tup[0])


def mask2imp(mask, imp):
    """Return a float copy of ``mask`` with its positive entries set to ``imp``.

    ``mask`` itself is left untouched.  ``imp`` is assigned through boolean
    indexing, so it may be a scalar or hold one value per positive entry of
    ``mask``; entries that are not positive stay as they were after the
    float conversion.
    """
    # astype() hands back a new array, so the caller's mask is never modified.
    expanded = mask.astype(float)
    selected = expanded > 0
    expanded[selected] = imp
    return expanded


# For each (result file, header file) pair in `dnh`: build an AUC-weighted
# mean of the absolute coefficients and append it, combined with the frame
# returned by equate_n_sort, to `data_save`.
for key_i, val_i in dnh.items():
    result = utils.read_pickle(os.path.join(dp, key_i))
    header = np.genfromtxt(os.path.join(hp, val_i), dtype=str)

    # Element 1 of every 'auc' / 'coef' entry carries the value that is used.
    aucs = [entry[1] for entry in result['auc']]
    fimp_mat = np.array([entry[1] for entry in result['coef']])

    # Scale each row of |coef| by its AUC, sum over rows, then divide by the
    # total AUC.
    wf_imp = np.multiply(
        np.abs(fimp_mat), np.array(aucs)[:, np.newaxis]
    ).sum(axis=0) / np.sum(aucs)

    # Single-row frame labelled with this result's own headers.
    df = pd.DataFrame(columns=header)
    df.loc[0, :] = wf_imp

    fn = equate_n_sort(all_headers, header)
    # Missing cells after the concat become 0 before the column-wise sum.
    data_save.append(pd.concat([fn, df]).fillna(0).sum(axis=0))


def most_imp(data, nth_p):
Esempio n. 6
0
from custom import utils
from collections import Counter
import matplotlib.pyplot as plt

# Positional command-line inputs, in order: result file name, header file
# name, and a third value stored as `save_name`.
result_name = sys.argv[1]
header_name = sys.argv[2]
save_name = sys.argv[3]

# Directory holding the column-header files.
hp = "/storage/gablab001/data/genus/fs_cog/pred_diag/column_headers"

# One header per line; leading/trailing newlines are trimmed before splitting.
with open(os.path.join(hp, header_name), "r") as tmp:
    headers = np.array(tmp.read().strip("\n").split("\n"))

# The result file is looked up relative to the parent directory.
coefs = utils.read_pickle("../" + result_name)['coef']


def nz(x):
    """Return the first index array produced by ``np.nonzero(x)``."""
    return np.nonzero(x)[0]


cog_scores = ['SOP', 'RPS', 'VLM']
cols = []

# Collect the header of every non-zero coefficient across all entries.
for coef in coefs:
    selected = headers[nz(coef[1])]
    cols += selected.tolist()

# How many times each header was collected.
counts = dict(Counter(cols))
to_keep = {}

# Keep only the headers that contain one of the three "<score>_DOMAIN" tags.
for key, val in counts.items():
    s = 'DOMAIN'
    if any(prefix + s in key for prefix in ('SOP_', 'VLM_', 'RPS_')):
        to_keep[key] = val
Esempio n. 7
0
#    imp = imp.tolist()
#    for idx in range(max_mask):
#        if mask[idx]:
#            out_imp.append(imp[idx])
#        else:
#            imp.insert(idx, 0)
#            out_imp.append(0)
#    return np.array(out_imp)

def mask2imp(mask, imp):
    """Spread ``imp`` over the positive positions of ``mask``.

    Returns a new float array shaped like ``mask``; the input mask is not
    modified.  Positions where the float-converted mask is greater than zero
    receive ``imp`` (a scalar, or one value per such position); all other
    positions keep their converted value.
    """
    # astype() always produces a fresh array, so no separate copy is needed.
    filled = mask.astype(float)
    positive = filled > 0
    filled[positive] = imp
    return filled

# For each (result file, header file) pair in `dnh`: expand the importances
# to mask length, build their AUC-weighted mean, and append it, combined with
# the frame returned by equate_n_sort, to `data_save`.
for key_i, val_i in dnh.items():
    result = utils.read_pickle(os.path.join(dp, key_i))
    header = np.genfromtxt(os.path.join(hp, val_i), dtype=str)

    # Element 1 of every 'fimp' / 'auc' / 'mask' entry carries the value used.
    fimp = np.array([entry[1] for entry in result['fimp']])
    aucs = [entry[1] for entry in result['auc']]
    masks = np.array([entry[1] for entry in result['mask']])

    # Place each row of importances onto the positive positions of its mask.
    fimp_mat = np.array(
        [mask2imp(masks[i], row) for i, row in enumerate(fimp)])

    # Scale each row of |importance| by its AUC, sum over rows, then divide
    # by the total AUC.
    wf_imp = np.multiply(
        np.abs(fimp_mat), np.array(aucs)[:, np.newaxis]
    ).sum(axis=0) / np.sum(aucs)

    # Single-row frame labelled with this result's own headers.
    df = pd.DataFrame(columns=header)
    df.loc[0, :] = wf_imp

    fn = equate_n_sort(all_headers, header)
    # Missing cells after the concat become 0 before the column-wise sum.
    data_save.append(pd.concat([fn, df]).fillna(0).sum(axis=0))
Esempio n. 8
0
# Result file names; the 0 values are placeholders that are replaced with
# full paths right below.
respath = {
    "lg_XBA.pkl": 0,
    "lg_XBC.pkl": 0,
    "lg_XCC.pkl": 0,
    "lg_XBCCR.pkl": 0,
    "lg_XBCR.pkl": 0,
    "lg_XCCR.pkl": 0,
    "lg_XBCOV.pkl": 0,
    "lg_XB.pkl": 0,
    "lg_XC.pkl": 0
}
# Directory that holds the pickled logistic-regression results.
path = "/storage/gablab001/data/genus/fs_cog/pred_diag/lg/results"

# Only values are reassigned (no keys added or removed), so updating while
# iterating is safe.
for key in respath:
    respath[key] = os.path.join(path, key)

# One column per result file, named by the token between "_" and ".pkl"
# (e.g. "lg_XBA.pkl" -> "XBA").
auc_df = pd.DataFrame(
    columns=[key.split('_')[1].replace('.pkl', '') for key in respath])

for key, val in respath.items():
    col = key.split('_')[1].replace('.pkl', '')
    data = utils.read_pickle(val)['auc']
    # Element 1 of every entry is kept as the score
    # (presumably entries are (index, auc) pairs -- TODO confirm).
    auc_df[col] = [i[1] for i in data]

order = ['XBA', 'XBC', 'XBCCR', 'XBCOV', 'XBCR', 'XB', 'XCC', 'XCCR', 'XC']
# Hand the frame over as `data=`: in seaborn releases whose first positional
# parameter is `x` rather than `data`, a positional DataFrame is not treated
# as wide-form data.  The keyword form means one violin per column everywhere.
sns.violinplot(data=auc_df, order=order)
plt.title("Distributions of AUC")
fig = plt.gcf()
fig.set_size_inches(12, 7)
fig.savefig('lg_auc_dist.png', dpi=400, bbox_inches="tight")
plt.close()
Esempio n. 9
0
from custom import utils
from collections import Counter
import matplotlib.pyplot as plt

# Positional command-line inputs, in order: result file name, header file
# name, and a third value stored as `save_name`.
result_name = sys.argv[1]
header_name = sys.argv[2]
save_name = sys.argv[3]

# Directory holding the column-header files.
hp = "/storage/gablab001/data/genus/fs_cog/pred_diag/column_headers"

# One header per line; leading/trailing newlines are trimmed before splitting.
with open(os.path.join(hp, header_name), "r") as tmp:
    headers = np.array(tmp.read().strip("\n").split("\n"))

# The result file is looked up relative to the parent directory.
coefs = utils.read_pickle("../" + result_name)['coef']


def nz(x):
    """Return the first index array produced by ``np.nonzero(x)``."""
    return np.nonzero(x)[0]


cog_scores = ['SOP', 'RPS', 'VLM']
cols = []

# Collect the header of every non-zero coefficient across all entries.
for coef in coefs:
    selected = headers[nz(coef[1])]
    cols += selected.tolist()

# How many times each header was collected.
counts = dict(Counter(cols))
to_keep = {}

# Keep only the headers that contain one of the three "<score>_DOMAIN" tags.
for key, val in counts.items():
    s = 'DOMAIN'
    is_domain = False
    for prefix in ('SOP_', 'VLM_', 'RPS_'):
        if prefix + s in key:
            is_domain = True
            break
    if is_domain:
        to_keep[key] = val