# Excerpt of an AUC-plotting script. It opens inside a dict literal whose first
# line is not visible here; the name `respath` is used right after the closing
# brace. The code itself is left untouched below. In order, it:
#   * replaces every value in `respath` with os.path.join(path, key);
#   * creates `auc_df` with one column per key, labelled with the second
#     "_"-separated piece of the key with ".pkl" removed
#     (e.g. "lg_XBA.pkl" -> "XBA");
#   * fills each column with element [1] of every entry stored under 'auc' in
#     the pickle loaded through utils.read_pickle;
#   * draws a seaborn violin plot using the fixed `order`, titles it
#     "Distributions of AUC", resizes the current figure to 12x7 inches, saves
#     it as 'lg_auc_dist.png' at 400 dpi and closes the figure.
# NOTE(review): `auc_df` is passed to sns.violinplot positionally. Which
# parameter that binds to depends on the installed seaborn version -- confirm
# it is interpreted as wide-form `data`.
# NOTE(review): element [1] of each 'auc' entry is presumably the score of one
# cross-validation fold -- verify against the code that writes the pickle.
"lg_XBA.pkl": 0, "lg_XBC.pkl": 0, "lg_XCC.pkl": 0, "lg_XBCCR.pkl": 0, "lg_XBCR.pkl": 0, "lg_XCCR.pkl": 0, "lg_XBCOV.pkl": 0, "lg_XB.pkl": 0, "lg_XC.pkl": 0 } path = "/storage/gablab001/data/genus/fs_cog/pred_diag/lg/results" for key, val in respath.items(): respath[key] = os.path.join(path, key) auc_df = pd.DataFrame( columns=[key.split('_')[1].replace('.pkl', '') for key in respath.keys()]) for key, val in respath.items(): col = key.split('_')[1].replace('.pkl', '') data = utils.read_pickle(val)['auc'] auc_df[col] = [i[1] for i in data] order = ['XBA', 'XBC', 'XBCCR', 'XBCOV', 'XBCR', 'XB', 'XCC', 'XCCR', 'XC'] sns.violinplot(auc_df, order=order) plt.title("Distributions of AUC") fig = plt.gcf() fig.set_size_inches(12, 7) fig.savefig('lg_auc_dist.png', dpi=400, bbox_inches="tight") plt.close()
# Excerpt of a feature-selection summary script. It stops on an `elif` whose
# body is not visible. The code itself is left untouched below. In order, it:
#   * reads three positional command-line arguments: result file name, header
#     file name and a save name (`save_name` is not used in the visible part);
#   * builds a Brain("fsaverage", "split", "inflated") object with 'lat' and
#     'med' views on a white background;
#   * reads the file `hp`/header_name, strips surrounding newlines and splits
#     it on "\n" into the NumPy array `headers`;
#   * loads the 'coef' entry of the pickle at `rp`/result_name;
#   * for each entry of `coefs`, takes the indices of the non-zero values of
#     element [1], appends the header names at those indices to
#     `features_all`, and appends names containing 'lh' to
#     `features_selected_lh`; the branch for names containing 'rh' is cut off.
# NOTE(review): `Brain` is presumably PySurfer's surfer.Brain -- confirm
# against the imports, which are outside this excerpt.
# NOTE(review): 'lh' / 'rh' are plain substring tests, so any header that
# merely contains those two letters matches as well -- confirm the header
# naming scheme makes this safe.
result_name = sys.argv[1] header_name = sys.argv[2] save_name = sys.argv[3] brain = Brain("fsaverage", "split", "inflated", views=['lat', 'med'], background="white") hp = "/storage/gablab001/data/genus/fs_cog/pred_diag/column_headers" rp = "/storage/gablab001/data/genus/fs_cog/pred_diag/lg/results" with open(os.path.join(hp, header_name), "r") as tmp: headers = np.array(tmp.read().strip("\n").split("\n")) coefs = utils.read_pickle(os.path.join(rp,"{}".format(result_name)))['coef'] nz = lambda x: np.nonzero(x)[0] features_selected_lh = [] features_selected_rh = [] features_all = [] from collections import Counter import pandas as pd for coef in coefs: idx = nz(coef[1]) features_all.extend(headers[idx].tolist()) for col in headers[idx].tolist(): if 'lh' in col: features_selected_lh.append(col) elif 'rh' in col:
# Excerpt of a feature-selection summary script. `result_name` is used below
# but assigned outside this excerpt, and the final `elif` has no visible body.
# The code itself is left untouched below. In order, it:
#   * reads the header file name and a save name from sys.argv[2] and
#     sys.argv[3] (`save_name` is not used in the visible part);
#   * builds a Brain("fsaverage", "split", "inflated") object with 'lat' and
#     'med' views on a white background;
#   * reads the file `hp`/header_name, strips surrounding newlines and splits
#     it on "\n" into the NumPy array `headers`;
#   * loads the 'coef' entry of the pickle at `rp`/result_name;
#   * for each entry of `coefs`, takes the indices of the non-zero values of
#     element [1], appends the header names at those indices to
#     `features_all`, and appends names containing 'lh' to
#     `features_selected_lh`; the branch for names containing 'rh' is cut off.
# NOTE(review): `Brain` is presumably PySurfer's surfer.Brain -- confirm
# against the imports, which are outside this excerpt.
header_name = sys.argv[2] save_name = sys.argv[3] brain = Brain("fsaverage", "split", "inflated", views=['lat', 'med'], background="white") hp = "/storage/gablab001/data/genus/fs_cog/pred_diag/column_headers" rp = "/storage/gablab001/data/genus/fs_cog/pred_diag/lg/results" with open(os.path.join(hp, header_name), "r") as tmp: headers = np.array(tmp.read().strip("\n").split("\n")) coefs = utils.read_pickle(os.path.join(rp, "{}".format(result_name)))['coef'] nz = lambda x: np.nonzero(x)[0] features_selected_lh = [] features_selected_rh = [] features_all = [] from collections import Counter import pandas as pd for coef in coefs: idx = nz(coef[1]) features_all.extend(headers[idx].tolist()) for col in headers[idx].tolist(): if 'lh' in col: features_selected_lh.append(col) elif 'rh' in col:
# Excerpt of an AUC-plotting script for the "et_*" result pickles. It opens
# inside a dict literal whose first line(s) are not visible here; the name
# `respath` is used right after the closing brace. The code itself is left
# untouched below. In order, it:
#   * replaces every value in `respath` with os.path.join(path, key);
#   * creates `auc_df` with one column per key, labelled with the second
#     "_"-separated piece of the key with ".pkl" removed
#     (e.g. "et_XBC.pkl" -> "XBC");
#   * loads each pickle through utils.read_pickle and fills the column with
#     element [1] of every entry under its 'auc' key (a commented-out line
#     shows an alternative that collected values whose key contains "auc");
#   * draws a seaborn violin plot using the fixed `order`, titles it
#     "Distributions of AUC", resizes the current figure to 12x7 inches, saves
#     it as 'et_auc_dist.png' at 400 dpi and closes the figure.
# NOTE(review): `order` lists 'XBA', but no "et_XBA.pkl" key is visible in
# this excerpt -- confirm it is present in the part of the dict that is cut off.
# NOTE(review): `auc_df` is passed to sns.violinplot positionally. Which
# parameter that binds to depends on the installed seaborn version -- confirm
# it is interpreted as wide-form `data`.
"et_XBC.pkl": 0, "et_XCC.pkl": 0, "et_XBCCR.pkl": 0, "et_XBCR.pkl": 0, "et_XCCR.pkl": 0, "et_XBCOV.pkl": 0, "et_XB.pkl": 0, "et_XC.pkl": 0 } path = "/storage/gablab001/data/genus/fs_cog/pred_diag/extra_trees/results/" for key, val in respath.items(): respath[key] = os.path.join(path, key) auc_df = pd.DataFrame( columns=[key.split('_')[1].replace(".pkl", "") for key in respath.keys()]) for key, val in respath.items(): col = key.split('_')[1].replace(".pkl", "") data = utils.read_pickle(val) #auc_df[col] = [v for k, v in data.items() if "auc" in k] auc_df[col] = [a[1] for a in data['auc']] order = ['XBA', 'XBC', 'XBCCR', 'XBCOV', 'XBCR', 'XB', 'XCC', 'XCCR', 'XC'] sns.violinplot(auc_df, order=order) plt.title("Distributions of AUC") fig = plt.gcf() fig.set_size_inches(12, 7) fig.savefig('et_auc_dist.png', dpi=400, bbox_inches="tight") plt.close()
# Excerpt of a script that aggregates model coefficients across result files.
# It ends on the header of `most_imp(data, nth_p)`, whose body is not visible.
# The code itself is left untouched below. Contents, in order:
#   * a commented-out helper `extract(res, k2)` that collected the entries of
#     `res` whose key contains `k2` and sorted them by the integer parsed from
#     the key;
#   * `mask2imp(mask, imp)`: returns a float copy of `mask` in which every
#     entry greater than zero has been overwritten with the values of `imp`
#     (not called anywhere in the visible part of this excerpt);
#   * a loop over `dnh.items()` that, for each (result file, header file) pair,
#       - loads the pickle from `dp` and the header names (as strings) from `hp`,
#       - takes element [1] of every 'auc' entry and of every 'coef' entry,
#       - computes `wf_imp`, the AUC-weighted mean of the absolute
#         coefficients: sum over axis 0 of |coef| * auc, divided by sum(aucs),
#       - stores it as row 0 of a one-row DataFrame whose columns are the
#         header names,
#       - concatenates that frame with the result of
#         equate_n_sort(all_headers, header), fills missing values with 0,
#         sums over rows and appends the result to `data_save`.
# NOTE(review): `dnh`, `dp`, `hp`, `all_headers`, `data_save` and
# `equate_n_sort` are defined outside this excerpt; `dnh` presumably maps
# result file names to header file names -- verify.
# NOTE(review): np.array over the 'coef' vectors assumes every entry has the
# same length -- confirm.
#def extract(res, k2): # k1 = k2[-1] # tmp = [(int(key.split(k1)[1]), val) for # key, val in res.items() if k2 in key] # return sorted(tmp, key=lambda tup: tup[0]) def mask2imp(mask, imp): mask_copy = mask.copy().astype(float) mask_copy[mask_copy > 0] = imp return mask_copy for key_i, val_i in dnh.items(): result = utils.read_pickle(os.path.join(dp, key_i)) header = np.genfromtxt(os.path.join(hp, val_i), dtype=str) #coef = extract(result, "coef") #aucs = [i[1] for i in extract(result, "auc")] #coef = [c[1] for c in result['coef']] aucs = [a[1] for a in result['auc']] #fimp_mat = np.array([coef[i][1] for i in range(len(coef))]) fimp_mat = np.array([c[1] for c in result['coef']]) wf_imp = (np.abs(fimp_mat) * np.array(aucs)[:, None]).sum(0) / np.sum(aucs) df = pd.DataFrame(columns=header) df.loc[0, :] = wf_imp fn = equate_n_sort(all_headers, header) data_save.append(pd.concat([fn, df]).fillna(0).sum(0)) def most_imp(data, nth_p):
from custom import utils
from collections import Counter
import matplotlib.pyplot as plt

# Count how often each cognitive-domain feature received a non-zero
# coefficient across the entries stored under 'coef' in a result pickle.

# command line inputs
result_name = sys.argv[1]  # pickle file name, looked up in the parent directory
header_name = sys.argv[2]  # header file name inside `hp`
save_name = sys.argv[3]    # not used in this part of the script

# headers: one column name per line of the header file
hp = "/storage/gablab001/data/genus/fs_cog/pred_diag/column_headers"
with open(os.path.join(hp, header_name), "r") as tmp:
    headers = np.array(tmp.read().strip("\n").split("\n"))

coefs = utils.read_pickle("../{}".format(result_name))['coef']


def nz(x):
    """Return the indices of the non-zero entries of *x*."""
    return np.nonzero(x)[0]


cog_scores = ['SOP', 'RPS', 'VLM']
# Suffix shared by every feature of interest; fixed, so set once up front.
s = 'DOMAIN'

# Names of all features with a non-zero coefficient, repeated once per entry
# of `coefs` in which they were selected (element [1] holds the coefficients).
cols = []
for coef in coefs:
    selected = headers[nz(coef[1])]
    cols.extend(selected.tolist())

counts = dict(Counter(cols))

# Keep only features whose name contains "<score>_DOMAIN" for one of the
# scores in `cog_scores`, so that list is the single place to edit.
to_keep = {}
for key, val in counts.items():
    if any(score + '_' + s in key for score in cog_scores):
        to_keep[key] = val
# (tail of a commented-out helper whose beginning lies before this excerpt;
#  kept so the commented block is not left half-deleted)
#    imp = imp.tolist()
#    for idx in range(max_mask):
#        if mask[idx]:
#            out_imp.append(imp[idx])
#        else:
#            imp.insert(idx, 0)
#            out_imp.append(0)
#    return np.array(out_imp)

def mask2imp(mask, imp):
    """Spread importance values over the positions selected by a mask.

    Parameters
    ----------
    mask : array-like with ``astype``
        Selection mask; entries greater than zero mark selected positions.
    imp : scalar or sequence
        Value(s) written to the selected positions, in order. A sequence must
        have one value per selected position.

    Returns
    -------
    A float copy of `mask` whose positive entries have been replaced by `imp`;
    the input is left unmodified.
    """
    # astype() already returns a new object, so no separate copy is needed.
    mask_copy = mask.astype(float)
    mask_copy[mask_copy > 0] = imp
    return mask_copy


# For every (result pickle, header file) pair, compute one AUC-weighted
# importance value per feature and append it, aligned to `all_headers`, to
# `data_save`.
for key_i, val_i in dnh.items():
    result = utils.read_pickle(os.path.join(dp, key_i))
    header = np.genfromtxt(os.path.join(hp, val_i), dtype=str)

    # Keep the importance vectors in a plain list: each one only has entries
    # for the positions its own mask selects, so lengths may differ and
    # np.array() over them would build a ragged array (an error on current
    # NumPy releases).
    fimp = [f[1] for f in result['fimp']]
    aucs = [a[1] for a in result['auc']]
    masks = np.array([m[1] for m in result['mask']])

    # Expand every importance vector to full feature length (unselected
    # positions stay 0), giving one row per entry.
    fimp_mat = np.array([mask2imp(m, f) for m, f in zip(masks, fimp)])

    # AUC-weighted mean of the absolute importances over the rows.
    wf_imp = (np.abs(fimp_mat) * np.array(aucs)[:, None]).sum(0) / np.sum(aucs)

    df = pd.DataFrame(columns=header)
    df.loc[0, :] = wf_imp
    fn = equate_n_sort(all_headers, header)
    # Columns missing from either frame become NaN on concat; count them as 0.
    data_save.append(pd.concat([fn, df]).fillna(0).sum(0))
# Plot the distribution of AUC values for every "lg_*" result pickle as a
# violin plot and save it to 'lg_auc_dist.png'.

path = "/storage/gablab001/data/genus/fs_cog/pred_diag/lg/results"

# Map each result file name to its full path (built directly instead of
# filling the dict with placeholder zeros and overwriting them afterwards).
respath = {
    name: os.path.join(path, name)
    for name in (
        "lg_XBA.pkl",
        "lg_XBC.pkl",
        "lg_XCC.pkl",
        "lg_XBCCR.pkl",
        "lg_XBCR.pkl",
        "lg_XCCR.pkl",
        "lg_XBCOV.pkl",
        "lg_XB.pkl",
        "lg_XC.pkl",
    )
}

# One column per result file, labelled with the feature-set part of the file
# name (e.g. "lg_XBA.pkl" -> "XBA").
auc_df = pd.DataFrame(
    columns=[key.split('_')[1].replace('.pkl', '') for key in respath])

for key, val in respath.items():
    col = key.split('_')[1].replace('.pkl', '')
    data = utils.read_pickle(val)['auc']
    # Element [1] of every 'auc' entry is the plotted value
    # (presumably one score per cross-validation fold -- confirm).
    auc_df[col] = [i[1] for i in data]

order = ['XBA','XBC','XBCCR','XBCOV','XBCR','XB','XCC','XCCR','XC']

# Pass the frame by keyword: on seaborn releases where the first positional
# parameter is `x`, a positional DataFrame is not read as wide-form data and
# `order` has no columns to refer to.
sns.violinplot(data=auc_df, order=order)
plt.title("Distributions of AUC")

fig = plt.gcf()
fig.set_size_inches(12, 7)
fig.savefig('lg_auc_dist.png', dpi=400, bbox_inches="tight")
plt.close()
from custom import utils
from collections import Counter
import matplotlib.pyplot as plt

# Tally, per cognitive-domain feature, the number of entries under 'coef' in
# which that feature has a non-zero coefficient.

# command line inputs
result_name = sys.argv[1]
header_name = sys.argv[2]
save_name = sys.argv[3]

# headers
hp = "/storage/gablab001/data/genus/fs_cog/pred_diag/column_headers"
with open(os.path.join(hp, header_name), "r") as tmp:
    header_text = tmp.read()
headers = np.array(header_text.strip("\n").split("\n"))

result_path = "../{}".format(result_name)
coefs = utils.read_pickle(result_path)['coef']
nz = lambda x: np.nonzero(x)[0]
cog_scores = ['SOP','RPS','VLM']

# Gather the header names of all non-zero coefficients (element [1] of each
# entry), one occurrence per entry that selected the feature.
cols = []
for coef in coefs:
    nonzero_idx = nz(coef[1])
    selected = headers[nonzero_idx]
    cols += selected.tolist()

counts = dict(Counter(cols))

# Retain only the SOP / VLM / RPS "DOMAIN" features.
to_keep = {}
for key, val in counts.items():
    s = 'DOMAIN'
    is_domain_feature = ('SOP_'+s in key) or ('VLM_'+s in key) or ('RPS_'+s in key)
    if not is_domain_feature:
        continue
    to_keep[key] = val