def get_maxpvals(condition):
    """Return the largest p-value per GSE dataset for one group condition.

    Every entry of ``../data/expression`` whose name starts with ``'GSE'``
    is treated as a dataset. Its ``processed/group_labels.tsv`` table is
    read and restricted to the rows whose ``group_condition`` equals
    ``condition``. For each remaining group the observed correlations
    (``fc``) are compared against the two randomised sets (``rdm_a`` and
    ``rdm_b``) with ``utils.mwpval`` (presumably a Mann-Whitney U test
    p-value; confirm against ``utils``), and the maximum over all groups
    and both comparisons is kept.

    Args:
        condition: Value matched against the ``group_condition`` column.

    Returns:
        dict mapping each GSE id to its maximum p-value, or to ``np.nan``
        when the dataset has no group with the requested condition. Keys
        appear in the order returned by ``listdir``.
    """
    expression_dir = '../data/expression'
    maxpvals = {}
    for gseid in listdir(expression_dir):
        if not gseid.startswith('GSE'):
            continue
        groups = pd.read_csv(
            join(expression_dir, gseid, 'processed/group_labels.tsv'),
            index_col='group_name',
            sep='\t')
        selected = groups[groups.group_condition == condition]
        # NOTE(review): the original text was whitespace-collapsed; its
        # ``else`` is read as pairing with this emptiness check, not with
        # the per-group loop.
        if not len(selected):
            maxpvals[gseid] = np.nan
            continue
        # Start from 0 so the first computed p-value always replaces it.
        largest = 0
        for group in selected.index.tolist():
            fc = _load_seqcorrs(gseid, group)
            rdm_a = _load_seqcorrs(gseid, group, '_rdm_a')
            rdm_b = _load_seqcorrs(gseid, group, '_rdm_b')
            pval_a = utils.mwpval(fc, rdm_a)
            pval_b = utils.mwpval(fc, rdm_b)
            largest = max([largest, pval_a, pval_b])
        maxpvals[gseid] = largest
    return maxpvals


def _load_seqcorrs(gseid, group, suffix=''):
    """Load one ``seqcorrs_<group><suffix>_1000.dat`` file, NaNs removed."""
    path = '../gendata/coexpression/{}/seqcorrs_{}{}_1000.dat'.format(
        gseid, group, suffix)
    return utils.non_nan(np.loadtxt(path))
allgroups[gseid] = groups[groups.group_condition == 'healthy'] for j, group in enumerate(allgroups[gseid].index.tolist()): alldata[gseid][group] = {} alldata[gseid][group]['fc'] = utils.non_nan( np.loadtxt( '../gendata/coexpression/{}/seqcorrs_{}_1000.dat'.format( gseid, group))) alldata[gseid][group]['rdma'] = utils.non_nan( np.loadtxt( '../gendata/coexpression/{}/seqcorrs_{}_rdm_a_1000.dat'.format( gseid, group))) alldata[gseid][group]['rdmb'] = utils.non_nan( np.loadtxt( '../gendata/coexpression/{}/seqcorrs_{}_rdm_b_1000.dat'.format( gseid, group))) pval_a = utils.mwpval(alldata[gseid][group]['fc'], alldata[gseid][group]['rdma']) pval_b = utils.mwpval(alldata[gseid][group]['fc'], alldata[gseid][group]['rdmb']) maxpvals[gseid] = max([maxpvals[gseid], pval_a, pval_b]) #sorted_gseid = [k for k in sorted(maxpvals, key=maxpvals.get, reverse=False)] sorted_gseid = maxpvals.keys() plt.figure(figsize=[20, 10]) for i, gseid in enumerate(sorted_gseid): plotdata = [] groups = allgroups[gseid] pos = np.arange(1, 4) # 1,2,3 plt.subplot(4, 5, i + 1) if len(groups): for j, group in enumerate(groups.index.tolist()):
non_nan(np.loadtxt(join(obs_dir, pair, 'seqsims_rdm_a.dat')))) typeb_seqsims.append( non_nan(np.loadtxt(join(obs_dir, pair, 'seqsims_rdm_b.dat')))) ylbl = [ diseases.loc[d0].diseaseName + '---' + diseases.loc[d1].diseaseName for d0, d1 in pairs ] ylbl_short = [ utils.trim_text(diseases.loc[d0].diseaseName, maxchar=15) + '---' + utils.trim_text(diseases.loc[d1].diseaseName, maxchar=15) for d0, d1 in pairs ] pvals_typea = np.asarray( [utils.mwpval(obs, rdm) for obs, rdm in zip(obs_seqsims, typea_seqsims)]) pvals_typeb = np.asarray( [utils.mwpval(obs, rdm) for obs, rdm in zip(obs_seqsims, typeb_seqsims)]) pos = [1, 2, 3] plt.figure(figsize=[20, 15]) for i in trange(len(pairs)): plt.subplot(8, 9, i + 1) plt.gca().yaxis.grid(True, zorder=0, linestyle=':') boxprops = {'linewidth': 1., 'zorder': 3} whiskerprops = {'linewidth': 1.} capprops = {'linewidth': 1.} medianprops = {'color': 'red', 'linewidth': 1.5, 'zorder': 6} flierprops = {'marker': 'o', 'markeredgecolor': 'lightgray', 'zorder': 3} bplot = plt.boxplot([typea_seqsims[i], typeb_seqsims[i], obs_seqsims[i]], positions=pos,
rdm_seqsims.append( utils.read_nonrectangular(join(rdm_dir, pair + '.nonrect'))) ylbl = [ diseases.loc[d0].diseaseName + '---' + diseases.loc[d1].diseaseName for d0, d1 in pairs ] ylbl_short = [ utils.trim_text(diseases.loc[d0].diseaseName) + '---' + utils.trim_text(diseases.loc[d1].diseaseName) for d0, d1 in pairs ] pvals_rdm = np.zeros([len(obs_seqsims), N_samples]) for i in range(len(obs_seqsims)): for j in range(N_samples): pvals_rdm[i, j] = utils.mwpval(obs_seqsims[i], rdm_seqsims[i][j]) # make a color map of fixed colors pval_colors = ['green', 'yellowgreen', 'orange', 'red'] cmap = colors.ListedColormap(pval_colors) bounds = [0, 1e-10, 1e-4, 0.05, 1] norm = colors.BoundaryNorm(bounds, cmap.N) plt.figure(figsize=[20, 20]) for i in range(pvals_rdm.shape[0]): pvals_rdm[i, :] = sorted(pvals_rdm[i, :], reverse=True) plt.imshow(pvals_rdm, cmap=cmap, aspect='equal') # Minor ticks plt.gca().set_xticks(np.arange(0, pvals_rdm.shape[1]) + 0.5, minor=True) plt.gca().set_yticks(np.arange(0, pvals_rdm.shape[0]) + 0.5, minor=True) plt.yticks(range(pvals_rdm.shape[0]), ylbl)
filename for filename in listdir(datadir) if filename.split('_')[1] not in chronic and '_disease' in filename ] # we want unrelated diseases disIds_disease = [filename.split('_')[1] for filename in filenames] sc_disease = [ non_nan(np.loadtxt(join(datadir, filename))) for filename in filenames ] disNames_disease = [ dis.loc[disId].diseaseName for disId, filename in zip(disIds_disease, filenames) if '_disease' in filename ] pvals = [utils.mwpval(obs_disease, sc_dis) for sc_dis in sc_disease] #sort_idx = sorted(range(len(sc_disease)), key=lambda i: np.median(sc_disease[i]), reverse=True) sort_idx = np.argsort(pvals)[::-1] sc_disease_sort = [sc_disease[i] for i in sort_idx] disNames_disease_sort = [disNames_disease[i] for i in sort_idx] pvals_sort = [pvals[i] for i in sort_idx] N_max = 10 plt.figure() boxprops = {'linewidth': 1, 'zorder': 3} whiskerprops = {'linewidth': 1, 'zorder': 3} capprops = {'linewidth': 1, 'zorder': 3}
if args.out_file is None: out_file = '../gendata/plots/go_asthma_copd.pdf' else: out_file = args.out_file if args.config is not None: config = utils.read_config(args.config) types = ['fc', 'rdm_a', 'rdm_b'] seqsims = { tp: non_nan(np.loadtxt('../gendata/seqsim/seqsims_{}_1000.dat'.format(tp))) for tp in types } print(utils.mwpval(seqsims['fc'], seqsims['rdm_a'])) print(utils.mwpval(seqsims['fc'], seqsims['rdm_b'])) plt.figure(figsize=[5, 5]) pos = [0, 1, 2] boxprops = {'linewidth': 1, 'zorder': 3} whiskerprops = {'linewidth': 1, 'zorder': 3} capprops = {'linewidth': 1, 'zorder': 3} medianprops = {'color': 'red', 'linewidth': 1.5, 'zorder': 6} flierprops = {'marker': 'o', 'markeredgecolor': 'lightgray', 'zorder': 1} plt.gca().yaxis.grid(True, zorder=0, linestyle=':') bplot = plt.boxplot([seqsims['rdm_a'], seqsims['rdm_b'], seqsims['fc']], positions=pos, patch_artist=True, widths=config['go_boxplot_width'], medianprops=medianprops,