Ejemplo n.º 1
0
def get_maxpvals(condition):
    """Collect the largest ``utils.mwpval`` p-value of every GSE dataset.

    Every entry of ``../data/expression`` whose name starts with ``GSE`` is
    treated as a dataset.  Its ``processed/group_labels.tsv`` table is
    filtered on ``group_condition == condition``; for each remaining group
    the values in ``seqcorrs_<group>_1000.dat`` are compared, through
    ``utils.mwpval``, against the ``_rdm_a`` and ``_rdm_b`` variants of the
    same file, and the largest p-value seen for the dataset is kept.

    Args:
        condition: Value matched against the ``group_condition`` column.

    Returns:
        dict: dataset id -> largest p-value over all matching groups (never
        below 0, the starting value), or ``np.nan`` when the dataset has no
        group with the requested condition.
    """
    expression_dir = '../data/expression'
    # Third placeholder selects the file variant: '', '_rdm_a' or '_rdm_b'.
    seqcorrs_path = '../gendata/coexpression/{}/seqcorrs_{}{}_1000.dat'

    maxpvals = {}
    for gseid in listdir(expression_dir):
        if not gseid.startswith('GSE'):
            continue

        groups = pd.read_csv(join(expression_dir, gseid,
                                  'processed/group_labels.tsv'),
                             index_col='group_name',
                             sep='\t')
        selected = groups[groups.group_condition == condition].index.tolist()
        if not selected:
            # No group of this dataset carries the requested condition.
            maxpvals[gseid] = np.nan
            continue

        largest = 0
        for group in selected:
            # Loaded in the order fc, rdm_a, rdm_b; only the current group's
            # arrays are kept alive.
            fc, rdma, rdmb = (
                utils.non_nan(
                    np.loadtxt(seqcorrs_path.format(gseid, group, variant)))
                for variant in ('', '_rdm_a', '_rdm_b'))
            pval_a = utils.mwpval(fc, rdma)
            pval_b = utils.mwpval(fc, rdmb)
            largest = max([largest, pval_a, pval_b])
        maxpvals[gseid] = largest
    return maxpvals
Ejemplo n.º 2
0
    # Keep only the rows of this dataset's group table whose group_condition
    # is 'healthy'.
    allgroups[gseid] = groups[groups.group_condition == 'healthy']
    for j, group in enumerate(allgroups[gseid].index.tolist()):
        # Per-group store with three arrays, each loaded from a text file and
        # passed through utils.non_nan: 'fc' (seqcorrs_<group>_1000.dat) and
        # 'rdma' / 'rdmb' (the _rdm_a / _rdm_b variants of the same file --
        # presumably randomised controls; confirm against the generator).
        alldata[gseid][group] = {}
        alldata[gseid][group]['fc'] = utils.non_nan(
            np.loadtxt(
                '../gendata/coexpression/{}/seqcorrs_{}_1000.dat'.format(
                    gseid, group)))
        alldata[gseid][group]['rdma'] = utils.non_nan(
            np.loadtxt(
                '../gendata/coexpression/{}/seqcorrs_{}_rdm_a_1000.dat'.format(
                    gseid, group)))
        alldata[gseid][group]['rdmb'] = utils.non_nan(
            np.loadtxt(
                '../gendata/coexpression/{}/seqcorrs_{}_rdm_b_1000.dat'.format(
                    gseid, group)))
        # Compare 'fc' against each of the two rdm arrays.
        pval_a = utils.mwpval(alldata[gseid][group]['fc'],
                              alldata[gseid][group]['rdma'])
        pval_b = utils.mwpval(alldata[gseid][group]['fc'],
                              alldata[gseid][group]['rdmb'])
        # Running maximum over all groups of this dataset.
        maxpvals[gseid] = max([maxpvals[gseid], pval_a, pval_b])

# Ordering by ascending maximum p-value is disabled (line kept below for
# reference); datasets are taken in the key order of maxpvals instead.
#sorted_gseid = [k for k in sorted(maxpvals, key=maxpvals.get, reverse=False)]
sorted_gseid = maxpvals.keys()

# Figure that receives one subplot per dataset.
plt.figure(figsize=[20, 10])
for i, gseid in enumerate(sorted_gseid):
    plotdata = []
    groups = allgroups[gseid]
    pos = np.arange(1, 4)  # 1,2,3
    plt.subplot(4, 5, i + 1)
    if len(groups):
        for j, group in enumerate(groups.index.tolist()):
Ejemplo n.º 3
0
        non_nan(np.loadtxt(join(obs_dir, pair, 'seqsims_rdm_a.dat'))))
    typeb_seqsims.append(
        non_nan(np.loadtxt(join(obs_dir, pair, 'seqsims_rdm_b.dat'))))

# One label per pair: the diseaseName of both members joined by '---'.
ylbl = [
    diseases.loc[d0].diseaseName + '---' + diseases.loc[d1].diseaseName
    for d0, d1 in pairs
]
# Same labels, with each name first passed through
# utils.trim_text(..., maxchar=15).
ylbl_short = [
    utils.trim_text(diseases.loc[d0].diseaseName, maxchar=15) + '---' +
    utils.trim_text(diseases.loc[d1].diseaseName, maxchar=15)
    for d0, d1 in pairs
]

# utils.mwpval of each obs_seqsims entry against the matching entry of
# typea_seqsims / typeb_seqsims (paired element-wise by zip).
pvals_typea = np.asarray(
    [utils.mwpval(obs, rdm) for obs, rdm in zip(obs_seqsims, typea_seqsims)])
pvals_typeb = np.asarray(
    [utils.mwpval(obs, rdm) for obs, rdm in zip(obs_seqsims, typeb_seqsims)])

# Box positions reused by every subplot of the figure created here.
pos = [1, 2, 3]
plt.figure(figsize=[20, 15])
for i in trange(len(pairs)):
    plt.subplot(8, 9, i + 1)
    plt.gca().yaxis.grid(True, zorder=0, linestyle=':')
    boxprops = {'linewidth': 1., 'zorder': 3}
    whiskerprops = {'linewidth': 1.}
    capprops = {'linewidth': 1.}
    medianprops = {'color': 'red', 'linewidth': 1.5, 'zorder': 6}
    flierprops = {'marker': 'o', 'markeredgecolor': 'lightgray', 'zorder': 3}
    bplot = plt.boxplot([typea_seqsims[i], typeb_seqsims[i], obs_seqsims[i]],
                        positions=pos,
Ejemplo n.º 4
0
    rdm_seqsims.append(
        utils.read_nonrectangular(join(rdm_dir, pair + '.nonrect')))

# One label per pair: the diseaseName of both members joined by '---'.
ylbl = [
    diseases.loc[d0].diseaseName + '---' + diseases.loc[d1].diseaseName
    for d0, d1 in pairs
]
# Same labels, with each name first passed through utils.trim_text (called
# with its default arguments).
ylbl_short = [
    utils.trim_text(diseases.loc[d0].diseaseName) + '---' +
    utils.trim_text(diseases.loc[d1].diseaseName) for d0, d1 in pairs
]

# pvals_rdm[i, j]: utils.mwpval of obs_seqsims[i] against the j-th entry of
# rdm_seqsims[i]; rows follow obs_seqsims, columns run over N_samples.
pvals_rdm = np.zeros([len(obs_seqsims), N_samples])
for i in range(len(obs_seqsims)):
    for j in range(N_samples):
        pvals_rdm[i, j] = utils.mwpval(obs_seqsims[i], rdm_seqsims[i][j])

# Discrete colour map: one fixed colour per p-value interval delimited by
# consecutive entries of `bounds` (four colours for four intervals).
pval_colors = ['green', 'yellowgreen', 'orange', 'red']
cmap = colors.ListedColormap(pval_colors)
bounds = [0, 1e-10, 1e-4, 0.05, 1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.figure(figsize=[20, 20])
# Sort every row in descending order, in place, before drawing.
for i in range(pvals_rdm.shape[0]):
    pvals_rdm[i, :] = sorted(pvals_rdm[i, :], reverse=True)
# `norm` must be handed to imshow: without it the data are rescaled linearly
# between their min and max and the intervals in `bounds` are ignored.
plt.imshow(pvals_rdm, cmap=cmap, norm=norm, aspect='equal')

# Minor ticks halfway between cell centres, i.e. on the cell borders.
plt.gca().set_xticks(np.arange(0, pvals_rdm.shape[1]) + 0.5, minor=True)
plt.gca().set_yticks(np.arange(0, pvals_rdm.shape[0]) + 0.5, minor=True)
# One major y tick per row, labelled with the full pair label.
plt.yticks(range(pvals_rdm.shape[0]), ylbl)
    filename for filename in listdir(datadir)
    if filename.split('_')[1] not in chronic and '_disease' in filename
]  # we want unrelated diseases

# Disease id of each file: the second '_'-separated token of its name.
disIds_disease = [filename.split('_')[1] for filename in filenames]

# Contents of each file, loaded as text and passed through non_nan.
sc_disease = [
    non_nan(np.loadtxt(join(datadir, filename))) for filename in filenames
]

# diseaseName looked up in `dis` for each id.
# NOTE(review): the '_disease' filter here only keeps this list aligned with
# sc_disease (which has no such filter) if every entry of `filenames`
# already contains '_disease' -- confirm against how `filenames` is built.
disNames_disease = [
    dis.loc[disId].diseaseName
    for disId, filename in zip(disIds_disease, filenames)
    if '_disease' in filename
]
# utils.mwpval of obs_disease against each loaded array.
pvals = [utils.mwpval(obs_disease, sc_dis) for sc_dis in sc_disease]

# Ordering by median is disabled (line kept below for reference); entries are
# ordered by p-value instead, largest first.
#sort_idx = sorted(range(len(sc_disease)), key=lambda i: np.median(sc_disease[i]), reverse=True)
sort_idx = np.argsort(pvals)[::-1]

# Apply the same ordering to the data, the names and the p-values.
sc_disease_sort = [sc_disease[i] for i in sort_idx]
disNames_disease_sort = [disNames_disease[i] for i in sort_idx]
pvals_sort = [pvals[i] for i in sort_idx]

N_max = 10

plt.figure()

# Line-style dictionaries; presumably passed to a boxplot call further down
# (not visible from here).
boxprops = {'linewidth': 1, 'zorder': 3}
whiskerprops = {'linewidth': 1, 'zorder': 3}
capprops = {'linewidth': 1, 'zorder': 3}
Ejemplo n.º 6
0
# Use the default output path when args.out_file is None.
if args.out_file is None:
    out_file = '../gendata/plots/go_asthma_copd.pdf'
else:
    out_file = args.out_file

# NOTE(review): `config` is only bound when args.config is given, but the
# boxplot call below reads config['go_boxplot_width'] unconditionally -- that
# raises NameError unless `config` is defined elsewhere; confirm.
if args.config is not None:
    config = utils.read_config(args.config)

# One array per type, loaded from seqsims_<type>_1000.dat and passed through
# non_nan.
types = ['fc', 'rdm_a', 'rdm_b']
seqsims = {
    tp: non_nan(np.loadtxt('../gendata/seqsim/seqsims_{}_1000.dat'.format(tp)))
    for tp in types
}

# Print utils.mwpval of 'fc' against each of the two rdm sets.
print(utils.mwpval(seqsims['fc'], seqsims['rdm_a']))
print(utils.mwpval(seqsims['fc'], seqsims['rdm_b']))

plt.figure(figsize=[5, 5])
# Box positions and line-style dictionaries for the boxplot drawn below.
pos = [0, 1, 2]
boxprops = {'linewidth': 1, 'zorder': 3}
whiskerprops = {'linewidth': 1, 'zorder': 3}
capprops = {'linewidth': 1, 'zorder': 3}
medianprops = {'color': 'red', 'linewidth': 1.5, 'zorder': 6}
flierprops = {'marker': 'o', 'markeredgecolor': 'lightgray', 'zorder': 1}
# Dotted horizontal grid lines, given the lowest zorder used in this block.
plt.gca().yaxis.grid(True, zorder=0, linestyle=':')
bplot = plt.boxplot([seqsims['rdm_a'], seqsims['rdm_b'], seqsims['fc']],
                    positions=pos,
                    patch_artist=True,
                    widths=config['go_boxplot_width'],
                    medianprops=medianprops,