예제 #1
0
def get_maxpvals(condition):
    """Return the largest p-value per GSE dataset for one group condition.

    Every entry of ``../data/expression`` whose name starts with ``GSE`` is
    treated as one dataset.  Its ``processed/group_labels.tsv`` table
    (indexed by ``group_name``) is restricted to the rows whose
    ``group_condition`` equals *condition*.  For each remaining group the
    values in ``seqcorrs_<group>_1000.dat`` are compared, via
    ``utils.mwpval``, against the ``rdm_a`` and ``rdm_b`` files of the same
    group, and the largest p-value encountered is kept for the dataset.

    Args:
        condition: Value matched against the ``group_condition`` column.

    Returns:
        dict mapping each dataset id to its maximum p-value (starting from
        0), or to ``np.nan`` when the dataset has no group with the
        requested condition.
    """
    maxpvals = {}
    for gseid in listdir('../data/expression'):
        if not gseid.startswith('GSE'):
            continue
        groups = pd.read_csv(join('../data/expression', gseid,
                                  'processed/group_labels.tsv'),
                             index_col='group_name',
                             sep='\t')
        selected = groups[groups.group_condition == condition]
        if len(selected) == 0:
            # Nothing to compare for this dataset: flag it instead of
            # reporting a misleading p-value of 0.
            maxpvals[gseid] = np.nan
            continue

        # The running maximum starts at 0, so the result is never negative.
        maxpval = 0
        for group in selected.index.tolist():
            # Only the current group's arrays are kept alive; nothing
            # outside this function ever read the per-group cache.
            fc = utils.non_nan(
                np.loadtxt(
                    '../gendata/coexpression/{}/seqcorrs_{}_1000.dat'.format(
                        gseid, group)))
            rdma = utils.non_nan(
                np.loadtxt(
                    '../gendata/coexpression/{}/seqcorrs_{}_rdm_a_1000.dat'
                    .format(gseid, group)))
            rdmb = utils.non_nan(
                np.loadtxt(
                    '../gendata/coexpression/{}/seqcorrs_{}_rdm_b_1000.dat'
                    .format(gseid, group)))
            pval_a = utils.mwpval(fc, rdma)
            pval_b = utils.mwpval(fc, rdmb)
            maxpval = max(maxpval, pval_a, pval_b)
        maxpvals[gseid] = maxpval
    return maxpvals
예제 #2
0
                    default=None,
                    help='Override output filepath')
args = parser.parse_args()

# Destination of the figure: --out_file takes precedence over the default.
out_file = (args.out_file if args.out_file is not None else
            '../gendata/plots/go_asthma_copd_bp_mf_cc.pdf')

# NOTE: `config` is only bound when --config was supplied.
if args.config is not None:
    config = utils.read_config(args.config)

# One data file per entry of `types` and per suffix (cc / mf / bp --
# presumably the three GO namespaces; confirm against the data pipeline).
# Every loaded array is passed through non_nan().
types = ['fc', 'rdm_a', 'rdm_b']
seqsims_cc = dict(
    (tp,
     non_nan(np.loadtxt(
         '../gendata/seqsim/seqsims_{}_cc_1000.dat'.format(tp))))
    for tp in types)
seqsims_mf = dict(
    (tp,
     non_nan(np.loadtxt(
         '../gendata/seqsim/seqsims_{}_mf_1000.dat'.format(tp))))
    for tp in types)
seqsims_bp = dict(
    (tp,
     non_nan(np.loadtxt(
         '../gendata/seqsim/seqsims_{}_bp_1000.dat'.format(tp))))
    for tp in types)

plt.figure(figsize=[8, 4])
data = [
예제 #3
0
# For every GSE dataset: load the observed and the two randomised value
# files of each 'healthy' group and keep the largest p-value seen
# (the running maximum starts at 0).
maxpvals = {}
for i, gseid in enumerate(
        name for name in listdir('../data/expression')
        if name.startswith('GSE')):
    maxpvals[gseid] = 0
    alldata[gseid] = {}
    groups = pd.read_csv(
        join('../data/expression', gseid, 'processed/group_labels.tsv'),
        sep='\t',
        index_col='group_name')
    allgroups[gseid] = groups[groups.group_condition == 'healthy']
    for j, group in enumerate(allgroups[gseid].index.tolist()):
        # `series` aliases the per-group entry stored in alldata.
        series = alldata[gseid][group] = {}
        series['fc'] = utils.non_nan(
            np.loadtxt(
                '../gendata/coexpression/{}/seqcorrs_{}_1000.dat'.format(
                    gseid, group)))
        series['rdma'] = utils.non_nan(
            np.loadtxt(
                '../gendata/coexpression/{}/seqcorrs_{}_rdm_a_1000.dat'.format(
                    gseid, group)))
        series['rdmb'] = utils.non_nan(
            np.loadtxt(
                '../gendata/coexpression/{}/seqcorrs_{}_rdm_b_1000.dat'.format(
                    gseid, group)))
        pval_a = utils.mwpval(series['fc'], series['rdma'])
        pval_b = utils.mwpval(series['fc'], series['rdmb'])
        maxpvals[gseid] = max(maxpvals[gseid], pval_a, pval_b)
예제 #4
0
# Disease table, indexed by diseaseId.
diseases = pd.read_csv(
    '../data/disease_ontology/processed/diseases_size_min_50.tsv',
    index_col='diseaseId',
    sep='\t')
obs_dir = '../gendata/seqsim/disgenet_diseases/related'
N_samples = 100

# Each entry of obs_dir is split on '_'; the two parts are later used as
# disease ids to look up names in `diseases`.
pairs = [entry.split('_') for entry in listdir(obs_dir)]

obs_seqsims, typea_seqsims, typeb_seqsims = [], [], []
for pair in pairs:
    # Rebuild the directory name of this pair.
    pair = '_'.join(pair)
    for target, fname in ((obs_seqsims, 'seqsims_fc.dat'),
                          (typea_seqsims, 'seqsims_rdm_a.dat'),
                          (typeb_seqsims, 'seqsims_rdm_b.dat')):
        target.append(non_nan(np.loadtxt(join(obs_dir, pair, fname))))

# Full-length labels "<name0>---<name1>" for every pair.
ylbl = [
    diseases.loc[first].diseaseName + '---' + diseases.loc[second].diseaseName
    for first, second in pairs
]
# Same labels with each disease name passed through trim_text(maxchar=15).
ylbl_short = [
    utils.trim_text(diseases.loc[first].diseaseName, maxchar=15) + '---' +
    utils.trim_text(diseases.loc[second].diseaseName, maxchar=15)
    for first, second in pairs
]
# Destination of the figure: --out_file takes precedence over the default.
out_file = (args.out_file if args.out_file is not None else
            '../gendata/plots/asthma_pneumonia_ipf.pdf')

# NOTE: `config` is only bound when --config was supplied.
if args.config is not None:
    config = utils.read_config(args.config)


def non_nan(arr):
    """Return the non-NaN entries of *arr* as a NumPy array.

    *arr* is converted with ``np.asarray``; the result of the boolean-mask
    selection is one-dimensional.
    """
    values = np.asarray(arr)
    is_nan = np.isnan(values)
    return values[~is_nan]


# Value arrays for the three comparisons, with NaN entries removed.
asthma_copd_disease_4302 = non_nan(np.loadtxt(
    '../gendata/coexpression/GSE4302/seqcorrs_disease_gsms_1000.dat'))

asthma_pneumonia_disease = non_nan(np.loadtxt(
    '../gendata/coexpression/asthma_pneumonia/seqcorrs_disease_gsms.dat'))

asthma_ipf_disease = non_nan(np.loadtxt(
    '../gendata/coexpression/asthma_ipf/seqcorrs_disease_gsms.dat'))

# Styling dictionaries for the boxplot elements and the tick labels.
boxprops = dict(linewidth=1, zorder=3)
whiskerprops = dict(linewidth=1, zorder=3)
capprops = dict(linewidth=1, zorder=3)
medianprops = dict(color='red', linewidth=1.5, zorder=6)
flierprops = dict(marker='o', markeredgecolor='lightgray', zorder=1)
labels = ['Ast.-pneumonia', 'Ast.-IPF', 'Asthma-COPD']
# Destination of the figure: --out_file takes precedence over the default.
out_file = ('../gendata/plots/asthma_pneumonia_ipf.pdf'
            if args.out_file is None else args.out_file)

# NOTE: `config` is only bound when --config was supplied.
if args.config is not None:
    config = utils.read_config(args.config)


def non_nan(arr):
    """Drop NaN entries from *arr* and return what is left.

    The input is first converted with ``np.asarray``; the boolean-mask
    selection yields a one-dimensional array.
    """
    data = np.asarray(arr)
    keep = np.logical_not(np.isnan(data))
    return data[keep]


# Value arrays for the three comparisons, with NaN entries removed.
asthma_copd_disease_57148 = non_nan(np.loadtxt(
    '../gendata/coexpression/GSE57148/seqcorrs_disease_gsms_1000.dat'))
copd_pneumonia_disease = non_nan(np.loadtxt(
    '../gendata/coexpression/copd_pneumonia/seqcorrs_disease_gsms.dat'))
copd_ipf_disease = non_nan(np.loadtxt(
    '../gendata/coexpression/copd_ipf/seqcorrs_disease_gsms.dat'))

# Styling dictionaries for the boxplot elements and the tick labels.
boxprops = dict(linewidth=1, zorder=3)
whiskerprops = dict(linewidth=1, zorder=3)
capprops = dict(linewidth=1, zorder=3)
medianprops = dict(color='red', linewidth=1.5, zorder=6)
flierprops = dict(marker='o', markeredgecolor='lightgray', zorder=1)
labels = ['Pneumonia-COPD', 'IPF-COPD', 'Asthma-COPD']

# One position per entry of `labels`.
pos = [1, 2, 3]
parser.add_argument('--no_show',
                    action='store_true',
                    help='Do not show plots')
parser.add_argument('--out_file',
                    default=None,
                    help='Override output filepath')
args = parser.parse_args()

# Destination of the figure: --out_file takes precedence over the default.
out_file = (
    args.out_file if args.out_file is not None else
    '../gendata/plots/asthma_copd_random_diseases_gse57148_boxplot.pdf')

# NOTE: `config` is only bound when --config was supplied.
if args.config is not None:
    config = utils.read_config(args.config)

# Observed values, NaN entries removed.
obs_disease = non_nan(np.loadtxt(
    '../gendata/coexpression/GSE57148/seqcorrs_disease_gsms_1000.dat'))

dis = pd.read_csv(
    '../gendata/coexpression/rdm_disgenet/diseases_size_min_24_max_36.tsv',
    index_col='diseaseId',
    sep='\t')
# Ids of every disease whose name contains 'chronic' (case-insensitive).
is_chronic = dis.diseaseName.str.lower().str.contains('chronic')
chronic = dis[is_chronic].index.tolist()

datadir = '../gendata/coexpression/rdm_disgenet/copd'
# we want unrelated diseases: keep the '_disease' files whose second
# '_'-separated name component is not one of the chronic ids.
filenames = [
    name for name in listdir(datadir)
    if name.split('_')[1] not in chronic and '_disease' in name
]
예제 #8
0
parser.add_argument('--out_file', default=None,
                    help='Override output filepath')
args = parser.parse_args()

# Destination of the figure: --out_file takes precedence over the default.
out_file = (args.out_file if args.out_file is not None else
            '../gendata/plots/go_asthma_copd.pdf')

# NOTE: `config` is only bound when --config was supplied.
if args.config is not None:
    config = utils.read_config(args.config)

# One data file per entry of `types`, each passed through non_nan().
types = ['fc', 'rdm_a', 'rdm_b']
seqsims = dict(
    (tp,
     non_nan(np.loadtxt('../gendata/seqsim/seqsims_{}_1000.dat'.format(tp))))
    for tp in types)

# Report the p-value of 'fc' against each of the two 'rdm' sets.
for rdm_type in ('rdm_a', 'rdm_b'):
    print(utils.mwpval(seqsims['fc'], seqsims[rdm_type]))

plt.figure(figsize=[5, 5])
pos = [0, 1, 2]
# Styling dictionaries for the boxplot elements.
boxprops = dict(linewidth=1, zorder=3)
whiskerprops = dict(linewidth=1, zorder=3)
capprops = dict(linewidth=1, zorder=3)
medianprops = dict(color='red', linewidth=1.5, zorder=6)
flierprops = dict(marker='o', markeredgecolor='lightgray', zorder=1)
# Dotted horizontal grid, drawn with the lowest zorder used here.
plt.gca().yaxis.grid(True, zorder=0, linestyle=':')
bplot = plt.boxplot([seqsims['rdm_a'], seqsims['rdm_b'], seqsims['fc']],