def get_maxpvals(condition,
                 expression_dir='../data/expression',
                 coexpression_dir='../gendata/coexpression'):
    """Return, per GSE dataset, the largest p-value over its matching groups.

    For every entry of ``expression_dir`` whose name starts with ``'GSE'``,
    ``processed/group_labels.tsv`` is read and the rows whose
    ``group_condition`` equals ``condition`` are selected.  For each selected
    group three files are loaded from ``coexpression_dir/<gseid>/``:
    ``seqcorrs_<group>_1000.dat``, ``seqcorrs_<group>_rdm_a_1000.dat`` and
    ``seqcorrs_<group>_rdm_b_1000.dat``.  The first is compared against each
    of the other two with ``utils.mwpval`` and the running maximum is kept.

    Args:
        condition: Value matched against the ``group_condition`` column.
        expression_dir: Directory holding one sub-directory per dataset.
        coexpression_dir: Directory holding the ``seqcorrs_*`` files.

    Returns:
        dict mapping dataset id to the maximum p-value found, or to
        ``np.nan`` when the dataset has no group with the given condition.
    """
    maxpvals = {}
    for gseid in listdir(expression_dir):
        if not gseid.startswith('GSE'):
            continue

        groups = pd.read_csv(join(expression_dir, gseid,
                                  'processed/group_labels.tsv'),
                             index_col='group_name',
                             sep='\t')
        group_names = groups[groups.group_condition ==
                             condition].index.tolist()
        if not group_names:
            # No group of this condition in the dataset: mark as undefined.
            maxpvals[gseid] = np.nan
            continue

        # Paths are built with '/' so the default arguments yield exactly the
        # same strings as the previously hard-coded templates.
        template = '{}/{}/seqcorrs_{}{}_1000.dat'
        worst = 0
        for group in group_names:
            observed = utils.non_nan(
                np.loadtxt(template.format(coexpression_dir, gseid, group,
                                           '')))
            random_a = utils.non_nan(
                np.loadtxt(template.format(coexpression_dir, gseid, group,
                                           '_rdm_a')))
            random_b = utils.non_nan(
                np.loadtxt(template.format(coexpression_dir, gseid, group,
                                           '_rdm_b')))
            # NOTE(review): utils.mwpval is presumably a Mann-Whitney p-value;
            # confirm against the utils module.
            pval_a = utils.mwpval(observed, random_a)
            pval_b = utils.mwpval(observed, random_b)
            # Argument order is kept as in the original running max().
            worst = max([worst, pval_a, pval_b])
        maxpvals[gseid] = worst
    return maxpvals
default=None, help='Override output filepath') args = parser.parse_args() if args.out_file is None: out_file = '../gendata/plots/go_asthma_copd_bp_mf_cc.pdf' else: out_file = args.out_file if args.config is not None: config = utils.read_config(args.config) types = ['fc', 'rdm_a', 'rdm_b'] seqsims_cc = { tp: non_nan(np.loadtxt('../gendata/seqsim/seqsims_{}_cc_1000.dat'.format(tp))) for tp in types } seqsims_mf = { tp: non_nan(np.loadtxt('../gendata/seqsim/seqsims_{}_mf_1000.dat'.format(tp))) for tp in types } seqsims_bp = { tp: non_nan(np.loadtxt('../gendata/seqsim/seqsims_{}_bp_1000.dat'.format(tp))) for tp in types } plt.figure(figsize=[8, 4]) data = [
# Largest p-value seen per dataset (keyed by GSE id) over its 'healthy' groups.
# NOTE(review): alldata and allgroups are written to below but are not created
# in this fragment; they must already exist when this code runs.
maxpvals = {}
# Only entries of ../data/expression whose name starts with 'GSE' are visited.
for i, gseid in enumerate([
        gseid_ for gseid_ in listdir('../data/expression')
        if gseid_.startswith('GSE')
]):
    # Start at 0 so the running max() below has a floor; a dataset without
    # any 'healthy' group keeps this 0.
    maxpvals[gseid] = 0
    alldata[gseid] = {}
    groups = pd.read_csv(join('../data/expression', gseid,
                              'processed/group_labels.tsv'),
                         index_col='group_name',
                         sep='\t')
    # Keep only the rows whose group_condition is 'healthy'.
    allgroups[gseid] = groups[groups.group_condition == 'healthy']
    for j, group in enumerate(allgroups[gseid].index.tolist()):
        alldata[gseid][group] = {}
        # 'fc' comes from the plain seqcorrs file; 'rdma' / 'rdmb' come from
        # the *_rdm_a / *_rdm_b files of the same group.
        alldata[gseid][group]['fc'] = utils.non_nan(
            np.loadtxt(
                '../gendata/coexpression/{}/seqcorrs_{}_1000.dat'.format(
                    gseid, group)))
        alldata[gseid][group]['rdma'] = utils.non_nan(
            np.loadtxt(
                '../gendata/coexpression/{}/seqcorrs_{}_rdm_a_1000.dat'.format(
                    gseid, group)))
        alldata[gseid][group]['rdmb'] = utils.non_nan(
            np.loadtxt(
                '../gendata/coexpression/{}/seqcorrs_{}_rdm_b_1000.dat'.format(
                    gseid, group)))
        # NOTE(review): utils.mwpval is presumably a Mann-Whitney p-value;
        # confirm against the utils module.
        pval_a = utils.mwpval(alldata[gseid][group]['fc'],
                              alldata[gseid][group]['rdma'])
        pval_b = utils.mwpval(alldata[gseid][group]['fc'],
                              alldata[gseid][group]['rdmb'])
        # Running maximum across both comparisons and all groups so far.
        maxpvals[gseid] = max([maxpvals[gseid], pval_a, pval_b])
# Disease table indexed by diseaseId; used below to look up diseaseName.
diseases = pd.read_csv(
    '../data/disease_ontology/processed/diseases_size_min_50.tsv',
    sep='\t',
    index_col='diseaseId')
obs_dir = '../gendata/seqsim/disgenet_diseases/related'
N_samples = 100
# Every entry of obs_dir is split on '_'; the unpacking `for d0, d1 in pairs`
# further down requires each name to yield exactly two parts.
pairs = [pair.split('_') for pair in listdir(obs_dir)]
obs_seqsims = []
typea_seqsims = []
typeb_seqsims = []
for pair in pairs:
    # Rebuild the directory name from its two parts.
    pair = '_'.join(pair)
    # One NaN-filtered array per pair, appended in the same order as `pairs`.
    obs_seqsims.append(
        non_nan(np.loadtxt(join(obs_dir, pair, 'seqsims_fc.dat'))))
    typea_seqsims.append(
        non_nan(np.loadtxt(join(obs_dir, pair, 'seqsims_rdm_a.dat'))))
    typeb_seqsims.append(
        non_nan(np.loadtxt(join(obs_dir, pair, 'seqsims_rdm_b.dat'))))
# Full labels: the two disease names joined by '---'.
ylbl = [
    diseases.loc[d0].diseaseName + '---' + diseases.loc[d1].diseaseName
    for d0, d1 in pairs
]
# Short labels: same as ylbl, with each name passed through utils.trim_text
# (maxchar=15) first.
ylbl_short = [
    utils.trim_text(diseases.loc[d0].diseaseName, maxchar=15) + '---' +
    utils.trim_text(diseases.loc[d1].diseaseName, maxchar=15)
    for d0, d1 in pairs
]
# Use the default output path unless --out_file was given.
if args.out_file is None:
    out_file = '../gendata/plots/asthma_pneumonia_ipf.pdf'
else:
    out_file = args.out_file
# config is only bound when a config path was supplied.
if args.config is not None:
    config = utils.read_config(args.config)


def non_nan(arr):
    """Return the entries of *arr* that are not NaN, as a NumPy array."""
    arr = np.asarray(arr)
    return arr[~np.isnan(arr)]


# NaN-filtered values loaded from the three seqcorrs_disease_gsms files.
asthma_copd_disease_4302 = non_nan(
    np.loadtxt(
        '../gendata/coexpression/GSE4302/seqcorrs_disease_gsms_1000.dat'))
asthma_pneumonia_disease = non_nan(
    np.loadtxt(
        '../gendata/coexpression/asthma_pneumonia/seqcorrs_disease_gsms.dat'))
asthma_ipf_disease = non_nan(
    np.loadtxt('../gendata/coexpression/asthma_ipf/seqcorrs_disease_gsms.dat'))
# Style dictionaries; presumably passed to a matplotlib boxplot call that is
# outside this fragment.
boxprops = {'linewidth': 1, 'zorder': 3}
whiskerprops = {'linewidth': 1, 'zorder': 3}
capprops = {'linewidth': 1, 'zorder': 3}
medianprops = {'color': 'red', 'linewidth': 1.5, 'zorder': 6}
flierprops = {'marker': 'o', 'markeredgecolor': 'lightgray', 'zorder': 1}
labels = ['Ast.-pneumonia', 'Ast.-IPF', 'Asthma-COPD']
# Use the default output path unless --out_file was given.
if args.out_file is None:
    out_file = '../gendata/plots/asthma_pneumonia_ipf.pdf'
else:
    out_file = args.out_file
# config is only bound when a config path was supplied.
if args.config is not None:
    config = utils.read_config(args.config)


def non_nan(arr):
    """Return the entries of *arr* that are not NaN, as a NumPy array."""
    arr = np.asarray(arr)
    return arr[~np.isnan(arr)]


# NaN-filtered values loaded from the three seqcorrs_disease_gsms files.
asthma_copd_disease_57148 = non_nan(
    np.loadtxt(
        '../gendata/coexpression/GSE57148/seqcorrs_disease_gsms_1000.dat'))
copd_pneumonia_disease = non_nan(
    np.loadtxt(
        '../gendata/coexpression/copd_pneumonia/seqcorrs_disease_gsms.dat'))
copd_ipf_disease = non_nan(
    np.loadtxt('../gendata/coexpression/copd_ipf/seqcorrs_disease_gsms.dat'))
# Style dictionaries; presumably passed to a matplotlib boxplot call that is
# outside this fragment.
boxprops = {'linewidth': 1, 'zorder': 3}
whiskerprops = {'linewidth': 1, 'zorder': 3}
capprops = {'linewidth': 1, 'zorder': 3}
medianprops = {'color': 'red', 'linewidth': 1.5, 'zorder': 6}
flierprops = {'marker': 'o', 'markeredgecolor': 'lightgray', 'zorder': 1}
labels = ['Pneumonia-COPD', 'IPF-COPD', 'Asthma-COPD']
pos = [1, 2, 3]
# Remaining command-line options, then parse.  `parser` is created outside
# this fragment.
parser.add_argument('--no_show',
                    action='store_true',
                    help='Do not show plots')
parser.add_argument('--out_file',
                    default=None,
                    help='Override output filepath')
args = parser.parse_args()
# Use the default output path unless --out_file was given.
if args.out_file is None:
    out_file = '../gendata/plots/asthma_copd_random_diseases_gse57148_boxplot.pdf'
else:
    out_file = args.out_file
# config is only bound when a config path was supplied.
if args.config is not None:
    config = utils.read_config(args.config)
# NaN-filtered values from the GSE57148 disease seqcorrs file.
obs_disease = non_nan(
    np.loadtxt(
        '../gendata/coexpression/GSE57148/seqcorrs_disease_gsms_1000.dat'))
# Disease table indexed by diseaseId.
dis = pd.read_csv(
    '../gendata/coexpression/rdm_disgenet/diseases_size_min_24_max_36.tsv',
    sep='\t',
    index_col='diseaseId')
# Ids of diseases whose lower-cased name contains 'chronic'.
chronic = dis[dis.diseaseName.str.lower().str.contains(
    'chronic')].index.tolist()
datadir = '../gendata/coexpression/rdm_disgenet/copd'
# Keep files whose name contains '_disease' and whose second '_'-separated
# field is not one of the chronic ids.
filenames = [
    filename for filename in listdir(datadir)
    if filename.split('_')[1] not in chronic and '_disease' in filename
]  # we want unrelated diseases
parser.add_argument('--out_file', default=None, help='Override output filepath') args = parser.parse_args() if args.out_file is None: out_file = '../gendata/plots/go_asthma_copd.pdf' else: out_file = args.out_file if args.config is not None: config = utils.read_config(args.config) types = ['fc', 'rdm_a', 'rdm_b'] seqsims = { tp: non_nan(np.loadtxt('../gendata/seqsim/seqsims_{}_1000.dat'.format(tp))) for tp in types } print(utils.mwpval(seqsims['fc'], seqsims['rdm_a'])) print(utils.mwpval(seqsims['fc'], seqsims['rdm_b'])) plt.figure(figsize=[5, 5]) pos = [0, 1, 2] boxprops = {'linewidth': 1, 'zorder': 3} whiskerprops = {'linewidth': 1, 'zorder': 3} capprops = {'linewidth': 1, 'zorder': 3} medianprops = {'color': 'red', 'linewidth': 1.5, 'zorder': 6} flierprops = {'marker': 'o', 'markeredgecolor': 'lightgray', 'zorder': 1} plt.gca().yaxis.grid(True, zorder=0, linestyle=':') bplot = plt.boxplot([seqsims['rdm_a'], seqsims['rdm_b'], seqsims['fc']],