def main():
    """Plot similarity scatter for one gene's mutation subtypes in a cohort.

    Reads the cohort, gene, classifier and mutation annotation levels from
    the command line, creates the output plot directory, loads the cohort
    and the saved inference output, compares the scores, and passes copies
    of the results to `plot_similarity_scatter`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # argparse only honours `default` on a positional when nargs is '?' or
    # '*'; without it the argument stays mandatory and the default is dead
    parser.add_argument('mut_levels', nargs='?', default='Form_base__Exon',
                        help='a set of mutation annotation levels')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(
        os.path.join(plot_dir, '{}_{}'.format(args.cohort, args.gene)),
        exist_ok=True
        )

    cdata = load_cohort(args.cohort, [args.gene],
                        args.mut_levels.split('__'))

    # the output directory layout is
    # <cohort>/<gene>/<classif>/samps_<cutoff>/<mut_levels>
    out_dir = os.path.join(base_dir, 'output', args.cohort, args.gene,
                           args.classif,
                           'samps_{}'.format(args.samp_cutoff),
                           args.mut_levels)
    pheno_dict, auc_list, simil_df = compare_scores(
        load_infer_output(out_dir), cdata)

    # hand copies to the plotting routine so the originals are not mutated
    plot_similarity_scatter(simil_df.copy(), auc_list.copy(),
                            pheno_dict.copy(), args)
def main():
    """Plot subtype ordering and clustering for a module of genes.

    Parses the command line, logs into Synapse, builds a `MutationCohort`
    for the requested genes, compares the saved inference scores, reorders
    the similarity matrix, and draws the ordering and clustering plots.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog` (the program name)
    parser = argparse.ArgumentParser(
        description=(
            "Plot the ordering of the subtypes of a module of genes in a "
            "given cohort based on how their isolated expression signatures "
            "classify one another."
            )
        )

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('classif', help='a mutation classifier')
    parser.add_argument('mut_levels', type=str,
                        help='a set of mutation annotation levels')
    parser.add_argument('genes', type=str, nargs='+',
                        help='a list of mutated genes')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = syn_root
    syn.login()

    cdata = MutationCohort(cohort=args.cohort, mut_genes=args.genes,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose', expr_dir=expr_dir,
                           var_source='mc3', copy_source='Firehose',
                           domain_dir=domain_dir, annot_file=annot_file,
                           syn=syn, cv_prop=1.0)

    # output for a gene module is stored under the sorted, '_'-joined genes
    out_dir = os.path.join(base_dir, 'output', args.cohort,
                           '_'.join(sorted(args.genes)), args.classif,
                           'samps_{}'.format(args.samp_cutoff),
                           args.mut_levels)
    pheno_dict, auc_list, simil_df = compare_scores(
        load_infer_output(out_dir), cdata)

    # rank each entry by its mean row value minus its mean column value
    simil_rank = simil_df.mean(axis=1) - simil_df.mean(axis=0)

    # sort first by the leading field of the first subtype of the first
    # mutation type in each index entry (presumably the gene -- confirm),
    # then by rank; Series.items() replaces iteritems(), removed in pandas 2.0
    simil_order = [
        mtypes for mtypes, _ in sorted(
            simil_rank.items(),
            key=lambda k: (k[0][0].subtype_list()[0][0], k[1])
            )
        ]
    simil_df = simil_df.loc[simil_order, simil_order[::-1]]

    plot_singleton_ordering(simil_df.copy(), auc_list.copy(),
                            pheno_dict.copy(), args)
    plot_singleton_clustering(simil_df.copy(), auc_list.copy(),
                              pheno_dict.copy(), args)
    plot_all_clustering(simil_df.copy(), auc_list.copy(), args)
def main():
    """Plot the ordering and clustering of one gene's mutation subtypes.

    Parses the command line, creates the plot directory, loads cohort data
    and saved inference output, restricts the output to singleton mutation
    types unless `--all_mcombs` is given, compares the scores, and draws
    the ordering and clustering plots.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog` (the program name)
    parser = argparse.ArgumentParser(
        description=(
            "Plot the ordering of a gene's subtypes in a given cohort based "
            "on how their isolated expression signatures classify one "
            "another."
            )
        )

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # nargs='?' is needed for argparse to apply `default` to a positional
    parser.add_argument('mut_levels', nargs='?', default='Form_base__Exon',
                        help='a set of mutation annotation levels')
    parser.add_argument('--samp_cutoff', type=int, default=20)
    parser.add_argument('--all_mcombs', '-a', action='store_true',
                        help=("plot results for all mutation types as "
                              "opposed to just singletons"))

    # parse command line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(
        os.path.join(plot_dir, '{}_{}'.format(args.cohort, args.gene)),
        exist_ok=True
        )

    cdata = load_cohort_data(base_dir,
                             args.cohort, args.gene, args.mut_levels)
    infer_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene,
                     args.classif, 'samps_{}'.format(args.samp_cutoff),
                     args.mut_levels)
        )

    if args.all_mcombs:
        use_mtypes = infer_df.index

    # otherwise keep only index entries whose every mutation type has
    # exactly one subkey
    else:
        use_mtypes = [
            mcomb for mcomb in infer_df.index
            if all(len(mtype.subkeys()) == 1 for mtype in mcomb.mtypes)
            ]

    pheno_dict, auc_list, simil_df = compare_scores(
        infer_df.loc[use_mtypes], cdata)

    plot_singleton_ordering(simil_df.copy(), auc_list.copy(),
                            pheno_dict.copy(), args)
    plot_singleton_clustering(simil_df.copy(), auc_list.copy(),
                              pheno_dict.copy(), args)

    # the "all" plots are only drawn when all mutation types were kept
    if args.all_mcombs:
        plot_all_ordering(simil_df.copy(), auc_list.copy(), args)
        plot_all_clustering(simil_df.copy(), auc_list.copy(), args)
def main():
    """Plot subtype orderings for a module of genes in a cohort.

    Parses the command line, logs into Synapse, builds a `MutationCohort`,
    computes similarities from the saved inference output, reorders the
    similarity matrix by rank, and draws the singleton and all-subtype
    ordering plots.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog` (the program name)
    parser = argparse.ArgumentParser(
        description=(
            "Plot the ordering of a gene's subtypes in a given cohort based "
            "on how their isolated expression signatures classify one "
            "another."
            )
        )

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('classif', help='a mutation classifier')
    parser.add_argument('mut_levels', type=str,
                        help='a set of mutation annotation levels')
    parser.add_argument('genes', type=str, nargs='+',
                        help='a list of mutated genes')
    parser.add_argument('--samp_cutoff', type=int, default=25)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    # NOTE(review): the cache root is a hard-coded, site-specific path
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutationCohort(cohort=args.cohort, mut_genes=args.genes,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose', expr_dir=firehose_dir,
                           syn=syn, cv_prop=1.0)

    # output for a gene module is stored under the sorted, '_'-joined genes
    out_dir = os.path.join(base_dir, 'output', args.cohort,
                           '_'.join(sorted(args.genes)), args.classif,
                           'samps_{}'.format(args.samp_cutoff),
                           args.mut_levels)
    simil_df, auc_list = get_similarities(
        load_infer_output(out_dir), args.genes, cdata)
    print(simil_df.shape)

    # rank each entry by its mean row value minus its mean column value,
    # then lay rows out in ascending and columns in descending rank order
    simil_rank = simil_df.mean(axis=1) - simil_df.mean(axis=0)
    simil_order = simil_rank.sort_values().index

    # a reversed slice is used instead of a one-shot `reversed()` iterator
    simil_df = simil_df.loc[simil_order, simil_order[::-1]]

    plot_singleton_ordering(simil_df.copy(), auc_list.copy(), args, cdata)
    plot_all_ordering(simil_df.copy(), auc_list.copy(), args, cdata)
def main():
    """Plot inferred sample positions for each pair of mutation subtypes.

    Parses the command line, creates the plot directory, loads the saved
    multi-task inference output, logs into Synapse, builds a
    `MutationCohort` for the gene, and calls `plot_position` once for every
    row of the inference output.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog` (the program name)
    parser = argparse.ArgumentParser(
        description=(
            "Plot the positions predicted for each sample in a given cohort "
            "by a multi-task model trained on pairs of mutation subtypes of "
            "a gene in two-dimensional inferred label space."
            )
        )

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('mut_levels',
                        help='a set of mutation annotation levels')
    parser.add_argument('model_name',
                        help='a Stan multi-task learning model')
    parser.add_argument('solve_method',
                        choices=['optim', 'variat', 'sampl'],
                        help='method used to obtain Stan parameter estimates')

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(
        os.path.join(plot_dir, args.cohort, args.gene, args.mut_levels),
        exist_ok=True
        )

    multi_df = load_infer_output(os.path.join(
        base_dir, 'output', args.cohort, args.gene, args.mut_levels,
        args.model_name, args.solve_method
        ))

    # log into Synapse using locally stored credentials
    # NOTE(review): the cache root is a hard-coded, site-specific path
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutationCohort(cohort=args.cohort, mut_genes=[args.gene],
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose', expr_dir=firehose_dir,
                           syn=syn, cv_prop=1.0)

    # each row label is unpacked as a pair of mutation types
    for (mtype1, mtype2), infer_vals in multi_df.iterrows():
        plot_position(infer_vals, args, cdata, mtype1, mtype2)
def main():
    """Plot inferred positions for each singleton mutation type of a gene.

    Parses the command line, creates the plot directory, loads the saved
    inference output and reduces every cell to its mean, logs into Synapse,
    builds a `MutationCohort`, and calls `plot_mtype_positions` for each
    index entry that has exactly one subkey.
    """
    parser = argparse.ArgumentParser(
        description='Plot experiment results for given mutation classifier.')

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # nargs='?' is needed for argparse to apply `default` to a positional
    parser.add_argument('mut_levels', nargs='?', default='Form_base__Exon')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(os.path.join(plot_dir, args.cohort, args.gene),
                exist_ok=True)

    # collapse each cell of the inference output to its mean value
    prob_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene,
                     args.classif, 'samps_{}'.format(args.samp_cutoff),
                     args.mut_levels)
        ).applymap(np.mean)

    # log into Synapse using locally stored credentials
    # NOTE(review): the cache root is a hard-coded, site-specific path
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    # NOTE(review): samp_cutoff is fixed at 20 here regardless of the
    # --samp_cutoff option -- confirm whether that is intended
    cdata = MutationCohort(cohort=args.cohort, mut_genes=None,
                           samp_cutoff=20,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose', expr_dir=firehose_dir,
                           syn=syn, cv_prop=1.0)

    # keep only the mutation types with exactly one subkey
    singl_mtypes = [mtype for mtype in prob_df.index
                    if len(mtype.subkeys()) == 1]

    for singl_mtype in singl_mtypes:
        plot_mtype_positions(prob_df.loc[singl_mtype, :], args, cdata)
def main():
    """Plot separation statistics for a gene's mutation subtypes.

    Parses the command line, creates the plot directory, logs into Synapse,
    builds a `MutationCohort`, loads the saved inference output, computes
    the values returned by `get_separation`, and passes them to
    `plot_separation`.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog` (the program name)
    parser = argparse.ArgumentParser(
        description=(
            "Plot how well expression signatures separate isolated mutation "
            "subtypes from non-mutated samples relative to how they "
            "separate mutated samples not belonging to the subtype."
            )
        )

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # nargs='?' is needed for argparse to apply `default` to a positional
    parser.add_argument('mut_levels', nargs='?', default='Form_base__Exon',
                        help='a set of mutation annotation levels')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = syn_root
    syn.login()

    cdata = MutationCohort(cohort=args.cohort, mut_genes=[args.gene],
                           mut_levels=args.mut_levels.split('__'),
                           expr_source='Firehose', expr_dir=firehose_dir,
                           syn=syn, cv_prop=1.0)

    infer_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene,
                     args.classif, 'samps_{}'.format(args.samp_cutoff),
                     args.mut_levels)
        )

    auc_vals, sep_vals, prop_vals = get_separation(infer_df, args, cdata)
    plot_separation(auc_vals, sep_vals, prop_vals, args, cdata)
def main():
    """Plot the ordering of the simplest subtypes within a module of genes.

    Parses the command line, logs into Synapse, builds a `MutationCohort`,
    loads the saved inference output and computes AUCs against the
    phenotype of the whole gene module. It then picks a limited set of
    subtypes to plot: up to three per gene with the fewest subkeys, plus
    further subtypes whose sample sets differ enough from those already
    chosen. Finally it computes similarities for the chosen subtypes and
    calls `plot_gene_ordering`.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog` (the program name)
    parser = argparse.ArgumentParser(
        description=(
            "Plot the ordering of the simplest subtypes within a module of "
            "genes in a given cohort based on how their isolated expression "
            "signatures classify one another."
            )
        )

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('classif', help='a mutation classifier')
    parser.add_argument('mut_levels', type=str,
                        help='a set of mutation annotation levels')
    parser.add_argument('genes', type=str, nargs='+',
                        help='a list of mutated genes')
    parser.add_argument('--samp_cutoff', type=int, default=25)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    # NOTE(review): the cache root is a hard-coded, site-specific path
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutationCohort(cohort=args.cohort, mut_genes=args.genes,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose', expr_dir=firehose_dir,
                           syn=syn, cv_prop=1.0)

    infer_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort,
                     '_'.join(sorted(args.genes)), args.classif,
                     'samps_{}'.format(args.samp_cutoff), args.mut_levels)
        )

    # phenotype of carrying any mutation in any of the module's genes
    base_pheno = np.array(
        cdata.train_pheno(MuType({('Gene', tuple(args.genes)): None})))

    # only subtypes with an AUC above 0.6 are candidates for plotting
    auc_list = get_aucs(infer_df, base_pheno,
                        cdata).sort_values(ascending=False)
    auc_list = auc_list[auc_list > 0.6]

    # candidates ordered by number of subkeys, fewest first; the sort is
    # stable, so ties keep the descending-AUC order of `auc_list`
    mtype_lens = {mtype: len(mtype.subkeys()) for mtype in auc_list.index}
    mtype_list = sorted(auc_list.index, key=lambda mtype: mtype_lens[mtype])

    mtype_genes = {mtype: mtype.subtype_list()[0][0]
                   for mtype in auc_list.index}
    mtype_samps = {mtype: mtype.get_samples(cdata.train_mut)
                   for mtype in auc_list.index}

    # seed the selection with the first three candidates of every gene
    plot_mtypes = reduce(or_, [
        set([mtype for mtype in mtype_list
             if mtype_genes[mtype] == gene][:3])
        for gene in args.genes
        ])

    # greedily add candidates whose sample sets are sufficiently different
    # from every subtype chosen so far; after each pass over the candidates
    # the pass starts one position later and the threshold is relaxed
    ovlp_threshold = 0.5
    i = j = 1

    # the `j < len(mtype_list)` guard stops the search once the candidates
    # are exhausted instead of indexing past the end of `mtype_list`
    while len(plot_mtypes) <= 15 and j < len(mtype_list):
        cur_samps = mtype_samps[mtype_list[i]]

        # smallest normalised symmetric difference to any chosen subtype
        ovlp_score = min(
            len(cur_samps ^ mtype_samps[plot_mtype])
            / max(len(cur_samps), len(mtype_samps[plot_mtype]))
            for plot_mtype in plot_mtypes
            )

        if ovlp_score >= ovlp_threshold:
            plot_mtypes |= {mtype_list[i]}

        i += 1
        if i >= len(mtype_list):
            j += 1
            i = j

            # raising a value below one to the 4/3 power lowers it
            ovlp_threshold **= 4 / 3

    # .loc does not accept a set indexer in pandas >= 2.0, so pass a list
    simil_df = get_similarities(infer_df.loc[list(plot_mtypes), :],
                                base_pheno, cdata)
    plot_gene_ordering(simil_df, auc_list, args, cdata)
def main():
    """Plot projections and enrichment for a gene's singleton mutation types.

    Parses the command line, creates the plot directory, loads the cohort
    and the saved inference output, and reduces every cell of the output to
    its mean. It draws a projection and an enrichment plot for every
    singleton mutation type, then draws one pair projection of a copy-gain
    type against a selected point-mutation type.
    """
    parser = argparse.ArgumentParser(
        description='Plot experiment results for given mutation classifier.')

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # nargs='?' is needed for argparse to apply `default` to a positional
    parser.add_argument('mut_levels', nargs='?', default='Form_base__Exon',
                        help='a set of mutation annotation levels')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(
        os.path.join(plot_dir, '{}_{}__{}'.format(
            args.cohort, args.gene, args.mut_levels)),
        exist_ok=True
        )

    cdata = load_cohort(args.cohort, [args.gene],
                        args.mut_levels.split('__'))
    infer_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene,
                     args.classif, 'samps_{}'.format(args.samp_cutoff),
                     args.mut_levels)
        )

    # collapse each cell of the inference output to its mean value
    prob_df = infer_df.applymap(np.mean)

    # index entries in which every mutation type has exactly one subkey
    singl_mtypes = {
        mtypes for mtypes in prob_df.index
        if all(len(mtype.subkeys()) == 1 for mtype in mtypes)
        }

    pheno_dict, auc_list, _ = compare_scores(infer_df, cdata,
                                             get_similarities=False)

    for singl_mtype in singl_mtypes:
        # .loc[[label]].iloc[0, :] selects the row for a tuple label
        # without the tuple being read as a multi-axis key
        plot_mtype_projection(prob_df.loc[[singl_mtype]].iloc[0, :],
                              singl_mtypes, pheno_dict, args, cdata,
                              proj_tag='singleton')
        plot_mtype_enrichment(prob_df.loc[[singl_mtype]].iloc[0, :],
                              pheno_dict, args, cdata)

    # use the HomGain-only type if it was tested, otherwise fall back to
    # the combined HomGain/HetGain type
    gain_mtype = (MuType({('Scale', 'Copy'): {('Copy', 'HomGain'): None}}), )
    if gain_mtype not in prob_df.index:
        gain_mtype = (MuType({
            ('Scale', 'Copy'): {('Copy', ('HomGain', 'HetGain')): None}
            }), )

    # singleton entries with one mutation type that overlaps, but is not
    # identical to, the type covering all point mutations
    allpnt_mtype = MuType({('Scale', 'Point'): None})
    pnt_mtypes = {
        mtypes for mtypes in singl_mtypes - {(allpnt_mtype, )}
        if len(mtypes) == 1 and not (mtypes[0] & allpnt_mtype).is_empty()
        }

    # choose the entry with the lowest (phenotype sum) * (1 - AUC) score
    pnt_scores = sorted(
        [(mtypes, np.sum(pheno_dict[mtypes]) * (1 - auc_list[mtypes]))
         for mtypes in pnt_mtypes],
        key=lambda x: x[1]
        )
    pnt_mtype = pnt_scores.pop(0)[0]
    pnt_str = str(pnt_mtype[0]).split(':')[-1]

    # the first entry is a bare tuple of mutation types; the other two are
    # (label, tuple of mutation types) pairs
    use_mtypes = [
        (MuType({('Scale', 'Copy'): {('Copy', 'HetGain'): None}}), ),

        ('Only Not {}'.format(pnt_str),
         (MuType({('Scale', 'Point'): cdata.train_mut['Point'].allkey()})
          - pnt_mtype[0], )),

        ('Mutation and Gain',
         (MuType({
             ('Scale', 'Copy'): {('Copy', ('HomGain', 'HetGain')): None}
             }),
          allpnt_mtype)),
        ]

    # colours are keyed by the tuples of mutation types, not by the labels
    use_clrs = {
        gain_mtype: '#9B5500',
        pnt_mtype: '#03314C',
        use_mtypes[0]: '#774200',
        use_mtypes[1][1]: '#044063',
        use_mtypes[2][1]: '#4B004E',
        }

    plot_pair_projection(prob_df.loc[[gain_mtype, pnt_mtype]], use_mtypes,
                         pheno_dict, args, cdata, proj_clrs=use_clrs)