Example #1
0
def main():
    """Draw the similarity scatter plot for one gene in one cohort.

    Parses the command line, creates the ``<cohort>_<gene>`` plot directory,
    loads the cohort and the matching inference output, compares the scores,
    and hands the results to ``plot_similarity_scatter``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # argparse only honours `default` on a positional when it is optional,
    # hence nargs='?'; without it the default below was unreachable
    parser.add_argument('mut_levels',
                        nargs='?',
                        default='Form_base__Exon',
                        help='a set of mutation annotation levels')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(os.path.join(plot_dir, '{}_{}'.format(args.cohort, args.gene)),
                exist_ok=True)

    cdata = load_cohort(args.cohort, [args.gene], args.mut_levels.split('__'))

    # inference output lives under output/<cohort>/<gene>/<classif>/samps_<n>/
    out_path = os.path.join(base_dir, 'output', args.cohort, args.gene,
                            args.classif, 'samps_{}'.format(args.samp_cutoff),
                            args.mut_levels)
    pheno_dict, auc_list, simil_df = compare_scores(
        load_infer_output(out_path), cdata)

    plot_similarity_scatter(simil_df.copy(), auc_list.copy(),
                            pheno_dict.copy(), args)
Example #2
0
def main():
    """Plot the ordering and clustering of subtypes in a module of genes.

    Parses the command line, logs into Synapse, builds the mutation cohort,
    compares the inferred scores, orders the similarity matrix, and draws
    the singleton ordering, singleton clustering, and all-subtype clustering
    plots.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog`, the program name
    parser = argparse.ArgumentParser(
        description=(
            "Plot the ordering of the subtypes of a module of genes in a "
            "given cohort based on how their isolated expression signatures "
            "classify one another."))

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('classif', help='a mutation classifier')
    parser.add_argument('mut_levels',
                        type=str,
                        help='a set of mutation annotation levels')
    parser.add_argument('genes',
                        type=str,
                        nargs='+',
                        help='a list of mutated genes')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = syn_root
    syn.login()

    cdata = MutationCohort(cohort=args.cohort,
                           mut_genes=args.genes,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose',
                           expr_dir=expr_dir,
                           var_source='mc3',
                           copy_source='Firehose',
                           domain_dir=domain_dir,
                           annot_file=annot_file,
                           syn=syn,
                           cv_prop=1.0)

    # output directory is keyed by the sorted, underscore-joined gene list
    out_path = os.path.join(base_dir, 'output', args.cohort,
                            '_'.join(sorted(args.genes)), args.classif,
                            'samps_{}'.format(args.samp_cutoff),
                            args.mut_levels)
    pheno_dict, auc_list, simil_df = compare_scores(
        load_infer_output(out_path), cdata)

    # rank each entry by its row mean minus its column mean, then sort by
    # the first element of the first subtype's `subtype_list()` entry and,
    # within that, by rank
    simil_rank = simil_df.mean(axis=1) - simil_df.mean(axis=0)

    # Series.items() replaces iteritems(), which pandas 2.0 removed
    simil_order = [
        mtypes for mtypes, _ in sorted(
            simil_rank.items(),
            key=lambda k: (k[0][0].subtype_list()[0][0], k[1]))
    ]

    simil_df = simil_df.loc[simil_order, simil_order[::-1]]
    plot_singleton_ordering(simil_df.copy(), auc_list.copy(),
                            pheno_dict.copy(), args)
    plot_singleton_clustering(simil_df.copy(), auc_list.copy(),
                              pheno_dict.copy(), args)
    plot_all_clustering(simil_df.copy(), auc_list.copy(), args)
Example #3
0
def main():
    """Plot the ordering and clustering of one gene's subtypes in a cohort.

    By default only entries whose mutation types each have a single subkey
    are used; with ``--all_mcombs`` every entry of the inference output is
    used and the two additional "all" plots are drawn as well.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog`, the program name
    parser = argparse.ArgumentParser(
        description=(
            "Plot the ordering of a gene's subtypes in a given cohort based "
            "on how their isolated expression signatures classify one "
            "another."))

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # argparse only honours `default` on a positional when it is optional,
    # hence nargs='?'; without it the default below was unreachable
    parser.add_argument('mut_levels',
                        nargs='?',
                        default='Form_base__Exon',
                        help='a set of mutation annotation levels')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    parser.add_argument('--all_mcombs',
                        '-a',
                        action='store_true',
                        help=("plot results for all mutation types as "
                              "opposed to just singletons"))

    # parse command line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(os.path.join(plot_dir, '{}_{}'.format(args.cohort, args.gene)),
                exist_ok=True)

    cdata = load_cohort_data(base_dir, args.cohort, args.gene, args.mut_levels)
    infer_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene, args.classif,
                     'samps_{}'.format(args.samp_cutoff), args.mut_levels))

    if args.all_mcombs:
        use_mtypes = infer_df.index
    else:
        # keep only combinations whose every mutation type has one subkey
        use_mtypes = [
            mcomb for mcomb in infer_df.index if all(
                len(mtype.subkeys()) == 1 for mtype in mcomb.mtypes)
        ]

    pheno_dict, auc_list, simil_df = compare_scores(infer_df.loc[use_mtypes],
                                                    cdata)

    plot_singleton_ordering(simil_df.copy(), auc_list.copy(),
                            pheno_dict.copy(), args)
    plot_singleton_clustering(simil_df.copy(), auc_list.copy(),
                              pheno_dict.copy(), args)

    if args.all_mcombs:
        plot_all_ordering(simil_df.copy(), auc_list.copy(), args)
        plot_all_clustering(simil_df.copy(), auc_list.copy(), args)
Example #4
0
def main():
    """Plot the ordering of subtypes for a list of genes in a cohort.

    Parses the command line, logs into Synapse, builds the mutation cohort,
    computes the similarity matrix and AUCs from the inference output, sorts
    the matrix by rank, and draws the singleton and all-subtype ordering
    plots.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog`, the program name
    parser = argparse.ArgumentParser(
        description=(
            "Plot the ordering of a gene's subtypes in a given cohort based "
            "on how their isolated expression signatures classify one "
            "another."))

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('classif', help='a mutation classifier')
    parser.add_argument('mut_levels',
                        type=str,
                        help='a set of mutation annotation levels')
    parser.add_argument('genes',
                        type=str,
                        nargs='+',
                        help='a list of mutated genes')
    parser.add_argument('--samp_cutoff', type=int, default=25)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutationCohort(cohort=args.cohort,
                           mut_genes=args.genes,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose',
                           expr_dir=firehose_dir,
                           syn=syn,
                           cv_prop=1.0)

    # output directory is keyed by the sorted, underscore-joined gene list
    out_path = os.path.join(base_dir, 'output', args.cohort,
                            '_'.join(sorted(args.genes)), args.classif,
                            'samps_{}'.format(args.samp_cutoff),
                            args.mut_levels)
    simil_df, auc_list = get_similarities(load_infer_output(out_path),
                                          args.genes, cdata)
    print(simil_df.shape)

    # order rows by (row mean - column mean); columns take the reverse order
    simil_rank = simil_df.mean(axis=1) - simil_df.mean(axis=0)
    simil_order = simil_rank.sort_values().index

    # slice the Index rather than handing .loc a one-shot reversed() iterator
    simil_df = simil_df.loc[simil_order, simil_order[::-1]]

    plot_singleton_ordering(simil_df.copy(), auc_list.copy(), args, cdata)
    plot_all_ordering(simil_df.copy(), auc_list.copy(), args, cdata)
Example #5
0
def main():
    """Plot predicted sample positions for each pair of mutation subtypes.

    Parses the command line, loads the multi-task inference output, logs
    into Synapse, builds the mutation cohort, and calls ``plot_position``
    once per row of the inference output.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog`, the program name
    parser = argparse.ArgumentParser(
        description=(
            "Plot the positions predicted for each sample in a given cohort "
            "by a multi-task model trained on pairs of mutation subtypes of "
            "a gene in two-dimensional inferred label space.")
        )

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('mut_levels',
                        help='a set of mutation annotation levels')

    parser.add_argument('model_name', help='a Stan multi-task learning model')
    parser.add_argument('solve_method', choices=['optim', 'variat', 'sampl'],
                        help='method used to obtain Stan parameter estimates')

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(
        os.path.join(plot_dir, args.cohort, args.gene, args.mut_levels),
        exist_ok=True
        )

    multi_df = load_infer_output(os.path.join(
        base_dir, 'output', args.cohort, args.gene, args.mut_levels,
        args.model_name, args.solve_method
        ))

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutationCohort(cohort=args.cohort, mut_genes=[args.gene],
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose', expr_dir=firehose_dir,
                           syn=syn, cv_prop=1.0)

    # each index entry of the output unpacks into a pair of mutation types
    for (mtype1, mtype2), infer_vals in multi_df.iterrows():
        plot_position(infer_vals, args, cdata, mtype1, mtype2)
Example #6
0
def main():
    """Plot experiment results for a given mutation classifier.

    Parses the command line, loads the inference output and reduces every
    cell to its mean, logs into Synapse, builds the mutation cohort, and
    calls ``plot_mtype_positions`` for each index entry with one subkey.
    """
    parser = argparse.ArgumentParser(
        description='Plot experiment results for given mutation classifier.')

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # argparse only honours `default` on a positional when it is optional,
    # hence nargs='?'; without it the default below was unreachable
    parser.add_argument('mut_levels', nargs='?', default='Form_base__Exon')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(os.path.join(plot_dir, args.cohort, args.gene), exist_ok=True)

    # collapse each cell of the inference output to its mean value
    prob_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene, args.classif,
                     'samps_{}'.format(args.samp_cutoff),
                     args.mut_levels)).applymap(np.mean)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    # NOTE(review): samp_cutoff is fixed at 20 here rather than taken from
    # --samp_cutoff; confirm whether that is intentional
    cdata = MutationCohort(cohort=args.cohort,
                           mut_genes=None,
                           samp_cutoff=20,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose',
                           expr_dir=firehose_dir,
                           syn=syn,
                           cv_prop=1.0)

    singl_mtypes = [
        mtype for mtype in prob_df.index if len(mtype.subkeys()) == 1
    ]

    for singl_mtype in singl_mtypes:
        plot_mtype_positions(prob_df.loc[singl_mtype, :], args, cdata)
Example #7
0
def main():
    """Plot how well expression signatures separate isolated subtypes.

    Parses the command line, logs into Synapse, builds the mutation cohort,
    loads the inference output, computes the separation statistics with
    ``get_separation``, and passes them to ``plot_separation``.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog`, the program name
    parser = argparse.ArgumentParser(
        description=(
            "Plot how well expression signatures separate isolated mutation "
            "subtypes from non-mutated samples relative to how they separate "
            "mutated samples not belonging to the subtype."))

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # argparse only honours `default` on a positional when it is optional,
    # hence nargs='?'; without it the default below was unreachable
    parser.add_argument('mut_levels',
                        nargs='?',
                        default='Form_base__Exon',
                        help='a set of mutation annotation levels')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = syn_root
    syn.login()

    cdata = MutationCohort(cohort=args.cohort,
                           mut_genes=[args.gene],
                           mut_levels=args.mut_levels.split('__'),
                           expr_source='Firehose',
                           expr_dir=firehose_dir,
                           syn=syn,
                           cv_prop=1.0)

    infer_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene, args.classif,
                     'samps_{}'.format(args.samp_cutoff), args.mut_levels))
    auc_vals, sep_vals, prop_vals = get_separation(infer_df, args, cdata)

    plot_separation(auc_vals, sep_vals, prop_vals, args, cdata)
Example #8
0
def main():
    """Plot the ordering of the simplest subtypes within a module of genes.

    Parses the command line, logs into Synapse, builds the mutation cohort,
    keeps the subtypes whose AUC exceeds 0.6, greedily selects a set of
    subtypes to plot, and passes their similarities to
    ``plot_gene_ordering``.
    """
    # the text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog`, the program name
    parser = argparse.ArgumentParser(
        description=(
            "Plot the ordering of the simplest subtypes within a module of "
            "genes in a given cohort based on how their isolated expression "
            "signatures classify one another."))

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('classif', help='a mutation classifier')
    parser.add_argument('mut_levels',
                        type=str,
                        help='a set of mutation annotation levels')
    parser.add_argument('genes',
                        type=str,
                        nargs='+',
                        help='a list of mutated genes')
    parser.add_argument('--samp_cutoff', type=int, default=25)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutationCohort(cohort=args.cohort,
                           mut_genes=args.genes,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose',
                           expr_dir=firehose_dir,
                           syn=syn,
                           cv_prop=1.0)

    infer_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort,
                     '_'.join(sorted(args.genes)), args.classif,
                     'samps_{}'.format(args.samp_cutoff), args.mut_levels))

    # phenotype for "any mutation in any of the given genes"
    base_pheno = np.array(
        cdata.train_pheno(MuType({('Gene', tuple(args.genes)): None})))
    auc_list = get_aucs(infer_df, base_pheno,
                        cdata).sort_values(ascending=False)
    auc_list = auc_list[auc_list > 0.6]

    # candidates ordered from fewest to most subkeys
    mtype_lens = {mtype: len(mtype.subkeys()) for mtype in auc_list.index}
    mtype_list = sorted(auc_list.index, key=lambda mtype: mtype_lens[mtype])

    mtype_genes = {
        mtype: mtype.subtype_list()[0][0]
        for mtype in auc_list.index
    }
    mtype_samps = {
        mtype: mtype.get_samples(cdata.train_mut)
        for mtype in auc_list.index
    }

    # seed the selection with up to three candidates per gene
    plot_mtypes = reduce(or_, [
        set([mtype for mtype in mtype_list if mtype_genes[mtype] == gene][:3])
        for gene in args.genes
    ])

    # Greedily add candidates that differ enough from everything already
    # chosen. After each full pass the start offset advances and the
    # threshold is lowered. The bound check on `i` ends the search once all
    # start offsets are exhausted; without it `mtype_list[i]` raised
    # IndexError whenever fewer than 16 subtypes could be selected.
    ovlp_threshold = 0.5
    i = j = 1
    while len(plot_mtypes) <= 15 and i < len(mtype_list):
        ovlp_score = min(
            len(mtype_samps[mtype_list[i]] ^ mtype_samps[plot_mtype]) /
            max(len(mtype_samps[mtype_list[i]]), len(mtype_samps[plot_mtype]))
            for plot_mtype in plot_mtypes)

        if ovlp_score >= ovlp_threshold:
            plot_mtypes |= {mtype_list[i]}

        i += 1
        if i >= len(mtype_list):
            j += 1
            i = j
            ovlp_threshold **= 4 / 3

    # pandas >= 2.0 rejects a set as an indexer, so pass a list instead
    simil_df = get_similarities(infer_df.loc[list(plot_mtypes), :],
                                base_pheno, cdata)
    plot_gene_ordering(simil_df, auc_list, args, cdata)
Example #9
0
def main():
    """Plot experiment results for a given mutation classifier.

    Parses the command line, loads the cohort and inference output, draws a
    projection and an enrichment plot for every singleton entry, then builds
    a gain/point-mutation pair and draws its pair projection.
    """
    parser = argparse.ArgumentParser(
        description='Plot experiment results for given mutation classifier.')

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # argparse only honours `default` on a positional when it is optional,
    # hence nargs='?'; without it the default below was unreachable
    parser.add_argument('mut_levels',
                        nargs='?',
                        default='Form_base__Exon',
                        help='a set of mutation annotation levels')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(os.path.join(
        plot_dir, '{}_{}__{}'.format(args.cohort, args.gene, args.mut_levels)),
                exist_ok=True)

    cdata = load_cohort(args.cohort, [args.gene], args.mut_levels.split('__'))
    infer_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene, args.classif,
                     'samps_{}'.format(args.samp_cutoff), args.mut_levels))

    # collapse each cell of the inference output to its mean value
    prob_df = infer_df.applymap(np.mean)

    # index entries whose every mutation type has exactly one subkey
    singl_mtypes = {
        mtypes
        for mtypes in prob_df.index if all(
            len(mtype.subkeys()) == 1 for mtype in mtypes)
    }
    pheno_dict, auc_list, _ = compare_scores(infer_df,
                                             cdata,
                                             get_similarities=False)

    for singl_mtype in singl_mtypes:
        # .loc[[key]].iloc[0] selects the row for a tuple-valued index label
        plot_mtype_projection(prob_df.loc[[singl_mtype]].iloc[0, :],
                              singl_mtypes,
                              pheno_dict,
                              args,
                              cdata,
                              proj_tag='singleton')

        plot_mtype_enrichment(prob_df.loc[[singl_mtype]].iloc[0, :],
                              pheno_dict, args, cdata)

    # use HomGain alone when present, otherwise HomGain and HetGain combined
    gain_mtype = (MuType({('Scale', 'Copy'): {('Copy', 'HomGain'): None}}), )
    if gain_mtype not in prob_df.index:
        gain_mtype = (MuType({
            ('Scale', 'Copy'): {
                ('Copy', ('HomGain', 'HetGain')): None
            }
        }), )

    # single-type singletons that overlap the all-point-mutations type,
    # excluding that type itself
    allpnt_mtype = MuType({('Scale', 'Point'): None})
    pnt_mtypes = {
        mtypes
        for mtypes in singl_mtypes - {(allpnt_mtype, )}
        if len(mtypes) == 1 and not (mtypes[0] & allpnt_mtype).is_empty()
    }

    # pick the candidate with the lowest (phenotype sum) * (1 - AUC) score
    pnt_scores = sorted([(mtypes, np.sum(pheno_dict[mtypes]) *
                          (1 - auc_list[mtypes])) for mtypes in pnt_mtypes],
                        key=lambda x: x[1])

    pnt_mtype = pnt_scores.pop(0)[0]
    pnt_str = str(pnt_mtype[0]).split(':')[-1]

    use_mtypes = [
        (MuType({('Scale', 'Copy'): {
                     ('Copy', 'HetGain'): None
                 }}), ),
        ('Only Not {}'.format(pnt_str),
         (MuType({('Scale', 'Point'): cdata.train_mut['Point'].allkey()}) -
          pnt_mtype[0], )),
        ('Mutation and Gain', (MuType({
            ('Scale', 'Copy'): {
                ('Copy', ('HomGain', 'HetGain')): None
            }
        }), allpnt_mtype))
    ]

    use_clrs = {
        gain_mtype: '#9B5500',
        pnt_mtype: '#03314C',
        use_mtypes[0]: '#774200',
        use_mtypes[1][1]: '#044063',
        use_mtypes[2][1]: '#4B004E'
    }

    plot_pair_projection(prob_df.loc[[gain_mtype, pnt_mtype]],
                         use_mtypes,
                         pheno_dict,
                         args,
                         cdata,
                         proj_clrs=use_clrs)