Esempio n. 1
0
def main():
    """Run all sets of hypotheses experiments.

    One run-size preset is chosen with ``args['idx_experiment_cnts']``; ``run``
    is then called once for each value of ``perc_super`` in 0, 25, 50, 75.
    """
    args = get_args()
    ntobj = cx.namedtuple("NtRunParams", "num_experiments num_sims dotsize")
    #pylint: disable=bad-whitespace, no-member
    # Each row is (num_experiments, num_sims, dotsize).
    # The trailing comments are the original author's notes (presumably run times).
    presets = (
        (500, 2500, {'fdr_actual': 0.70, 'sensitivity': 0.40}),  # 00:NN:NN
        (500, 1000, {'fdr_actual': 0.90, 'sensitivity': 0.50}),  # 02:37:NN
        (100, 1000, {'fdr_actual': 1.20, 'sensitivity': 0.65}),  # 00:24:31
        (20, 200, {'fdr_actual': 2.00, 'sensitivity': 2.00}),  # 00:01:00
        (10, 10, {'fdr_actual': 2.00, 'sensitivity': 2.00}),  # 00:00:02
    )
    lst_experiment_cnts = [ntobj(*preset) for preset in presets]
    ntd = lst_experiment_cnts[args['idx_experiment_cnts']]

    # The same value is used for all three slots; the original comment
    # labelled the slots 0.01, 0.03, 0.05.
    max_sigpvals_super = [0.00001] * 3

    for perc_super in (0, 25, 50, 75):
        run(args, ntd, max_sigpvals_super, perc_super)
def main():
    """Run all sets of hypotheses experiments.

    A run-size preset is chosen with ``args['idx_experiment_cnts']`` and a
    triple of p-value cutoffs with ``args['idx_max_sigpvals']``; ``run`` is
    then called once per enabled multiple-test correction method.
    """
    args = get_args()
    ntobj = cx.namedtuple("NtRunParams", "num_experiments num_sims dotsize")
    #pylint: disable=bad-whitespace, no-member
    # Each row is (num_experiments, num_sims, dotsize).
    # The trailing comments are the original author's notes (presumably run times).
    presets = (
        (500, 2500, {'fdr_actual': 0.70, 'sensitivity': 0.40}),  # 00:NN:NN
        (500, 1000, {'fdr_actual': 0.90, 'sensitivity': 0.50}),  # 02:37:NN
        (100, 1000, {'fdr_actual': 1.20, 'sensitivity': 0.65}),  # 00:24:31
        (20, 200, {'fdr_actual': 2.00, 'sensitivity': 2.00}),  # 00:01:00
        (10, 10, {'fdr_actual': 2.00, 'sensitivity': 2.00}),  # 00:00:02
    )
    lst_experiment_cnts = [ntobj(*preset) for preset in presets]
    ntd = lst_experiment_cnts[args['idx_experiment_cnts']]

    sigpval_choices = [
        [0.01, 0.03, 0.05],
        [0.0001, 0.001, 0.01],
    ]
    max_sigpvals = sigpval_choices[args['idx_max_sigpvals']]

    # Methods that are currently switched off, with the author's timings for
    # one ExperimentSet (100, 1000) in H:MM:SS:
    #    0) 'bonferroni'      Bonferroni one-step correction                      0:00:09
    #    1) 'sidak'           Sidak one-step correction                           0:00:09
    #    2) 'holm-sidak'      Holm-Sidak step-down method using Sidak adjustments 0:00:12
    #    3) 'holm'            Holm step-down method using Bonferroni adjustments  2:29:59
    #    4) 'simes-hochberg'  Simes-Hochberg step-up method  (independent)        0:00:11
    enabled_methods = (
        'hommel',     #  5) Hommel closed method based on Simes tests (non-negative)
        'fdr_by',     #  7) FDR Benjamini/Yekutieli (negative)
        'fdr_tsbh',   #  8) FDR 2-stage Benjamini-Hochberg (non-negative)
        'fdr_tsbky',  #  9) FDR 2-stage Benjamini-Krieger-Yekutieli (non-negative)
        'fdr_gbs',    # 10) FDR adaptive Gavrilov-Benjamini-Sarkar
        'fdr_bh',     #  6) FDR Benjamini/Hochberg  (non-negative)
    )
    for method in enabled_methods:
        run(args, ntd, method, max_sigpvals)
def main():
    """Simulate Gene Ontology Enrichment Analyses.

    For each module path listed below, ``Basename().plt_mod`` is called with
    'log/dat_goea_plot' as its first argument, the items to plot, and the
    plot keyword options (the title comes from ``args['title']``).
    """
    args = get_args()
    plotter = Basename()

    # NOTE(review): this value is not used below. The lookup is retained so
    # that behavior for an ``args`` without 'genes' is unchanged.
    _num_genes_list = args['genes']

    items_to_plot = ['fdr_actual', 'sensitivity', 'specificity']
    plot_options = {
        'dotsize': None,
        'title': args['title'],
        'dpi': 600,
        'img': 'all',
        'xlabel': 'Number of Genes in a Study Group',
        'ylabel': 'Percentage of General Population Genes',
    }

    module_names = (
        'pkggosim.data.orig_noprune_enriched_ntn2_p0_100to000_004to124_N00002_00002_genes',
        'pkggosim.data.orig_noprune_enriched_ntn2_p0_100to000_004to124_N00002_00002_goids',
    )
    for module_name in module_names:
        plotter.plt_mod('log/dat_goea_plot', module_name, items_to_plot, plot_options)
Esempio n. 4
0
def main():
    """Arguments for running all experiments.

    Builds the table of run-size presets, selects one of them with
    ``args['idx_experiment_cnts']`` and hands it to ``run``.
    """
    args = get_args()
    nto = cx.namedtuple("NtRunParams", "num_experiments num_sims dotsize")
    #pylint: disable=bad-whitespace, no-member, line-too-long
    # Each row is (num_experiments, num_sims, dotsize).
    # The trailing comments are the original author's notes (presumably run times).
    presets = (
        (500, 1000, {'fdr_actual': 0.70, 'sensitivity': 0.50, 'specificity': 0.50}),
        (100, 1000, {'fdr_actual': 0.95, 'sensitivity': 0.60, 'specificity': 0.60}),
        (100, 30, {'fdr_actual': 1.30, 'sensitivity': 0.60, 'specificity': 0.60}),
        (50, 50, {'fdr_actual': 2.00, 'sensitivity': 0.70, 'specificity': 0.70}),
        (50, 20, {'fdr_actual': 2.00, 'sensitivity': 1.00, 'specificity': 1.00}),  # 4:56
        (20, 20, {'fdr_actual': 2.00, 'sensitivity': 2.00, 'specificity': 2.00}),  # 1:25
        (4, 4, {'fdr_actual': 4.00, 'sensitivity': 3.00, 'specificity': 3.00}),  # 0:04 0:05
        (2, 2, {'fdr_actual': 4.00, 'sensitivity': 3.00, 'specificity': 3.00}),  # 0:01 0:02
    )
    experiment_cnts = [nto(*preset) for preset in presets]
    ntd = experiment_cnts[args['idx_experiment_cnts']]
    run(args, ntd)
Esempio n. 5
0
def main(run_all, prt=sys.stdout):
    """Simulate small set of GOEAs to see full TP/FP/FN/TN gene GO counts.

    Args:
        run_all: when truthy, 'perc_nulls' is [100, 75, 50, 25, 0];
            otherwise it is [25].
        prt: stream that receives the TITLE/GENES/randomize_truenull_assc
            lines and the run-size tuple. Defaults to ``sys.stdout``.

    A log file is opened under ``REPO`` for writing. It is passed to the run
    parameters as 'log' and also receives the printed summary.
    """
    args = get_args()
    #pylint: disable=no-member
    # Fields are accessed below as num_experiments, num_sims and dotsize.
    dot_sizes = {'fdr_actual': 4.00, 'sensitivity': 3.00, 'specificity': 3.00}
    ntd = NTO._make([4, 4, dot_sizes])

    randomize_truenull_assc = args.get('randomize_truenull_assc', 'orig')
    fout_log = "log/plt_goea_small/plt_goea_small_{NAME}.log".format(
        NAME=randomize_truenull_assc)

    study_bg = "humoral_rsp"
    popnullmaskout = ['immune', 'viral_bacteria']
    log_path = os.path.join(REPO, fout_log)

    # Gene Ontology Data
    with open(log_path, 'w') as fout:
        if run_all:
            perc_nulls = [100, 75, 50, 25, 0]
        else:
            perc_nulls = [25]
        params = {
            'img': 'pdf',  # png or pdf
            'title': args['title'],
            'log': fout,
            'prefix': 'fig_goea_{RND}'.format(RND=randomize_truenull_assc),
            'randomize_truenull_assc': randomize_truenull_assc,
            'seed': args.get('randomseed', None),
            'alpha': 0.05,
            'method': 'fdr_bh',
            'propagate_counts': args['propagate_counts'],
            # Mouse protein-coding genes
            'genes_population': ensm2nt.keys(),
            'genes_study_bg': import_genes(study_bg),
            'goids_study_bg': import_goids(study_bg),
            'genes_popnullmaskout': import_genes_all(popnullmaskout),
            'association_file': 'gene_association.mgi',
            'perc_nulls': perc_nulls,
            'num_genes_list': args['genes'],
            # Num. simulated FDR ratios per experiment set
            'num_experiments': ntd.num_experiments,
            # Number sims per experiment; used to create one FDR ratio
            'num_sims': ntd.num_sims,
        }
        experiments = ExperimentsAll(RunParams(params))
        run_params = experiments.pobj
        sys.stdout.write("  propagate_counts={P}\n".format(
            P=run_params.params['propagate_counts']))

        # Header lines go to the caller-supplied stream.
        title_cur = run_params.get_title()
        prt.write("TITLE: {S}\n".format(S=title_cur))
        prt.write("GENES: {S}\n".format(S=params['num_genes_list']))
        prt.write(
            "randomize_truenull_assc: {S}\n".format(S=randomize_truenull_assc))
        prt.write("{NT}\n".format(NT=ntd))

        rpt_items = ['fdr_actual', 'sensitivity', 'specificity']
        rpt_items += ['pos_pred_val', 'neg_pred_val']
        plt_items = ['fdr_actual', 'sensitivity', 'specificity']
        experiments.run_all(
            study_bg,
            rpt_items,
            plt_items,
            dotsize=ntd.dotsize,
            title=title_cur,
            xlabel='Number of Genes in a Study Group',
            ylabel='Percentage of General Population Genes',
        )

        # The summary is written to the log file first, then to stdout.
        for stream in (fout, sys.stdout):
            experiments.pobj.prt_summary(stream)
        sys.stdout.write("  propagate_counts={P}\n".format(
            P=experiments.pobj.params['propagate_counts']))
        sys.stdout.write("  WROTE: {LOG}\n".format(LOG=fout_log))
    sys.stdout.write("ARGS: {ARGS}\n".format(ARGS=args))