コード例 #1
0
def do_bootstrap():
    """
    Run a bootstrap analysis on GO p-values.

    Thresholds the transcriptional programs, initialises the GO context and
    then, for every sample yielded by generate_bootstrap_samples, runs
    go.try_go_analysis once per ontology in go_context.targets_go_data and
    keeps the minimum of the resulting p-values.

    Returns a list holding one such minimum p-value per bootstrap sample.
    """
    logging.info(
        'Running GO bootstrap analysis with %d samples: topGO method=%s',
        options.num_bootstrap_samples, options.topgo_method)
    programs, factor_universe, target_universe = tp_threshold.threshold_tps()
    genes_2_GO, go_context = go.initialise_go_context(
        factor_universe, target_universe, options.go_ontologies)

    # filter(None, ...) discards the falsy (zero) target-set sizes.
    target_counts = map(len, (program.targets for program in programs))
    tp_sizes = filter(None, target_counts)

    best_p_values = list()
    samples = generate_bootstrap_samples(
        options.num_bootstrap_samples, target_universe, tp_sizes)
    for sample in samples:
        # One GO analysis per ontology, keyed by the ontology.
        analyses = dict()
        for ontology, go_data in go_context.targets_go_data.iteritems():
            analyses[ontology] = go.try_go_analysis(
                go_data, sample, 1., options.topgo_method)
        best_per_ontology = map(topgo.best_p_value, analyses.values())
        best_p_values.append(
            min(map(topgo.p_value_from_r, best_per_ontology)))
    logging.info('GO bootstrap analysis completed')
    return best_p_values
コード例 #2
0
def symatlas():
    """
    Test the transcriptional programs against SymAtlas expression data.

    For each tissue, the genes mapped from that tissue's highly expressed
    probes are passed to a gene_set_enrichment.TpEnrichmentTester, first for
    the programs' factors and then for the programs' targets. Every result
    the tester yields is logged at INFO level.
    """
    logging.info('Analysing SymAtlas expression data.')
    probes_to_genes = SA.probes_to_genes()
    dataset, tissues, probes, fold_changes = SA.expression_data()
    highly_expressed = SA.fold_change_above_median(
        fold_changes,
        fold_change=options.symatlas_fold_change_threshold)
    highly_expressed_probes = SA.match_probe_sets(highly_expressed.T, probes)

    # Map each probe set to its set of genes, skipping probes that have no
    # mapping or whose mapped value is falsy.
    highly_expressed_genes = []
    for probe_set in highly_expressed_probes:
        genes = set()
        for probe in probe_set:
            if probe in probes_to_genes and probes_to_genes[probe]:
                genes.add(probes_to_genes[probe])
        highly_expressed_genes.append(genes)

    # Bar chart of probe counts per tissue; switched off with a constant.
    if False:
        import pylab as P
        P.figure()
        P.bar(range(len(tissues)), map(len, highly_expressed_probes))
        P.xlim(max=len(tissues))

    programs, factor_universe, target_universe = tp_threshold.threshold_tps()
    tester = gene_set_enrichment.TpEnrichmentTester(
        factor_universe, target_universe, p_value_threshold=1e-3)

    for tissue, tissue_genes in zip(tissues, highly_expressed_genes):
        logging.debug(
            "Testing %d transcriptional programs' factors for enrichment "
            "in genes over-expressed in tissue %s",
            len(programs), tissue)

        factor_results = tester.test_transcriptional_program_factors(
            programs, tissue_genes)
        for tp, stats in factor_results:
            test_drawn, test_size, test_complement_size, draws, p_value = stats
            logging.info(
                'TP:%4d; %4d in program; %4d/%5d over-expressed in % -32s; %4d in intersection; p-value=%e',
                tp.k, draws, test_size, test_complement_size + test_size,
                tissue, test_drawn, p_value)

        target_results = tester.test_transcriptional_program_targets(
            programs, tissue_genes)
        for tp, stats in target_results:
            test_drawn, test_size, test_complement_size, draws, p_value = stats
            logging.info(
                'TP:%4d; %4d in program; %4d/%5d over-expressed in % -32s; %4d in intersection; p-value=%g',
                tp.k, draws, test_size, test_complement_size + test_size,
                tissue, test_drawn, p_value)
コード例 #3
0
ファイル: do_bootstrap.py プロジェクト: JohnReid/biopsy
def do_bootstrap():
    """
    Run a bootstrap analysis on GO p-values.

    Each sample from generate_bootstrap_samples is analysed against every
    ontology in go_context.targets_go_data; the smallest p-value across the
    ontologies is recorded for that sample.

    Returns the list of recorded p-values, one per sample.
    """
    logging.info('Running GO bootstrap analysis with %d samples: topGO method=%s', options.num_bootstrap_samples, options.topgo_method)
    programs, factor_universe, target_universe = tp_threshold.threshold_tps()
    genes_2_GO, go_context = go.initialise_go_context(factor_universe, target_universe, options.go_ontologies)

    def best_p_value_for(sample):
        # Analyse the sample against every ontology, then take the minimum
        # of the per-ontology best p-values.
        per_ontology = dict()
        for ontology, go_data in go_context.targets_go_data.iteritems():
            per_ontology[ontology] = go.try_go_analysis(go_data, sample, 1., options.topgo_method)
        r_values = map(topgo.best_p_value, per_ontology.values())
        return min(map(topgo.p_value_from_r, r_values))

    # filter(None, ...) removes the zero sizes.
    tp_sizes = filter(None, map(len, (program.targets for program in programs)))

    collected = list()
    for sample in generate_bootstrap_samples(options.num_bootstrap_samples, target_universe, tp_sizes):
        collected.append(best_p_value_for(sample))
    logging.info('GO bootstrap analysis completed')
    return collected
コード例 #4
0
ファイル: validate.py プロジェクト: JohnReid/biopsy
def validiate_program_28():
    """
    Validate the targets of one transcriptional program against every
    target validation set.

    The program examined is the element at index 28 of the list returned by
    threshold_tps(). Both validation-set collections are first restricted to
    their respective universes; only the target sets are then compared.
    """
    factor_validation_sets, target_validation_sets = generate_validation_sets()
    transcriptional_programs, factor_universe, target_universe = threshold_tps()
    factor_universe = set(factor_universe)
    target_universe = set(target_universe)
    restrict_validation_sets(factor_validation_sets.values(), factor_universe)
    restrict_validation_sets(target_validation_sets.values(), target_universe)
    tp = transcriptional_programs[28]
    for name, reference_set in target_validation_sets.iteritems():
        # The comparison uses the program's targets, the target validation
        # sets and the target universe, so the label passed is "Targets".
        validate_tp_set(tp.k, "Targets", tp.targets, name, reference_set, target_universe)
コード例 #5
0
def validiate_program_28():
    """
    Validate the targets of one transcriptional program against every
    target validation set.

    The program examined is the element at index 28 of the list returned by
    threshold_tps(). Both validation-set collections are first restricted to
    their respective universes; only the target sets are then compared.
    """
    factor_validation_sets, target_validation_sets = generate_validation_sets()
    transcriptional_programs, factor_universe, target_universe = threshold_tps(
    )
    factor_universe = set(factor_universe)
    target_universe = set(target_universe)
    restrict_validation_sets(factor_validation_sets.values(), factor_universe)
    restrict_validation_sets(target_validation_sets.values(), target_universe)
    tp = transcriptional_programs[28]
    for name, reference_set in target_validation_sets.iteritems():
        # The comparison uses the program's targets, the target validation
        # sets and the target universe, so the label passed is 'Targets'.
        validate_tp_set(tp.k, 'Targets', tp.targets, name, reference_set,
                        target_universe)
コード例 #6
0
ファイル: validate.py プロジェクト: JohnReid/biopsy
def validate_one_set(validation_sets, latex_f=None):
    """
    Validate every transcriptional program's targets against the given
    validation sets and return one adjusted p-value per program.

    validation_sets: mapping from a set name to a reference set; its values
        are passed to restrict_validation_sets together with the target
        universe before any validation is done.
    latex_f: forwarded unchanged to validate_tp_set.

    For each program the p-values gathered in a per-program list (handed to
    validate_tp_set as its last argument; presumably filled in by it --
    confirm) are multiplied together and passed, with their count, to
    lou_jost_multiple_p_value_adjustment.

    NOTE: reduce() is called without an initial value, so an empty p-value
    list raises TypeError.
    """
    programs, factor_universe, target_universe = threshold_tps()
    universe = set(target_universe)
    restrict_validation_sets(validation_sets.values(), universe)

    adjusted_p_values = []
    for program in programs:
        program_p_values = []
        for set_name, reference in validation_sets.iteritems():
            validate_tp_set(
                program.k, "Targets", program.targets, set_name, reference,
                universe, latex_f, program_p_values
            )
        product = reduce(float.__mul__, program_p_values)
        adjusted = lou_jost_multiple_p_value_adjustment(product, len(program_p_values))
        adjusted_p_values.append(adjusted)
    return adjusted_p_values
コード例 #7
0
def validate_one_set(validation_sets, latex_f=None):
    """
    Validate every transcriptional program's targets against the given
    validation sets and return one adjusted p-value per program.

    validation_sets: mapping from a set name to a reference set; its values
        are passed to restrict_validation_sets together with the target
        universe before any validation is done.
    latex_f: forwarded unchanged to validate_tp_set.

    For each program the p-values gathered in a per-program list (handed to
    validate_tp_set as its last argument; presumably filled in by it --
    confirm) are multiplied together and passed, with their count, to
    lou_jost_multiple_p_value_adjustment.

    NOTE: reduce() is called without an initial value, so an empty p-value
    list raises TypeError.
    """
    programs, factor_universe, target_universe = threshold_tps()
    universe = set(target_universe)
    restrict_validation_sets(validation_sets.values(), universe)

    results = []
    for program in programs:
        collected = []
        for set_name, reference in validation_sets.iteritems():
            validate_tp_set(program.k, 'Targets', program.targets, set_name,
                            reference, universe, latex_f, collected)
        combined = reduce(float.__mul__, collected)
        results.append(
            lou_jost_multiple_p_value_adjustment(combined, len(collected)))
    return results
コード例 #8
0
ファイル: validate.py プロジェクト: JohnReid/biopsy
def validate():
    """
    Validate the transcriptional programs against the validation sets.

    Each program's factors are checked against every factor validation set
    and its targets against every target validation set, after both
    collections of sets have been restricted to their universes. The file
    "validation.tex" in options.output_dir is opened for writing and its
    handle is passed to every validate_tp_set call.
    """
    factor_validation_sets, target_validation_sets = generate_validation_sets()
    transcriptional_programs, factor_universe, target_universe = threshold_tps()
    factor_universe = set(factor_universe)
    target_universe = set(target_universe)
    restrict_validation_sets(factor_validation_sets.values(), factor_universe)
    restrict_validation_sets(target_validation_sets.values(), target_universe)
    latex_f = open(os.path.join(options.output_dir, "validation.tex"), "w")
    # try/finally ensures the file is closed even when a validation call
    # raises part-way through.
    try:
        for tp in transcriptional_programs:
            for name, reference_set in factor_validation_sets.iteritems():
                validate_tp_set(tp.k, "Factors", tp.factors, name, reference_set, factor_universe, latex_f)
            for name, reference_set in target_validation_sets.iteritems():
                validate_tp_set(tp.k, "Targets", tp.targets, name, reference_set, target_universe, latex_f)
    finally:
        latex_f.close()
コード例 #9
0
def go_analysis():
    """
    Run a GO analysis for every transcriptional program.

    A go.TPGoAnalysis is built per program and printed twice: to
    'go-analyses.txt' in options.output_dir (with log=True) and to
    sys.stdout. Once the file has been closed, the collected analyses are
    handed to write_latex.
    """
    logging.info('Running GO analysis: p-value threshold=%e; topGO method=%s',
                 options.go_p_value_threshold, options.topgo_method)
    transcriptional_programs, factor_universe, target_universe = tp_threshold.threshold_tps(
    )
    genes_2_GO, go_context = go.initialise_go_context(factor_universe,
                                                      target_universe,
                                                      options.go_ontologies)
    go_analyses = list()
    f = open(os.path.join(options.output_dir, 'go-analyses.txt'), 'w')
    # try/finally ensures the output file is closed even when an analysis
    # or a print call raises.
    try:
        for tp in transcriptional_programs:
            # Named tp_analysis so the local does not shadow this function.
            tp_analysis = go.TPGoAnalysis(tp, go_context,
                                          options.go_p_value_threshold,
                                          options.topgo_method)
            tp_analysis.print_go_analyses(f,
                                          options.go_p_value_threshold,
                                          log=True)
            tp_analysis.print_go_analyses(sys.stdout,
                                          options.go_p_value_threshold)
            go_analyses.append(tp_analysis)
    finally:
        f.close()
    write_latex(go_analyses)
コード例 #10
0
def validate():
    """
    Validate the transcriptional programs against the validation sets.

    Each program's factors are checked against every factor validation set
    and its targets against every target validation set, after both
    collections of sets have been restricted to their universes. The file
    'validation.tex' in options.output_dir is opened for writing and its
    handle is passed to every validate_tp_set call.
    """
    factor_validation_sets, target_validation_sets = generate_validation_sets()
    transcriptional_programs, factor_universe, target_universe = threshold_tps(
    )
    factor_universe = set(factor_universe)
    target_universe = set(target_universe)
    restrict_validation_sets(factor_validation_sets.values(), factor_universe)
    restrict_validation_sets(target_validation_sets.values(), target_universe)
    latex_f = open(os.path.join(options.output_dir, 'validation.tex'), 'w')
    # try/finally ensures the file is closed even when a validation call
    # raises part-way through.
    try:
        for tp in transcriptional_programs:
            for name, reference_set in factor_validation_sets.iteritems():
                validate_tp_set(tp.k, 'Factors', tp.factors, name,
                                reference_set, factor_universe, latex_f)
            for name, reference_set in target_validation_sets.iteritems():
                validate_tp_set(tp.k, 'Targets', tp.targets, name,
                                reference_set, target_universe, latex_f)
    finally:
        latex_f.close()
コード例 #11
0
def print_latex(go_analyses, f=sys.stdout, factors=True):
    """
    Print the GO analyses as the rows of a LaTeX table.

    go_analyses: per-program analyses, paired by position (zip) with the
        programs returned by tp_threshold.threshold_tps().
    f: file-like object the rows are printed to.
    factors: when true, each analysis's factors_go_analysis and the size of
        tp.factors are used; otherwise targets_go_analysis and tp.targets.
    """
    programs, factor_universe, target_universe = tp_threshold.threshold_tps()
    if factors:
        t = 'Factors'
    else:
        t = 'Targets'

    # Comment line naming the table kind, then the header row and a rule.
    print >> f, '%% %s' % t
    print >> f, 'TP & %s & GO term & & GO description & \\multicolumn{2}{c}{annotated} & $p$-score \\\\' % t
    print >> f, '\\hline'

    # Columns: program, size, GO id, ontology, GO description,
    # significant, annotated, p-value.
    row_format = '% 4d & %5d & %s & %s & %42s & % 4d & %-4d & %.1e \\\\'
    for program, program_analysis in zip(programs, go_analyses):
        if factors:
            by_ontology = program_analysis.factors_go_analysis
            size = len(program.factors)
        else:
            by_ontology = program_analysis.targets_go_analysis
            size = len(program.targets)
        for ontology, analysis in by_ontology.iteritems():
            if None != analysis:
                for stats in topgo.yield_stats(analysis):
                    go_id, go_term, annotated, significant, expected, pvalue = stats
                    print >> f, row_format % (
                        program.k, size, go_id, ontology, go_term,
                        significant, annotated, pvalue)
    print >> f, '\\\\'