def do_bootstrap():
    """
    Do a bootstrap analysis on GO p-values.

    For every sample yielded by ``generate_bootstrap_samples`` a GO analysis is
    run for each ontology in ``go_context.targets_go_data`` and the smallest of
    the per-ontology best p-values is kept.

    Returns a list holding one such minimum p-value per bootstrap sample.
    """
    logging.info(
        'Running GO bootstrap analysis with %d samples: topGO method=%s',
        options.num_bootstrap_samples,
        options.topgo_method)

    transcriptional_programs, factor_universe, target_universe = tp_threshold.threshold_tps()
    genes_2_GO, go_context = go.initialise_go_context(
        factor_universe, target_universe, options.go_ontologies)

    # Sizes of the target sets, with zero-sized (empty) sets left out.
    target_set_sizes = [len(tp.targets) for tp in transcriptional_programs]
    tp_sizes = [size for size in target_set_sizes if size]

    p_values = []
    samples = generate_bootstrap_samples(
        options.num_bootstrap_samples, target_universe, tp_sizes)
    for sample in samples:
        # One analysis per ontology. The literal 1. is presumably a p-value
        # threshold (i.e. keep everything) - confirm against go.try_go_analysis.
        per_ontology = {}
        for ontology, go_data in go_context.targets_go_data.iteritems():
            per_ontology[ontology] = go.try_go_analysis(
                go_data, sample, 1., options.topgo_method)

        # Extract the best result of every analysis first, then convert them
        # to p-values and keep the smallest one for this sample.
        best_results = [topgo.best_p_value(analysis) for analysis in per_ontology.values()]
        p_values.append(min(topgo.p_value_from_r(best) for best in best_results))

    logging.info('GO bootstrap analysis completed')
    return p_values
def symatlas():
    """
    Test the transcriptional programs for enrichment in SymAtlas expression data.

    For each tissue, the set of genes mapped from its highly expressed probes
    is tested first against the factors and then against the targets of the
    transcriptional programs. Every result yielded by the enrichment tester is
    written to the log; nothing is returned.
    """
    logging.info('Analysing SymAtlas expression data.')

    probes_to_genes = SA.probes_to_genes()
    dataset, tissues, probes, fold_changes = SA.expression_data()
    highly_expressed = SA.fold_change_above_median(
        fold_changes, fold_change=options.symatlas_fold_change_threshold)
    highly_expressed_probes = SA.match_probe_sets(highly_expressed.T, probes)

    # One gene set per probe set; probes that have no mapping, or an empty
    # mapping, in probes_to_genes are dropped.
    highly_expressed_genes = [
        set(probes_to_genes[p] for p in hep if p in probes_to_genes and probes_to_genes[p])
        for hep in highly_expressed_probes
    ]

    transcriptional_programs, factor_universe, target_universe = tp_threshold.threshold_tps()
    tester = gene_set_enrichment.TpEnrichmentTester(
        factor_universe, target_universe, p_value_threshold=1e-3)

    for tissue, tissue_genes in zip(tissues, highly_expressed_genes):
        logging.debug(
            'Testing %d transcriptional programs\' factors for enrichment in genes over-expressed in tissue %s',
            len(transcriptional_programs), tissue)

        # Enrichment of the programs' factors in this tissue's genes.
        factor_results = tester.test_transcriptional_program_factors(
            transcriptional_programs, tissue_genes)
        for tp, (test_drawn, test_size, test_complement_size, draws, p_value) in factor_results:
            logging.info(
                'TP:%4d; %4d in program; %4d/%5d over-expressed in % -32s; %4d in intersection; p-value=%e',
                tp.k, draws, test_size, test_complement_size+test_size, tissue, test_drawn, p_value)

        # Enrichment of the programs' targets in this tissue's genes.
        target_results = tester.test_transcriptional_program_targets(
            transcriptional_programs, tissue_genes)
        for tp, (test_drawn, test_size, test_complement_size, draws, p_value) in target_results:
            logging.info(
                'TP:%4d; %4d in program; %4d/%5d over-expressed in % -32s; %4d in intersection; p-value=%g',
                tp.k, draws, test_size, test_complement_size+test_size, tissue, test_drawn, p_value)
def do_bootstrap():
    """
    Do a bootstrap analysis on GO p-values.

    Each sample produced by ``generate_bootstrap_samples`` is analysed once per
    ontology found in ``go_context.targets_go_data``; the minimum over the
    ontologies' best p-values is recorded for that sample.

    Returns the list of recorded minima, one per bootstrap sample.
    """
    logging.info(
        'Running GO bootstrap analysis with %d samples: topGO method=%s',
        options.num_bootstrap_samples,
        options.topgo_method)

    transcriptional_programs, factor_universe, target_universe = tp_threshold.threshold_tps()
    genes_2_GO, go_context = go.initialise_go_context(
        factor_universe, target_universe, options.go_ontologies)

    # Keep only the non-zero target set sizes.
    all_sizes = [len(tp.targets) for tp in transcriptional_programs]
    tp_sizes = [size for size in all_sizes if size]

    p_values = []
    sample_source = generate_bootstrap_samples(
        options.num_bootstrap_samples, target_universe, tp_sizes)
    for sample in sample_source:
        # Run the analysis for every ontology before looking at any result.
        # NOTE(review): the 1. argument looks like a p-value threshold that
        # disables filtering - confirm against go.try_go_analysis.
        analyses = {}
        for ontology, go_data in go_context.targets_go_data.iteritems():
            analyses[ontology] = go.try_go_analysis(
                go_data, sample, 1., options.topgo_method)

        best_per_ontology = [topgo.best_p_value(analysis) for analysis in analyses.values()]
        best_p_value = min(topgo.p_value_from_r(best) for best in best_per_ontology)
        p_values.append(best_p_value)

    logging.info('GO bootstrap analysis completed')
    return p_values
def validiate_program_28():
    """
    Validate the targets of the transcriptional program at index 28.

    Both families of validation sets are restricted to their respective
    universes, then the program's targets are compared against every target
    validation set via ``validate_tp_set``.
    """
    factor_validation_sets, target_validation_sets = generate_validation_sets()
    transcriptional_programs, factor_universe, target_universe = threshold_tps()
    factor_universe = set(factor_universe)
    target_universe = set(target_universe)

    # The factor sets are restricted as well although they are not compared
    # below; kept in case restrict_validation_sets has effects callers rely on.
    restrict_validation_sets(factor_validation_sets.values(), factor_universe)
    restrict_validation_sets(target_validation_sets.values(), target_universe)

    tp = transcriptional_programs[28]
    for name, reference_set in target_validation_sets.iteritems():
        # tp.targets is checked against the target sets and target universe,
        # so the label must say "Targets" (it used to read "Factors", unlike
        # the same comparison in validate()).
        validate_tp_set(tp.k, "Targets", tp.targets, name, reference_set, target_universe)
def validiate_program_28():
    """
    Validate the targets of the transcriptional program at index 28.

    The factor and target validation sets are each restricted to their
    universe; the program's targets are then passed to ``validate_tp_set``
    once per target validation set.
    """
    factor_validation_sets, target_validation_sets = generate_validation_sets()
    transcriptional_programs, factor_universe, target_universe = threshold_tps()
    factor_universe = set(factor_universe)
    target_universe = set(target_universe)

    # Factor sets are not used further down; the restriction call is retained
    # so any effect of restrict_validation_sets stays the same.
    restrict_validation_sets(factor_validation_sets.values(), factor_universe)
    restrict_validation_sets(target_validation_sets.values(), target_universe)

    tp = transcriptional_programs[28]
    for name, reference_set in target_validation_sets.iteritems():
        # Label corrected from 'Factors': the data being validated here are
        # the program's targets, matching the labelling used in validate().
        validate_tp_set(tp.k, 'Targets', tp.targets, name, reference_set,
                        target_universe)
def validate_one_set(validation_sets, latex_f=None):
    """
    Validate every transcriptional program's targets against one family of sets.

    validation_sets -- mapping from a name to a reference set; its values are
                       restricted to the target universe before use.
    latex_f         -- forwarded unchanged to validate_tp_set (default None).

    Returns a list with one value per transcriptional program: the result of
    lou_jost_multiple_p_value_adjustment applied to the product of the
    p-values collected for that program and to their count.
    """
    transcriptional_programs, factor_universe, target_universe = threshold_tps()
    target_universe = set(target_universe)
    restrict_validation_sets(validation_sets.values(), target_universe)

    overall_p_values = []
    for tp in transcriptional_programs:
        # This list is handed to validate_tp_set, which presumably appends one
        # p-value per comparison - confirm against validate_tp_set.
        collected = []
        for name, reference_set in validation_sets.iteritems():
            validate_tp_set(tp.k, "Targets", tp.targets, name, reference_set,
                            target_universe, latex_f, collected)

        # Combine this program's p-values into one adjusted value.
        product = reduce(float.__mul__, collected)
        num_p_values = len(collected)
        overall_p_values.append(
            lou_jost_multiple_p_value_adjustment(product, num_p_values))
    return overall_p_values
def validate_one_set(validation_sets, latex_f=None):
    """
    Validate every transcriptional program's targets against one family of sets.

    validation_sets -- mapping from a name to a reference set; the values are
                       passed to restrict_validation_sets together with the
                       target universe.
    latex_f         -- passed through to validate_tp_set (default None).

    Returns one adjusted p-value per transcriptional program, computed by
    lou_jost_multiple_p_value_adjustment from the product and the number of
    the p-values gathered for that program.
    """
    transcriptional_programs, factor_universe, target_universe = threshold_tps()
    target_universe = set(target_universe)
    restrict_validation_sets(validation_sets.values(), target_universe)

    overall_p_values = []
    for tp in transcriptional_programs:
        # Accumulator given to validate_tp_set; presumably it receives one
        # p-value per validation set - confirm against validate_tp_set.
        gathered = []
        for name, reference_set in validation_sets.iteritems():
            validate_tp_set(tp.k, 'Targets', tp.targets, name, reference_set,
                            target_universe, latex_f, gathered)

        combined = reduce(float.__mul__, gathered)
        count = len(gathered)
        adjusted = lou_jost_multiple_p_value_adjustment(combined, count)
        overall_p_values.append(adjusted)
    return overall_p_values
def validate():
    """
    Validate the transcriptional programs against the validation sets.

    The factor and target validation sets are restricted to their universes,
    then every program's factors and targets are compared against each
    matching validation set. ``validate_tp_set`` is given an open handle on
    ``validation.tex`` in ``options.output_dir`` for each comparison.
    """
    factor_validation_sets, target_validation_sets = generate_validation_sets()
    transcriptional_programs, factor_universe, target_universe = threshold_tps()
    factor_universe = set(factor_universe)
    target_universe = set(target_universe)
    restrict_validation_sets(factor_validation_sets.values(), factor_universe)
    restrict_validation_sets(target_validation_sets.values(), target_universe)

    # Use a context manager so the file is closed even when a validation
    # call raises part-way through.
    with open(os.path.join(options.output_dir, "validation.tex"), "w") as latex_f:
        for tp in transcriptional_programs:
            # Factors of this program against every factor validation set.
            for name, reference_set in factor_validation_sets.iteritems():
                validate_tp_set(tp.k, "Factors", tp.factors, name, reference_set,
                                factor_universe, latex_f)
            # Targets of this program against every target validation set.
            for name, reference_set in target_validation_sets.iteritems():
                validate_tp_set(tp.k, "Targets", tp.targets, name, reference_set,
                                target_universe, latex_f)
def go_analysis():
    """
    Run the GO analysis for every transcriptional program.

    Each program's analysis is printed to ``go-analyses.txt`` in
    ``options.output_dir`` (with ``log=True``) and to standard output; the
    collected analyses are finally handed to ``write_latex``.
    """
    logging.info(
        'Running GO analysis: p-value threshold=%e; topGO method=%s',
        options.go_p_value_threshold,
        options.topgo_method)

    transcriptional_programs, factor_universe, target_universe = tp_threshold.threshold_tps()
    genes_2_GO, go_context = go.initialise_go_context(
        factor_universe, target_universe, options.go_ontologies)

    go_analyses = []
    # The context manager guarantees the report file is closed even if an
    # analysis raises.
    with open(os.path.join(options.output_dir, 'go-analyses.txt'), 'w') as f:
        for tp in transcriptional_programs:
            # Named tp_go_analysis so the local does not shadow this function.
            tp_go_analysis = go.TPGoAnalysis(
                tp, go_context, options.go_p_value_threshold, options.topgo_method)
            tp_go_analysis.print_go_analyses(f, options.go_p_value_threshold, log=True)
            tp_go_analysis.print_go_analyses(sys.stdout, options.go_p_value_threshold)
            go_analyses.append(tp_go_analysis)

    write_latex(go_analyses)
def validate():
    """
    Validate the transcriptional programs against the validation sets.

    After restricting the factor and target validation sets to their
    universes, each program's factors and targets are passed to
    ``validate_tp_set`` once per matching validation set, along with an open
    handle on ``validation.tex`` in ``options.output_dir``.
    """
    factor_validation_sets, target_validation_sets = generate_validation_sets()
    transcriptional_programs, factor_universe, target_universe = threshold_tps()
    factor_universe = set(factor_universe)
    target_universe = set(target_universe)
    restrict_validation_sets(factor_validation_sets.values(), factor_universe)
    restrict_validation_sets(target_validation_sets.values(), target_universe)

    # 'with' closes the file on every exit path, including an exception
    # raised by one of the validation calls.
    with open(os.path.join(options.output_dir, 'validation.tex'), 'w') as latex_f:
        for tp in transcriptional_programs:
            # This program's factors versus each factor validation set.
            for name, reference_set in factor_validation_sets.iteritems():
                validate_tp_set(tp.k, 'Factors', tp.factors, name, reference_set,
                                factor_universe, latex_f)
            # This program's targets versus each target validation set.
            for name, reference_set in target_validation_sets.iteritems():
                validate_tp_set(tp.k, 'Targets', tp.targets, name, reference_set,
                                target_universe, latex_f)
def print_latex(go_analyses, f=sys.stdout, factors=True):
    """
    Print the GO analysis results as rows of a LaTeX table.

    go_analyses -- per-program analyses, paired positionally (via zip) with
                   the programs returned by tp_threshold.threshold_tps(); each
                   must provide factors_go_analysis and targets_go_analysis.
    f           -- file-like object to print to (default: sys.stdout).
    factors     -- if true the factor analyses are reported, otherwise the
                   target analyses.
    """
    transcriptional_programs, factor_universe, target_universe = tp_threshold.threshold_tps()

    t = 'Factors' if factors else 'Targets'
    print >> f, '%% %s' % t
    print >> f, 'TP & %s & GO term & & GO description & \\multicolumn{2}{c}{annotated} & $p$-score \\\\' % t
    print >> f, '\\hline'

    for tp, tp_analysis in zip(transcriptional_programs, go_analyses):
        if factors:
            type_analysis = tp_analysis.factors_go_analysis
            size = len(tp.factors)
        else:
            type_analysis = tp_analysis.targets_go_analysis
            size = len(tp.targets)

        for ontology, analysis in type_analysis.iteritems():
            # Ontologies without an analysis are stored as None; skip them.
            if analysis is None:
                continue
            for go_id, go_term, annotated, significant, expected, pvalue in topgo.yield_stats(analysis):
                # Program & Factors & GO term & & GO description & \multicolumn{2}{c}{annotated} & $p$-score \\
                print >> f, '% 4d & %5d & %s & %s & %42s & % 4d & %-4d & %.1e \\\\' % (
                    tp.k, size, go_id, ontology, go_term, significant, annotated, pvalue)

        # NOTE(review): emitted once per program as an empty separator row -
        # confirm this nesting matches the intended table layout.
        print >> f, '\\\\'