def cleanUserBackgrounds(infile, outfile):
    '''
    Deduplicates a user specified background gene list and converts its
    IDs to ensemblg.

    The identifier type and database name are taken from the "id_type"
    and "db_name" entries of PARAMS.  The work is delegated to
    PipelineEnrichment.cleanGeneLists, called with submit=True.
    '''
    PipelineEnrichment.cleanGeneLists(
        infile,
        outfile,
        PARAMS['id_type'],
        PARAMS['db_name'],
        submit=True)
def cleanForegrounds(infile, outfile):
    '''
    Deduplicates a foreground gene list and converts its IDs to ensemblg.

    The identifier type and database name are taken from the "id_type"
    and "db_name" entries of PARAMS.  The identifier type is reported
    through E.info before the work is delegated to
    PipelineEnrichment.cleanGeneLists, called with submit=True.
    '''
    id_kind = PARAMS['id_type']
    database = PARAMS['db_name']

    # Report which identifier type is being used for this run.
    E.info(id_kind)

    PipelineEnrichment.cleanGeneLists(
        infile, outfile, id_kind, database, submit=True)
def mapUnmappedAnnotations(outfiles):
    '''
    Lets the user add annotations which are not in the database.

    Requires an "annotations2annotations.py" command specified in the
    pipeline.ini providing details of a flat file containing the
    information needed to build the AnnotationSet.

    Only the first entry of outfiles is inspected: its name, minus the
    "annotations.dir/" and "_genestoterms.tsv" parts, is the key used to
    look up the command in the module level "unmapped" mapping.
    '''
    suffix = "_genestoterms.tsv"
    first_out = outfiles[0]

    # Key into "unmapped": the file name without directory and suffix.
    key = first_out.replace("annotations.dir/", "").replace(suffix, "")
    command = unmapped[key]

    # Output stem keeps the directory part and only drops the suffix.
    stem_with_dir = first_out.replace(suffix, "")

    PipelineEnrichment.getFlatFileAnnotations(command, stem_with_dir, dbname)
def getDBAnnotations(infile, outfiles):
    '''
    Builds a series of AnnotationSets from a database generated using
    get_gene_annotations.py.

    One AnnotationSet is generated for each table in the database with
    the $annot suffix.  AnnotationSets are stored in output files in the
    annotations.dir directory.

    The database name is read from the "db_name" entry of PARAMS and the
    work is delegated to PipelineEnrichment.getDBAnnotations, called with
    submit=True.
    '''
    PipelineEnrichment.getDBAnnotations(
        infile,
        outfiles,
        PARAMS['db_name'],
        submit=True)
def buildHPABackground(outfile):
    '''
    Builds a background geneset based on human protein atlas expression
    values specified in pipeline.ini - allows the user to use a tissue
    specific background.

    The tissue name is recovered from the file name part of outfile.
    The module level "hpatissues" list names these files
    "<tissue>_hpa_background.tsv", with spaces in the tissue name
    replaced by underscores, so the suffix is stripped and underscores
    are turned back into spaces.  Taking only the text before the first
    underscore would truncate multi-word tissues.

    The "hpa_minlevel" and "hpa_supportive" entries of PARAMS are passed
    on to PipelineEnrichment.HPABackground, called with submit=True.
    '''
    suffix = "_hpa_background.tsv"
    filename = outfile.split("/")[-1]

    if filename.endswith(suffix):
        stem = filename[:-len(suffix)]
    else:
        # Unexpected file name: fall back to the text before the first
        # underscore.
        stem = filename.split("_")[0]

    tissue = stem.replace("_", " ")

    PipelineEnrichment.HPABackground(tissue,
                                     PARAMS['hpa_minlevel'],
                                     PARAMS['hpa_supportive'],
                                     outfile,
                                     submit=True)
def foregroundsVsBackgrounds(infiles, outfiles):
    '''
    Performs enrichment analysis for every possible set of one
    foreground, one background and one AnnotationSet.

    Analysis is performed based on the "stats" parameters in the
    pipeline.ini.  Results are written to tab delimited files in
    results.dir: the _sig.tsv output file contains significantly
    enriched terms only, the other output file contains all terms.

    The first two entries of outfiles are passed on to
    PipelineEnrichment.foregroundsVsBackgrounds, which is called with
    submit=True.
    '''
    # Positional arguments, in the order the pipeline function takes them.
    positional = (
        infiles,
        outfiles[0],
        outfiles[1],
        PARAMS['stats_testtype'],
        PARAMS['stats_runtype'],
        PARAMS['stats_correction'],
        PARAMS['stats_thresh'],
        dbname,
        int(PARAMS['stats_writegenes']),
        PARAMS['db_species'],
        int(PARAMS['stats_ngenes']),
        PARAMS['id_type'],
    )
    PipelineEnrichment.foregroundsVsBackgrounds(*positional, submit=True)
def foregroundsVsBackgrounds(infiles, outfiles):
    '''
    Runs enrichment analysis on every possible set of one foreground,
    one background and one AnnotationSet.

    The analysis is configured by the "stats" parameters in the
    pipeline.ini.  Results are written to tab delimited files in
    results.dir: the _sig.tsv output file contains significantly
    enriched terms only, the other output file contains all terms.

    The first two entries of outfiles are handed to
    PipelineEnrichment.foregroundsVsBackgrounds together with the
    configured options; the call is made with submit=True.
    '''
    first_out, second_out = outfiles[0], outfiles[1]

    # Options read from the configuration; the two int() casts are the
    # only conversions applied.
    test_type = PARAMS['stats_testtype']
    run_type = PARAMS['stats_runtype']
    correction = PARAMS['stats_correction']
    threshold = PARAMS['stats_thresh']
    write_genes = int(PARAMS['stats_writegenes'])
    species = PARAMS['db_species']
    n_genes = int(PARAMS['stats_ngenes'])
    id_kind = PARAMS['id_type']

    PipelineEnrichment.foregroundsVsBackgrounds(
        infiles, first_out, second_out,
        test_type, run_type, correction, threshold,
        dbname, write_genes, species, n_genes, id_kind,
        submit=True)
import CGATPipelines.PipelineGSEnrichment as PipelineEnrichment
import CGATPipelines.PipelineEnrichmentGSEA as PipelineGSEA
import CGAT.IOTools as IOTools
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from textwrap import wrap

# load options from the config file
# Three candidate pipeline.ini locations are handed to P.getParameters:
# one next to this script, one in the parent directory, one in the
# working directory.
PARAMS = P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ]
)

dbname = PARAMS['db_name']

# Annotation sets that are not in the database; indexed by output stem
# in mapUnmappedAnnotations.
unmapped = PipelineEnrichment.getUnmapped(PARAMS)

# One output file per suffix is expected for every unmapped annotation set.
outfilesuffixes = [
    "_genestoterms.tsv",
    "_termstogenes.tsv",
    "_termstodetails.tsv",
    "_termstoont.tsv",
]
unmappedouts = [
    ["annotations.dir/%s%s" % (u, s) for s in outfilesuffixes]
    for u in unmapped
]

# Background file name for each comma separated tissue in the config;
# spaces in tissue names become underscores.
hpatissues = [
    'clean_backgrounds.dir/%s_hpa_background.tsv' % tissue.replace(" ", "_")
    for tissue in PARAMS['hpa_tissue'].split(",")
]

########################################################
import CGATPipelines.PipelineGSEnrichment as PipelineEnrichment
import CGATPipelines.PipelineEnrichmentGSEA as PipelineGSEA
import CGAT.IOTools as IOTools
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from textwrap import wrap

# load options from the config file
# Three candidate pipeline.ini locations are handed to P.getParameters:
# one next to this script, one in the parent directory, one in the
# working directory.
PARAMS = P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ]
)

dbname = PARAMS['db_name']

# Annotation sets that are not in the database; indexed by output stem
# in mapUnmappedAnnotations.
unmapped = PipelineEnrichment.getUnmapped(PARAMS)

# One output file per suffix is expected for every unmapped annotation set.
outfilesuffixes = [
    "_genestoterms.tsv",
    "_termstogenes.tsv",
    "_termstodetails.tsv",
    "_termstoont.tsv",
]
unmappedouts = [
    ["annotations.dir/%s%s" % (u, s) for s in outfilesuffixes]
    for u in unmapped
]

# Background file name for each comma separated tissue in the config;
# spaces in tissue names become underscores.
hpatissues = [
    'clean_backgrounds.dir/%s_hpa_background.tsv' % tissue.replace(" ", "_")
    for tissue in PARAMS['hpa_tissue'].split(",")
]

########################################################
# Set up database connection