def cleanUserBackgrounds(infile, outfile):
    '''
    Clean a user specified background gene list: duplicates are removed and
    the IDs are converted to ensemblg.

    The ``id_type`` and ``db_name`` settings are read from PARAMS and passed,
    together with the input and output paths, to
    PipelineEnrichment.cleanGeneLists (called with submit=True).
    '''
    id_type, db_name = PARAMS['id_type'], PARAMS['db_name']
    PipelineEnrichment.cleanGeneLists(
        infile, outfile, id_type, db_name, submit=True)
def cleanUserBackgrounds(infile, outfile):
    '''
    Clean a user specified background gene list: duplicates are removed and
    the IDs are converted to ensemblg.

    The ``id_type`` and ``db_name`` settings are read from PARAMS and passed,
    together with the input and output paths, to
    PipelineEnrichment.cleanGeneLists (called with submit=True).
    '''
    id_type, db_name = PARAMS['id_type'], PARAMS['db_name']
    PipelineEnrichment.cleanGeneLists(
        infile, outfile, id_type, db_name, submit=True)
def cleanForegrounds(infile, outfile):
    '''
    Clean a foreground gene list: duplicates are removed and the IDs are
    converted to ensemblg.

    The ``id_type`` and ``db_name`` settings are read from PARAMS; the ID
    type is reported through E.info before the work is handed to
    PipelineEnrichment.cleanGeneLists (called with submit=True).
    '''
    id_type, db_name = PARAMS['id_type'], PARAMS['db_name']
    E.info(id_type)
    PipelineEnrichment.cleanGeneLists(
        infile, outfile, id_type, db_name, submit=True)
def cleanForegrounds(infile, outfile):
    '''
    Clean a foreground gene list: duplicates are removed and the IDs are
    converted to ensemblg.

    The ``id_type`` and ``db_name`` settings are read from PARAMS; the ID
    type is reported through E.info before the work is handed to
    PipelineEnrichment.cleanGeneLists (called with submit=True).
    '''
    id_type, db_name = PARAMS['id_type'], PARAMS['db_name']
    E.info(id_type)
    PipelineEnrichment.cleanGeneLists(
        infile, outfile, id_type, db_name, submit=True)
# Example no. 5
# 0
def mapUnmappedAnnotations(outfiles):
    '''
    Lets the user add annotations that are not in the database by building
    an AnnotationSet from a flat file.

    An "annotations2annotations.py" command describing that flat file has
    to be specified in the pipeline.ini.

    Only the first entry of `outfiles` is inspected.  With its
    "_genestoterms.tsv" suffix removed it is passed on as the output stem;
    with the "annotations.dir/" prefix removed as well it is the key used
    to look up the entry in the module-level `unmapped` collection.
    '''
    suffix = "_genestoterms.tsv"
    first_out = outfiles[0]

    # the lookup key has neither the directory prefix nor the suffix
    key = first_out.replace("annotations.dir/", "").replace(suffix, "")
    command = unmapped[key]

    # the output stem keeps the directory part
    out_stem = first_out.replace(suffix, "")
    PipelineEnrichment.getFlatFileAnnotations(command, out_stem, dbname)
# Example no. 6
# 0
def getDBAnnotations(infile, outfiles):
    '''
    Builds a series of AnnotationSets from a database generated using
    get_gene_annotations.py.

    Each table in the database carrying the $annot suffix yields one
    AnnotationSet.  The AnnotationSets are stored in output files in the
    annotations.dir directory.

    The database name is taken from the ``db_name`` entry of PARAMS and the
    work is delegated to PipelineEnrichment.getDBAnnotations (called with
    submit=True).
    '''
    database = PARAMS['db_name']
    PipelineEnrichment.getDBAnnotations(
        infile, outfiles, database, submit=True)
def mapUnmappedAnnotations(outfiles):
    '''
    Lets the user add annotations that are not in the database by building
    an AnnotationSet from a flat file.

    An "annotations2annotations.py" command describing that flat file has
    to be specified in the pipeline.ini.

    Only the first entry of `outfiles` is inspected.  With its
    "_genestoterms.tsv" suffix removed it is passed on as the output stem;
    with the "annotations.dir/" prefix removed as well it is the key used
    to look up the entry in the module-level `unmapped` collection.
    '''
    suffix = "_genestoterms.tsv"
    first_out = outfiles[0]

    # the lookup key has neither the directory prefix nor the suffix
    key = first_out.replace("annotations.dir/", "").replace(suffix, "")
    command = unmapped[key]

    # the output stem keeps the directory part
    out_stem = first_out.replace(suffix, "")
    PipelineEnrichment.getFlatFileAnnotations(command, out_stem, dbname)
def getDBAnnotations(infile, outfiles):
    '''
    Builds a series of AnnotationSets from a database generated using
    get_gene_annotations.py.

    Each table in the database carrying the $annot suffix yields one
    AnnotationSet.  The AnnotationSets are stored in output files in the
    annotations.dir directory.

    The database name is taken from the ``db_name`` entry of PARAMS and the
    work is delegated to PipelineEnrichment.getDBAnnotations (called with
    submit=True).
    '''
    database = PARAMS['db_name']
    PipelineEnrichment.getDBAnnotations(
        infile, outfiles, database, submit=True)
# Example no. 9
# 0
def buildHPABackground(outfile):
    '''
    Builds a background geneset based on human protein atlas expression
    values specified in pipeline.ini - allows the user to use a tissue
    specific background.

    The tissue name is recovered from the output file name.  Output files
    are expected to look like
    "<directory>/<tissue>_hpa_background.tsv", where spaces in the tissue
    name have been replaced by underscores (this is how the module-level
    `hpatissues` list is built).

    outfile: path of the background file to create; its second "/"-separated
        component must be the file name.

    The ``hpa_minlevel`` and ``hpa_supportive`` settings are read from
    PARAMS and the work is delegated to PipelineEnrichment.HPABackground
    (called with submit=True).
    '''
    suffix = "_hpa_background.tsv"
    filename = outfile.split("/")[1]

    if filename.endswith(suffix):
        # Strip the fixed suffix rather than cutting at the first "_", so
        # that multi-word tissues (e.g. "adrenal_gland") are kept whole.
        stem = filename[:-len(suffix)]
    else:
        # Unexpected file name: keep the previous behaviour and use the
        # text before the first underscore.
        stem = filename.split("_")[0]

    # undo the space -> underscore substitution used in the file name
    tissue = stem.replace("_", " ")

    PipelineEnrichment.HPABackground(tissue,
                                     PARAMS['hpa_minlevel'],
                                     PARAMS['hpa_supportive'],
                                     outfile,
                                     submit=True)
def foregroundsVsBackgrounds(infiles, outfiles):
    '''
    Performs enrichment analysis for every possible set of one foreground,
    one background and one AnnotationSet.

    The analysis is driven by the "stats" parameters in the pipeline.ini.
    Results are written to tab delimited files in results.dir: the _sig.tsv
    output file contains significantly enriched terms only, the other
    output file contains all terms.

    infiles is passed through unchanged; the first two entries of outfiles
    are passed on as separate arguments.
    '''
    run_enrichment = PipelineEnrichment.foregroundsVsBackgrounds

    first_out = outfiles[0]
    second_out = outfiles[1]

    # settings are read in the order in which they are passed on
    test_type = PARAMS['stats_testtype']
    run_type = PARAMS['stats_runtype']
    correction = PARAMS['stats_correction']
    threshold = PARAMS['stats_thresh']
    write_genes = int(PARAMS['stats_writegenes'])
    species = PARAMS['db_species']
    n_genes = int(PARAMS['stats_ngenes'])
    id_type = PARAMS['id_type']

    run_enrichment(infiles, first_out, second_out,
                   test_type, run_type, correction, threshold,
                   dbname, write_genes, species, n_genes, id_type,
                   submit=True)
def foregroundsVsBackgrounds(infiles, outfiles):
    '''
    Performs enrichment analysis for every possible set of one foreground,
    one background and one AnnotationSet.

    The analysis is driven by the "stats" parameters in the pipeline.ini.
    Results are written to tab delimited files in results.dir: the _sig.tsv
    output file contains significantly enriched terms only, the other
    output file contains all terms.

    infiles is passed through unchanged; the first two entries of outfiles
    are passed on as separate arguments.
    '''
    run_enrichment = PipelineEnrichment.foregroundsVsBackgrounds

    first_out = outfiles[0]
    second_out = outfiles[1]

    # settings are read in the order in which they are passed on
    test_type = PARAMS['stats_testtype']
    run_type = PARAMS['stats_runtype']
    correction = PARAMS['stats_correction']
    threshold = PARAMS['stats_thresh']
    write_genes = int(PARAMS['stats_writegenes'])
    species = PARAMS['db_species']
    n_genes = int(PARAMS['stats_ngenes'])
    id_type = PARAMS['id_type']

    run_enrichment(infiles, first_out, second_out,
                   test_type, run_type, correction, threshold,
                   dbname, write_genes, species, n_genes, id_type,
                   submit=True)
# Example no. 12
# 0
import CGATPipelines.PipelineGSEnrichment as PipelineEnrichment
import CGATPipelines.PipelineEnrichmentGSEA as PipelineGSEA
import CGAT.IOTools as IOTools
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from textwrap import wrap

# Load options from the config file.  Three candidate pipeline.ini paths are
# handed to P.getParameters: one inside a directory named after this script
# (file extension stripped), one in the parent directory and one in the
# current directory.
PARAMS = P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ]
)

# database name taken from the configuration
dbname = PARAMS['db_name']

# user supplied annotation sets, as reported by PipelineEnrichment
unmapped = PipelineEnrichment.getUnmapped(PARAMS)

# file name suffixes of the outputs expected for each entry in `unmapped`
outfilesuffixes = [
    "_genestoterms.tsv",
    "_termstogenes.tsv",
    "_termstodetails.tsv",
    "_termstoont.tsv",
]

# one list of output paths (under annotations.dir/) per entry in `unmapped`
unmappedouts = [
    ["annotations.dir/%s%s" % (name, suffix) for suffix in outfilesuffixes]
    for name in unmapped
]

# one background file per comma separated tissue in `hpa_tissue`; spaces in
# the tissue name become underscores in the file name
hpatissues = [
    'clean_backgrounds.dir/%s_hpa_background.tsv' % tissue.replace(" ", "_")
    for tissue in PARAMS['hpa_tissue'].split(",")
]

########################################################
import CGATPipelines.PipelineGSEnrichment as PipelineEnrichment
import CGATPipelines.PipelineEnrichmentGSEA as PipelineGSEA
import CGAT.IOTools as IOTools
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from textwrap import wrap

# Load options from the config file.  Three candidate pipeline.ini paths are
# handed to P.getParameters: one inside a directory named after this script
# (file extension stripped), one in the parent directory and one in the
# current directory.
PARAMS = P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ]
)

# database name taken from the configuration
dbname = PARAMS['db_name']

# user supplied annotation sets, as reported by PipelineEnrichment
unmapped = PipelineEnrichment.getUnmapped(PARAMS)

# file name suffixes of the outputs expected for each entry in `unmapped`
outfilesuffixes = [
    "_genestoterms.tsv",
    "_termstogenes.tsv",
    "_termstodetails.tsv",
    "_termstoont.tsv",
]

# one list of output paths (under annotations.dir/) per entry in `unmapped`
unmappedouts = [
    ["annotations.dir/%s%s" % (name, suffix) for suffix in outfilesuffixes]
    for name in unmapped
]

# one background file per comma separated tissue in `hpa_tissue`; spaces in
# the tissue name become underscores in the file name
hpatissues = [
    'clean_backgrounds.dir/%s_hpa_background.tsv' % tissue.replace(" ", "_")
    for tissue in PARAMS['hpa_tissue'].split(",")
]

########################################################
# Set up database connection