Ejemplo n.º 1
0
 def OnStatExport(self, event):
     """Export the project's statistics and tell the user how it went.

     On first use a ``ReportFA`` exporter writing to
     ``<project directory>/Export`` is created and cached under
     ``self.project['exportFA']``; later calls reuse it. The outcome of
     the export is shown through ``self.popup`` (message on success,
     error otherwise).

     Args:
         event: the triggering event; it is not used by this handler.
     """
     # ``dict.has_key`` was removed in Python 3; the ``in`` operator is
     # the portable membership test (it also works on Python 2).
     if 'exportFA' not in self.project:
         outDir = "%s/Export" % (self.project['directory'])
         createDir(outDir)
         self.project['exportFA'] = ReportFA(outDir=outDir,
                                             name=self.project['name'],
                                             organism=self.project['organism'])

     report = self.project['exportFA']
     try:
         # NOTE(review): the filter ``self.viewStat`` does not depend on the
         # loop variable, so either every workflow entry is exported or none.
         # Possibly a per-statistic lookup was intended -- confirm.
         selected = [statistics for statistics in self.workflow if self.viewStat]
         report.saveStatistics(self.project['allFA'], selected)
         self.popup.msg(self.frame, "Congratulations, statistics have been successfully exported to %s" % report.outDir, "Operation")
     except Exception:
         # Deliberately best-effort: any ordinary failure is reported to the
         # user. KeyboardInterrupt/SystemExit are no longer swallowed, which
         # the former bare ``except:`` did.
         self.popup.error(self.frame, "failed to export statistics to %s" % report.outDir)
Ejemplo n.º 2
0
def compareRandomizePipelines(projectDir):
    """
    Compare the properties of three randomized functional annotations
    (AFFY, B2G and AID) for a Bovine array.

    The annotations are read from ``projectDir``, shuffled and analysed,
    then resampled and analysed again; after each round the statistics are
    printed and saved under ``<projectDir>/Export/<organism>``.
    """
    projectName = "randomizePipeline"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    for message in (
            banner,
            "This function compare the properties of 3 randomized functional annotations for a Bovine array.",
            banner):
        logger.info(message)
    logger.info("name of the project : %s " % projectName)

    # Reference set of gene products, read from the organism's FASTA file
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" % (projectDir, organism),
                    refType="Fasta")

    # GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir),
                     force=False)

    # One (pipeline name, annotation file) pair per functional annotation;
    # the pipeline name doubles as the file type handed to FuncAnnot.read
    sources = [
        ("AFFY", "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism)),
        ("B2G", "%s/Annotation/B2G_%s.annot" % (projectDir, organism)),
        ("AID", "%s/Annotation/AID_%s.txt" % (projectDir, organism)),
    ]

    pipeline = {}
    for label, path in sources:
        annotation = FuncAnnot(label, refSet, G, organism=organism)
        annotation.read(path, fileType=label)
        pipeline[label] = annotation

    def annotations():
        # A fresh list on every call, in pipeline order
        return [pipeline[label] for label, _ in sources]

    def exportStatistics(reportName, statistics):
        # Print, then save to Excel, the requested statistics
        outDir = "%s/Export/%s" % (projectDir, organism)
        createDir(outDir)
        report = ReportFA(name=reportName, outDir=outDir, organism=organism)
        report.printStatistics(annotations(), statistics)
        report.saveStatistics(annotations(), statistics)

    randomizer = RandomizeFA()
    analyser = None

    #-----------------------------------------------
    # Round 1: shuffle the annotations, then analyse and export
    batchExecute(["shuffleAnnotation"], randomizer, annotations())

    analyser = AnalyseFA()
    batchExecute(["coherence", "redundancy"], analyser, annotations())

    exportStatistics("Randomize shuffle", ["coherence", "redundancy"])

    #-----------------------------------------------
    # Round 2: resample the annotations, then analyse and export
    batchExecute(["sampleAnnotation"], randomizer, annotations())

    batchExecute([
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "redundancy", "specificity", "informationContent"
    ], analyser, annotations())

    exportStatistics("Randomize sample", [
        "coverage", "numberAnnot", "richness", "specificity",
        "informationContent", "redundancy"
    ])

    logger.info(banner)
    logger.info("")
Ejemplo n.º 3
0
def compareBovinePipelines(projectDir):
    """
    Compare the properties of three functional annotations (AFFY, B2G and
    AID) for a Bovine array.

    The annotations are read from ``projectDir``, analysed, plotted under
    ``<projectDir>/Graph/<organism>``, compared with each other, and their
    statistics are printed and saved under ``<projectDir>/Export/<organism>``.
    """
    projectName = "bovinePipeline"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    for message in (
            banner,
            "This function compare the properties of 3 functional annotations for a Bovine array.",
            banner):
        logger.info(message)
    logger.info("name of the project : %s " % projectName)

    # Reference set of gene products, read from the bovine FASTA file
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" % (projectDir, organism),
                    refType="Fasta")

    # GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir),
                     force=False)

    # One (pipeline name, annotation file) pair per functional annotation;
    # the pipeline name doubles as the file type handed to FuncAnnot.read
    sources = [
        ("AFFY", "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism)),
        ("B2G", "%s/Annotation/B2G_%s.annot" % (projectDir, organism)),
        ("AID", "%s/Annotation/AID_%s.txt" % (projectDir, organism)),
    ]

    pipeline = {}
    for label, path in sources:
        annotation = FuncAnnot(label, refSet, G, organism=organism)
        annotation.read(path, fileType=label)
        pipeline[label] = annotation

    def annotations():
        # A fresh list on every call, in pipeline order
        return [pipeline[label] for label, _ in sources]

    #-----------------------------------------------
    # Analyse the functional annotations. The very same list of statistics
    # is reused below to drive the plots.
    analyser = AnalyseFA()
    statistics = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "redundancy", "specificity", "informationContent"
    ]
    batchExecute(statistics, analyser, annotations())

    # Report the size of the largest annotation set of each pipeline
    analyser.largestSet(annotations())
    logger.info("The largest sets of annotations are :")
    for label, _ in sources:
        annotation = pipeline[label]
        largest = annotation['largestSet']['All_aspects_of_GO']
        logger.info("\t%d for %s" % (largest, annotation.name))

    # Plot the statistics of the functional annotations
    graphDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(graphDir)
    plotter = PlotFA(xlabel="Annotation pipelines",
                     outDir=graphDir,
                     name=projectName,
                     organism=organism,
                     ext="png")
    batchExecute(statistics, plotter, annotations(), doGrid=True)
    batchExecute(["numberAnnotHisto2D"],
                 plotter,
                 annotations(),
                 doGrid=True,
                 tit="")

    #-----------------------------------------------
    # Compare the functional annotations with each other, then plot the
    # outcome of that comparison
    comparer = CompareFA()
    batchExecute(["venn", "funcSim"], comparer, annotations())
    batchExecute(["venn", "funcSymSim"],
                 plotter,
                 comparer,
                 annotations(),
                 tit="")

    #-----------------------------------------------
    # Export the statistics to Excel
    exportDir = "%s/Export/%s" % (projectDir, organism)
    createDir(exportDir)
    exportList = [
        "unconnected", "coverage", "richness", "numberAnnot", "specificity",
        "informationContent", "redundancy"
    ]
    reporter = ReportFA(outDir=exportDir, name=projectName, organism=organism)
    reporter.printStatistics(annotations(), exportList)
    reporter.saveStatistics(annotations(), exportList)

    logger.info(banner)
    logger.info("")
Ejemplo n.º 4
0
from AIGO.ReferenceSet import RefSet
from AIGO.FunctionalAnnotation import FuncAnnot
from AIGO.go.OBO import readGOoboXML

from AIGO.Analyse import AnalyseFA
from AIGO.Report import ReportFA

from AIGO.utils.Execute import batchExecute

# Minimal end-to-end example: load one platypus functional annotation,
# compute a few statistics on it and print them.

# Reference set for platypus, read from a text-format file.
refSet = RefSet(organism="platypus",
                fileName="platypus.refSet",
                refType="Text")
# Load the GO ontology from its OBO-XML file.
G = readGOoboXML("go_daily-termdb.obo-xml")
# Build the functional annotation on top of the reference set and the
# ontology, then fill it from a file read with the "GAF" file type.
FA = FuncAnnot("platypusProject", refSet, G, organism="platypus")
FA.read("platypus.gaf", "GAF")

analyseFA = AnalyseFA()

# largestSet() is run first; its result is then read back from
# FA['largestSet'] for the log lines below.
analyseFA.largestSet([FA])
# NOTE(review): `logger` is neither imported nor defined in the visible part
# of this script -- confirm it is provided elsewhere, otherwise these two
# lines raise NameError.
logger.info("Largest sets of annotations:")
logger.info("\t%d for %s" % (FA['largestSet']['All_aspects_of_GO'], FA.name))

# Statistics to compute; the same list is reused to select what is printed.
batchList = [
    "coverage", "richness", "numberAnnot", "redundancy", "specificity",
    "informationContent", "hPrecision"
]
batchExecute(batchList, analyseFA, [FA])

# No output directory is given: the script only prints the statistics and
# never calls saveStatistics().
reportFA = ReportFA(outDir=None, name="platypusProject", organism="platypus")
reportFA.printStatistics([FA], batchList)