def OnLoadGO(self, event):
    """
    Load a GO ontology using the settings of the GO widgets and add it to
    the project.

    The file name is taken from ``box_GO`` and the ``force`` flag from
    ``checkbox_GO`` of the frame's notebook. Once the ontology is added to
    the project, observers are notified with the tag "GO" and the status
    is updated.

    event -- the event that triggered this handler (not used here)
    """
    notebook = self.frame.notebook
    oboFile = notebook.box_GO.GetValue()
    forceFlag = notebook.checkbox_GO.GetValue()

    ontology = readGOoboXML(fileName=oboFile, force=forceFlag)
    self.project.addGO(ontology)

    # Tell the listeners that the ontology changed, then refresh the status
    self.event.notify("GO")
    self.updateStatus()
def compareRiceAffymetrixReleases(projectDir):
    """
    Compare the properties of successive releases of the Affymetrix
    annotations for a Rice array.

    One functional annotation is read per release number in
    arange(20, 32). All of them are analysed and plotted, then the first
    (20) and last (31) releases are compared with each other.

    projectDir -- root directory holding the ReferenceSet, OBO and
                  Annotation inputs; Graph output is written below it
    """
    projectName = "Affymetrix"
    organism = "rice"

    # Rice microarray target sequences define the set of gene products
    refSet = RefSet(organism,
                    "%s/ReferenceSet/%s.fasta" % (projectDir, organism),
                    refType="Fasta")

    # GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % projectDir,
                     force=False)

    # One functional annotation per Affymetrix release
    releaseNumbers = arange(20, 32)
    Affy = dict()
    for number in releaseNumbers:
        annotation = FuncAnnot(str(number), refSet, G, organism=organism)
        annotation.read("%s/Annotation/Affy_%s.na%d.annot.csv" %
                        (projectDir, organism, number),
                        fileType="AFFY")
        Affy[number] = annotation
    inReleaseOrder = [Affy[number] for number in releaseNumbers]

    # Analyse every release
    statistics = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness"
    ]
    analyseFA = AnalyseFA()
    batchExecute(statistics, analyseFA, list(inReleaseOrder))

    # Plot the statistics of every release
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Affymetrix Release number",
                    outDir=outDir,
                    name=projectName,
                    organism=organism,
                    ext="pdf")
    batchExecute(statistics,
                 plotFA,
                 list(inReleaseOrder),
                 doGrid=True,
                 lloc="upper right")

    # Compare the oldest and the newest release, then plot the comparison
    oldest, newest = Affy[20], Affy[31]
    compareFA = CompareFA()
    batchExecute(["venn", "funcSim"], compareFA, [oldest, newest])
    batchExecute(["venn", "funcSymSim"],
                 plotFA,
                 compareFA, [oldest, newest],
                 doGrid=True,
                 tit="")

    # Find the worse semantic similarity between 20 and 31
    worseFunctionalSimilarity(projectDir, organism, oldest, newest)

    # Study the evolution of Glutamine Synthetase (GS) annotations
    GSAnnotations(projectDir, organism, G, Affy)
def compareSimilarity(projectDir):
    """
    Compare similarity measures (GS2, CzekanowskiDice and Resnik) on
    bovine functional annotations.

    Two pipelines (AFFY and B2G) are read. An information content table
    is derived per pipeline and per aspect from the number of gene
    products annotated to each term, propagated to its ancestors. The
    correlation between the metrics is then logged for (1) the coherence
    of biological process annotation sets and (2) the pairwise comparison
    of molecular function annotation sets shared by both pipelines.

    projectDir -- root directory holding the ReferenceSet, OBO and
                  Annotation inputs
    """
    projectName = "simPipeline"
    organism = "bovine"
    separator = "================================================="

    # Bovine microarray probe set defines the set of gene products
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" %
                    (projectDir, organism),
                    refType="Fasta")

    # GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % projectDir,
                     force=False)

    # Functional annotations; the pipeline name doubles as the file type
    allPipeName = ["AFFY", "B2G"]
    allFileName = [
        "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
    ]
    pipeline = dict()
    for name, path in zip(allPipeName, allFileName):
        annotation = FuncAnnot(name, refSet, G, organism=organism)
        annotation.read(path, fileType=name)
        pipeline[name] = annotation

    #-----------------------------------------------
    # Analyse Functional annotations
    analyseFA = AnalyseFA()
    batchExecute(["removeUnconnected"], analyseFA,
                 [pipeline[name] for name in allPipeName])

    # Compute information content
    logger.info(separator)
    logger.info("Computing Information Content")
    allIC = dict()
    for name in allPipeName:
        FA = pipeline[name]
        logger.info("\t%s" % FA.name)
        allIC[name] = dict()
        for a in FA.G.aspect:
            # Every term passes its gene product count on to its ancestors
            counts = dict()
            for go in FA.GOtoGP[a]:
                n = len(FA.GOtoGP[a][go])
                for ans in FA.G.ancestors(FA.G.get_intid(go)):
                    counts[ans] = counts.get(ans, 0) + n
            # Turn counts into -log(count / largest count); empty aspects
            # are left as an empty table
            if len(counts.values()) != 0:
                m = max(counts.values())
                for go in counts:
                    counts[go] = -1. * log(1. * counts[go] / m)
            allIC[name][a] = counts

    # Coherence of biological process annotation sets given by three
    # different similarity metrics
    # NOTE(review): the log message below says AFFY but the pipeline
    # analysed is B2G -- confirm which one is intended.
    logger.info(separator)
    logger.info(
        "Computing functional coherence of biological process annotation sets in AFFY given by three different similarity metrics"
    )
    aspect = "biological_process"
    pipeName = "B2G"
    FA = pipeline[pipeName]
    annotSets = FA.GPtoGO[aspect]

    def coherence(**metricArgs):
        # Mean similarity inside each annotation set holding > 1 term
        return [
            mean(GOSet_Similarity(G, annotSets[gp], **metricArgs))
            for gp in annotSets if len(annotSets[gp]) > 1
        ]

    logger.info("\tGS2")
    allGS2 = coherence(metric="GS2")
    logger.info("\tCzekanowskiDice")
    allCD = coherence(metric="CzekanowskiDice")
    logger.info("\tResnik")
    allResnik = coherence(metric="Resnik", IC=allIC[FA.name])

    logger.info("\tCorrelation between GS2 and CzekanowskiDice : %.2f" %
                corrcoef(allGS2, allCD)[0][1])
    logger.info("\tCorrelation between CzekanowskiDice and Resnik: %.2f" %
                corrcoef(allCD, allResnik)[0][1])

    # Compare molecular function annotation sets in AFFY and B2G using
    # three similarity metrics
    logger.info(separator)
    logger.info(
        "Comparing molecular function annotation sets in AFFY and B2G using three different similarity metrics"
    )
    aspect = "molecular_function"
    affySets = pipeline["AFFY"].GPtoGO[aspect]
    b2gSets = pipeline["B2G"].GPtoGO[aspect]
    commonGene = set(affySets.keys()).intersection(b2gSets.keys())
    logger.info("\tProcessing %d genes" % len(commonGene))

    # NOTE(review): FA is still the B2G annotation bound above, so the
    # Resnik score of this AFFY/B2G comparison uses the IC table of B2G.
    resnikIC = allIC[FA.name]
    allGS2, allCD, allResnik = list(), list(), list()
    for gp in commonGene:
        GO1, GO2 = affySets[gp], b2gSets[gp]
        allGS2.append(GOSet_PWSimilarity(G, GO1, GO2, metric="GS2")[0])
        allCD.append(
            GOSet_PWSimilarity(G, GO1, GO2, metric="CzekanowskiDice")[0])
        allResnik.append(
            GOSet_PWSimilarity(G, GO1, GO2, metric="Resnik",
                               IC=resnikIC)[0])

    logger.info("\tCorrelation between GS2 and CzekanowskiDice : %.2f" %
                corrcoef(allGS2, allCD)[0][1])
    logger.info("\tCorrelation between CzekanowskiDice and Resnik: %.2f" %
                corrcoef(allCD, allResnik)[0][1])
def compareEvidence(projectDir):
    """
    Compare electronically inferred and manually curated annotations to
    experimental annotations.

    For every organism of a fixed list, GAF files are read per evidence
    code and merged into one functional annotation per code. Three sets
    are then analysed, plotted, compared and exported: EXP2 (the
    reference), AHC (merge of IC, ISS and NAS) and IEA. Finally the mean
    recall and precision of each evidence code against EXP2 are drawn in
    a single figure saved as PrecisionVSRecall.png.

    projectDir -- root directory holding the EvidenceCode and OBO inputs;
                  Graph and Export outputs are written below it
    """
    projectName = "EvidenceCode"
    organism = "allSpecies"
    fileType = "GAF"
    curatedCodes = ["IC", "TAS", "ISS", "NAS"]

    refSet = RefSet(organism)

    allOrg = [
        "Arabidopsis_thaliana", "Drosophila_melanogaster",
        "Mycobacterium_tuberculosis_ATCC_25618", "Schizosaccharomyces_pombe",
        "Bos_taurus", "Escherichia_coli_ATCC_27325",
        "Mycobacterium_tuberculosis_Oshkosh", "Caenorhabditis_elegans",
        "Escherichia_coli_MG1655", "Oryza_sativa", "Synechocystis_sp",
        "Candida_albicans_SC5314", "Gallus_gallus",
        "Pseudomonas_fluorescens_Pf-5", "Danio_rerio", "Homo_sapiens",
        "Rattus_norvegicus"
    ]

    # The gene products with experimental evidence define the reference set
    for refOrg in allOrg:
        fileName = "%s/EvidenceCode/%s/two_experimental_evidence.goa" % (
            projectDir, refOrg)
        refSet.add(fileName, refType="GAF")

    # Read GO ontology
    fileName = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(fileName, force=False)

    def readEvidence(pipeName, baseName, skipMissing):
        # Merge the per-organism files <baseName>.goa into one annotation
        merged = FuncAnnot(pipeName, refSet, G, organism=organism)
        for refOrg in allOrg:
            fileName = "%s/EvidenceCode/%s/%s.goa" % (projectDir, refOrg,
                                                      baseName)
            if skipMissing and not os.path.exists(fileName):
                continue
            FA = FuncAnnot(pipeName, refSet, G, organism=refOrg)
            FA.read(fileName, fileType=fileType)
            merged.add(FA)
        return merged

    allFA = dict()

    #-----------------------------------------------
    # Annotations obtained by experiments: these files were already read
    # to build the reference set, so they are not tested for existence
    allFA["EXP2"] = readEvidence("EXP2", "two_experimental_evidence", False)

    #-----------------------------------------------
    # Annotations obtained by human curation. The entry is stored even if
    # no file exists for a code, so later lookups cannot fail on the key.
    for pipeName in curatedCodes:
        allFA[pipeName] = readEvidence(pipeName, pipeName, True)

    # Merge FAs Assigned by Human Curator. TAS is read and reported on its
    # own but is not part of this merge.
    FA = FuncAnnot("AHC", refSet, G, organism=organism)
    for evidence in ["IC", "ISS", "NAS"]:
        FA.add(allFA[evidence])
    allFA["AHC"] = FA

    #-----------------------------------------------
    # Annotations obtained without human curation
    for pipeName in ["IEA"]:
        allFA[pipeName] = readEvidence(pipeName, pipeName, True)

    #-----------------------------------------------
    listFA = ["EXP2", "AHC", "IEA"]

    # Analyse Functional annotations
    analyseFA = AnalyseFA()
    batchList = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "coherence", "redundancy", "removeRedundancy",
        "compactness", "specificity", "informationContent"
    ]
    batchExecute(batchList, analyseFA,
                 [allFA[evidence] for evidence in listFA])

    # Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Evidence Codes",
                    outDir=outDir,
                    name=projectName,
                    organism=organism)
    batchExecute(batchList,
                 plotFA, [allFA[evidence] for evidence in listFA],
                 doGrid=True)

    batchList = ["coherenceHisto2D", "numberAnnotHisto2D"]
    batchExecute(batchList,
                 plotFA, [allFA[evidence] for evidence in listFA],
                 doGrid=True)

    # Compare Functional annotations
    compareFA = CompareFA()
    batchList = ["venn", "funcSim"]
    batchExecute(batchList, compareFA,
                 [allFA[evidence] for evidence in listFA])
    batchList = ["recall", "precision"]
    batchExecute(batchList, compareFA,
                 [allFA[evidence] for evidence in listFA])

    # Plot statistics of the comparison between Functional annotations
    batchList = ["venn", "funcSymSim"]
    batchExecute(batchList, plotFA, compareFA,
                 [allFA[evidence] for evidence in listFA])
    batchList = ["recall", "precision"]
    batchExecute(batchList, plotFA, compareFA,
                 [allFA[evidence] for evidence in listFA])

    #-----------------------------------------------
    # Export statistics to Excel
    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)
    exportList = [
        "unconnected", "coverage", "richness", "numberAnnot", "coherence",
        "compactness", "specificity", "informationContent", "redundancy"
    ]
    reportFA = ReportFA(outDir=outDir, name=projectName, organism=organism)
    reportFA.printStatistics([allFA[evidence] for evidence in listFA],
                             exportList)
    reportFA.saveStatistics([allFA[evidence] for evidence in listFA],
                            exportList)

    #-----------------------------------------------
    # Individual contributions of evidence codes, as sets of (gp, go) pairs
    contribution = dict()
    for ec in ["IC", "TAS", "ISS", "NAS", "IEA"]:
        contribution[ec] = set(
            (gp, go) for aspect in
            ["cellular_component", "molecular_function", "biological_process"]
            for gp in allFA[ec].GPtoGO[aspect]
            for go in allFA[ec].GPtoGO[aspect][gp])

    total_Annotation = sum([len(contribution[ec]) for ec in curatedCodes])
    if total_Annotation:
        for ec in curatedCodes:
            # Single-argument print(...) behaves the same on Python 2 and 3
            print("%.02f %% of the annotations are supported by %s" %
                  (100. * len(contribution[ec]) / total_Annotation, ec))
    else:
        # Avoid dividing by zero when no curated annotation was read
        print("No annotations supported by IC, TAS, ISS or NAS were found")

    batchList = ["recall", "precision"]
    batchExecute(batchList, compareFA, [
        allFA[evidence]
        for evidence in ["EXP2", "ISS", "TAS", "NAS", "IC", "AHC", "IEA"]
    ])

    #-----------------------------------------------
    # Mean precision against mean recall, one marker per evidence code and
    # one colour per aspect, with the standard error as error bars
    reference = "EXP2"
    plotEvidence = ["ISS", "TAS", "NAS", "IC", "AHC", "IEA"]

    evidenceMarker = dict(zip(plotEvidence, ['s', 'd', 'D', '*', 'p', 'h']))
    evidenceSize = dict(zip(plotEvidence, [8, 8, 8, 8, 15, 15]))
    aspectColor = dict(zip(allAspect, ["blue", "green", "red", "cyan"]))

    figure(figsize=(8, 8))
    for evidence in plotEvidence:
        for aspect in allAspect:
            if aspect == "All_aspects_of_GO":
                continue

            # list() keeps mean/std working when values() is a dict view
            allX = list(
                compareFA['recall'][aspect][(evidence, reference)].values())
            allY = list(
                compareFA['precision'][aspect][(evidence, reference)].values())

            meanX = mean(allX)
            errX = std(allX) / sqrt(len(allX))
            meanY = mean(allY)
            errY = std(allY) / sqrt(len(allY))

            # NOTE(review): the `hold` keyword is not accepted by recent
            # matplotlib releases -- confirm the targeted version.
            errorbar(meanX,
                     meanY,
                     xerr=errX,
                     yerr=errY,
                     alpha=0.9,
                     hold=True,
                     mfc=aspectColor[aspect],
                     ecolor=aspectColor[aspect],
                     marker=evidenceMarker[evidence],
                     ms=evidenceSize[evidence])

    xlabel("Verspoor Hierarchical Recall")
    ylabel("Verspoor Hierarchical Precision")

    # Legend: three coloured entries for the aspects followed by one white
    # entry per evidence marker
    allMarker = ['o', 'o', 'o', 's', 'd', 'D', '*', 'p', 'h']
    allColor = [
        "green", "red", "cyan", "white", "white", "white", "white", "white",
        "white"
    ]
    allLabel = [
        aspect.replace("_", " ") for aspect in allAspect
        if not aspect == "All_aspects_of_GO"
    ]
    allLabel.extend(plotEvidence)

    foo = [
        Line2D(arange(5), arange(5), ls='-', marker=m, color=c, label=l)
        for m, c, l in zip(allMarker, allColor, allLabel)
    ]
    leg = legend(foo, allLabel, loc="upper left", numpoints=1)
    leg.legendPatch.set_alpha(0.5)

    grid()

    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    figName = "%s/PrecisionVSRecall.png" % outDir
    savefig(figName)
def compare_COPSAandB2G(projectDir):
    """
    Split the COPSA and B2G wheat annotations into shared and exclusive
    subsets and pass the four subsets to compareCoexpression.

    Four annotations are derived, aspect by aspect:
      COPSAandB2G -- COPSA annotations of gene products also in B2G
      B2GandCOPSA -- B2G annotations of gene products also in COPSA
      COPSAonly   -- COPSA annotations of gene products not in B2G
      B2Gonly     -- B2G annotations of gene products not in COPSA

    projectDir -- root directory holding the ReferenceSet, OBO and
                  Annotation inputs; Graph output is written below it
    """
    organism = "wheat"
    projectName = "MATT"

    # The reference fasta file defines the set of gene products
    fileName = "%s/ReferenceSet/%s.fasta" % (projectDir, organism)
    refSet = RefSet(organism=organism, fileName=fileName, refType="Fasta")

    # Read GO ontology
    fileName = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(fileName, force=False)

    # Read Functional annotations
    allFileName = [
        "%s/Annotation/COPSA_%s.tab" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
    ]
    allPipeName = ["COPSA", "B2G"]
    allFileType = ["GP2GO", "B2G"]

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName,
                                            allFileType):
        FA = FuncAnnot(pipeName, refSet, G, organism=organism)
        FA.read(fileName, fileType=fileType)
        pipeline[pipeName] = FA

    def addSubset(name, source, other, keepShared):
        # Copy `source`, then per aspect drop the gene products that are
        # missing from `other` (keepShared) or present in it (otherwise)
        FA = FuncAnnot(name, refSet, G, organism=organism)
        FA.add(pipeline[source])
        for aspect in G.aspect:
            otherGP = pipeline[other].GPtoGO[aspect].keys()
            if keepShared:
                toRemove = set(FA.GPtoGO[aspect].keys()).difference(otherGP)
            else:
                toRemove = otherGP
            FA.removeGP(toRemove, myAspects=[aspect])
        pipeline[FA.name] = FA
        allPipeName.append(FA.name)

    # COPSA annotations but only for GPs that are also annotated by B2G
    addSubset("COPSAandB2G", "COPSA", "B2G", True)
    # B2G annotations but only for GPs that are also annotated by COPSA
    addSubset("B2GandCOPSA", "B2G", "COPSA", True)
    # COPSA annotations only: remove the GPs that are in B2G
    addSubset("COPSAonly", "COPSA", "B2G", False)
    # B2G annotations only: remove the GPs that are in COPSA
    addSubset("B2Gonly", "B2G", "COPSA", False)

    # Analyse Functional annotations (only this single step is executed)
    analyseFA = AnalyseFA()
    batchList = ["removeUnconnected"]
    batchExecute(batchList, analyseFA,
                 [pipeline[pipeName] for pipeName in allPipeName])

    # Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Annotation pipelines",
                    outDir=outDir,
                    name=projectName,
                    organism=organism)
    batchExecute(batchList,
                 plotFA, [pipeline[pipeName] for pipeName in allPipeName],
                 doGrid=True)

    compareCoexpression([
        pipeline[name]
        for name in ["COPSAandB2G", "B2GandCOPSA", "COPSAonly", "B2Gonly"]
    ])
def compareWheatPipelines(projectDir):
    """
    Compare the properties of six functional annotations (Blast, Pfam,
    Merge, COPSA, AFFY and B2G) for a wheat array.

    All six annotations are analysed, plotted and exported; the pairwise
    comparison is restricted to COPSA, AFFY and B2G.

    projectDir -- root directory holding the ReferenceSet, OBO and
                  Annotation inputs; Graph and Export outputs are written
                  below it
    """
    organism = "wheat"
    projectName = "MATT"

    # The reference fasta file defines the set of gene products
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" %
                    (projectDir, organism),
                    refType="Fasta")

    # GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % projectDir,
                     force=False)

    # Functional annotations: (pipeline name, file name, file type)
    sources = [
        ("Blast",
         "%s/Annotation/blast2goPaths_fin_aracyc_%s_unionBest.tab" %
         (projectDir, organism), "GP2GO"),
        ("Pfam", "%s/Annotation/pfam2goPaths2_%s_unionBest.tab" %
         (projectDir, organism), "GP2GO"),
        ("Merge",
         "%s/Annotation/pfam2goPaths2_%s_unionBest___blast2goPaths_fin_aracyc_%s_unionBest_merged.tab"
         % (projectDir, organism, organism), "GP2GO"),
        ("COPSA", "%s/Annotation/COPSA_%s.tab" % (projectDir, organism),
         "GP2GO"),
        ("AFFY", "%s/Annotation/Affy_%s.annot.csv" % (projectDir, organism),
         "AFFY"),
        ("B2G", "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
         "B2G"),
    ]
    allPipeName = [name for name, _, _ in sources]

    pipeline = dict()
    for name, path, kind in sources:
        annotation = FuncAnnot(name, refSet, G, organism=organism)
        annotation.read(path, fileType=kind)
        pipeline[name] = annotation

    everyFA = [pipeline[name] for name in allPipeName]
    comparedFA = [pipeline[name] for name in ["COPSA", "AFFY", "B2G"]]

    # Analyse Functional annotations
    statistics = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "coherence", "redundancy", "compactness", "specificity",
        "informationContent"
    ]
    analyseFA = AnalyseFA()
    batchExecute(statistics, analyseFA, list(everyFA))

    # Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Annotation pipelines",
                    outDir=outDir,
                    name=projectName,
                    organism=organism)
    batchExecute(statistics, plotFA, list(everyFA), doGrid=True)
    batchExecute(["coherenceHisto2D", "numberAnnotHisto2D"],
                 plotFA,
                 list(everyFA),
                 doGrid=True)

    # Compare Functional annotations, then plot the comparison
    compareFA = CompareFA()
    batchExecute(["venn", "funcSim"], compareFA, list(comparedFA))
    batchExecute(["venn", "funcSymSim"], plotFA, compareFA, list(comparedFA))

    #-----------------------------------------------
    # Export statistics to Excel
    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)
    exportList = [
        "unconnected", "coverage", "numberAnnot", "richness", "coherence",
        "compactness", "specificity", "informationContent", "redundancy"
    ]
    reportFA = ReportFA(outDir=outDir, name=projectName, organism=organism)
    reportFA.printStatistics(list(everyFA), exportList)
    reportFA.saveStatistics(list(everyFA), exportList)
def GOFrequencyBovinePipelines(projectDir):
    """
    Plot the frequency of GO terms in three bovine functional annotations
    (AFFY, B2G and AID).

    For every aspect and every pipeline, each GO term of the aspect gets
    the value log(1 + number of annotated gene products), rescaled so that
    the most frequent term maps to 256. One figure is written per
    pipeline and aspect.

    projectDir -- root directory holding the ReferenceSet, OBO and
                  Annotation inputs; figures go to Graph/bovine below it
    """
    projectName = "bovinePipeline"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    logger.info(banner)
    logger.info(
        "This function plot the frequency of GO terms from 3 functional annotations for a Bovine array"
    )
    logger.info(banner)

    logger.info("name of the project : %s " % projectName)

    # Read bovine microarray probe set to define the set of gene products
    fileName = "%s/ReferenceSet/%s.fasta" % (projectDir, organism)
    refSet = RefSet(organism=organism, fileName=fileName, refType="Fasta")

    # Read GO ontology
    fileName = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(fileName, force=False)

    # Read Functional annotations; the pipeline name doubles as file type
    allFileName = [
        "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
        "%s/Annotation/AID_%s.txt" % (projectDir, organism),
    ]
    allPipeName = ["AFFY", "B2G", "AID"]
    allFileType = allPipeName

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName,
                                            allFileType):
        FA = FuncAnnot(pipeName, refSet, G, organism=organism)
        FA.read(fileName, fileType=fileType)
        pipeline[pipeName] = FA

    #----------------------------------------------
    # Plot frequency of GO terms in a radial graph
    outDir = "%s/Graph/%s" % (projectDir, organism)

    logger.info("=================================================")
    logger.info("Plotting frequency of GO terms")
    logger.info("directory : %s" % outDir)

    for aspect in G.aspect:
        logger.info("%s : " % aspect)

        # The node list does not depend on the pipeline: fetch it once
        nodes = list(G.get_NodesfromAspect(aspect))

        # The value returned by the first plot of an aspect is handed to
        # the following plots of the same aspect
        A = None
        for pipeName in allPipeName:
            FA = pipeline[pipeName]
            logFreq = array([
                log(1 + len(FA.GOtoGP[aspect].get(go, []))) for go in nodes
            ])

            # An aspect with no annotated term has a maximum of 0; dividing
            # by it would produce NaN values that cannot be cast to int
            top = max(logFreq) if len(logFreq) else 0
            if top > 0:
                scaled = [int(round(n)) for n in logFreq / top * 256.]
            else:
                scaled = [0] * len(logFreq)

            freq = dict(zip(nodes, scaled))

            figName = "%s/Frequency_%s_%s.png" % (outDir, FA.name, aspect)
            A = G.plot_FrequencyGraph(aspect,
                                      freq,
                                      figName=figName,
                                      ttl="",
                                      graphviz=A)

    logger.info(banner)
    logger.info("")
def worseFunctionalSimilarity(projectDir):
    """
    Identify the ten most different annotation sets between Affymetrix
    and Blast2GO for a Bovine array.

    For every aspect, each gene product annotated by both pipelines gets
    the average of the two values returned in the second element of
    GOSet_PWSimilarity. The N gene products with the lowest average are
    plotted with compare_InducedGraph (fewer when the pipelines share
    fewer than N gene products).

    projectDir -- root directory holding the ReferenceSet, OBO and
                  Annotation inputs; figures go to
                  Graph/bovine/WorseFuncSim below it
    """
    from AIGO.Similarity import GOSet_PWSimilarity

    projectName = "bovinePipeline"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    logger.info(banner)
    logger.info(
        "This function identifies the ten most different annotation sets between Affymetrix and Blast2GO for a Bovine array"
    )
    logger.info(banner)

    logger.info("name of the project : %s " % projectName)

    # Read bovine microarray probe set to define the set of gene products
    fileName = "%s/ReferenceSet/%s.fasta" % (projectDir, organism)
    refSet = RefSet(organism=organism, fileName=fileName, refType="Fasta")

    # Read GO ontology
    fileName = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(fileName, force=False)

    fileName = "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism)
    FA1 = FuncAnnot("AFFY", refSet, G, organism=organism)
    FA1.read(fileName, fileType="AFFY")

    fileName = "%s/Annotation/B2G_%s.annot" % (projectDir, organism)
    FA2 = FuncAnnot("B2G", refSet, G, organism=organism)
    FA2.read(fileName, fileType="B2G")

    # Analyse Functional annotations
    analyseFA = AnalyseFA()
    batchExecute(["removeUnconnected"], analyseFA, [FA1, FA2])

    outDir = "%s/Graph/%s/WorseFuncSim" % (projectDir, organism)
    createDir(outDir)

    N = 10

    logger.info("=================================================")
    logger.info("Plotting the %d most dissimilar annotation sets" % N)
    logger.info("directory : %s" % outDir)

    for aspect in G.aspect:
        # Freeze the shared gene products in a list once, so positions in
        # allD and in commonGene stay aligned
        commonGene = list(
            set(FA1.GPtoGO[aspect].keys()).intersection(
                FA2.GPtoGO[aspect].keys()))

        logger.info("%s : processing %d annotation sets " %
                    (aspect, len(commonGene)))

        # Average of the two values returned for each gene product
        allD = list()
        for g in commonGene:
            _, l = GOSet_PWSimilarity(G, FA1.GPtoGO[aspect][g],
                                      FA2.GPtoGO[aspect][g])
            allD.append((array(l[0]) + array(l[1])) / 2.)

        # Ascending sort: the lowest averages come first. Slicing keeps the
        # loop valid when fewer than N gene products are shared.
        for i in argsort(allD)[:N]:
            gp = commonGene[i]

            figName = "%s/%s_annotation_%s_from_%s_%s.png" % (
                outDir, aspect, gp, FA1.name, FA2.name)
            ttl = "%s annotations of %s from %s (green) and %s (red) : Functional similarity = %.2f" % (
                aspect.replace("_", " "), gp, FA1.name, FA2.name, allD[i])
            FA1.G.compare_InducedGraph(FA1.GPtoGO[aspect][gp],
                                       FA2.GPtoGO[aspect][gp],
                                       figName=figName,
                                       ttl=ttl)

    logger.info(banner)
    logger.info("")
def compareBovineAndRandom(projectDir):
    """
    Compare the properties of 3 functional annotations for a Bovine array
    plus a randomized version of the Affymetrix functional annotations.

    The Affymetrix file is read twice: once as "AFFY" and once as
    "resample"; only the latter goes through the "sampleAnnotation" batch
    of RandomizeFA. The four annotations are then analysed and plotted.

    projectDir -- root directory holding the ReferenceSet, OBO and
                  Annotation inputs; Graph output is written below it
    """
    projectName = "BovineAndRandom"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    logger.info(banner)
    logger.info(
        "This function compare the properties of 3 functional annotations for a Bovine array + a randomize version of Affymetrix functional annotations."
    )
    logger.info(banner)

    logger.info("name of the project : %s " % projectName)

    # Read bovine microarray probe set to define the set of gene products
    fileName = "%s/ReferenceSet/%s.fasta" % (projectDir, organism)
    refSet = RefSet(organism=organism, fileName=fileName, refType="Fasta")

    # Read GO ontology
    fileName = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(fileName, force=False)

    # Read Functional annotations
    affyFile = "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism)
    allFileName = [
        affyFile,
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
        # Single separator, consistent with the other annotation paths
        "%s/Annotation/AID_%s.txt" % (projectDir, organism),
        # Same Affymetrix file again: this copy is the one resampled below
        affyFile,
    ]
    allPipeName = ["AFFY", "B2G", "AID", "resample"]
    allFileType = ["AFFY", "B2G", "AID", "AFFY"]

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName,
                                            allFileType):
        FA = FuncAnnot(pipeName, refSet, G, organism=organism)
        FA.read(fileName, fileType=fileType)
        pipeline[pipeName] = FA

    # Randomize FA
    randomizeFA = RandomizeFA()
    analyseFA = AnalyseFA()

    #-----------------------------------------------
    # Resample the "resample" functional annotation only
    batchList = ["sampleAnnotation"]
    batchExecute(batchList, randomizeFA,
                 [pipeline[pipeName] for pipeName in ["resample"]])

    batchList = ["coherence", "redundancy", "numberAnnot"]
    batchExecute(batchList, analyseFA,
                 [pipeline[pipeName] for pipeName in allPipeName])

    # Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Annotation pipelines",
                    outDir=outDir,
                    name="Resample",
                    organism=organism,
                    ext="png")
    batchExecute(batchList,
                 plotFA, [pipeline[pipeName] for pipeName in allPipeName],
                 doGrid=True)

    batchList = ["coherenceHisto2D", "numberAnnotHisto2D"]
    batchExecute(batchList,
                 plotFA, [pipeline[pipeName] for pipeName in allPipeName],
                 doGrid=True,
                 tit="")

    logger.info(banner)
    logger.info("")
def compareRandomizePipelines(projectDir):
    """
    Compare the properties of 3 randomized functional annotations (AFFY,
    B2G and AID) for a Bovine array.

    The annotations first go through the "shuffleAnnotation" batch and
    are analysed and exported, then the same objects go through the
    "sampleAnnotation" batch and are analysed and exported again.

    projectDir -- root directory holding the ReferenceSet, OBO and
                  Annotation inputs; Export output is written below it
    """
    projectName = "randomizePipeline"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    for message in (
            banner,
            "This function compare the properties of 3 randomized functional annotations for a Bovine array.",
            banner,
            "name of the project : %s " % projectName,
    ):
        logger.info(message)

    # Bovine microarray probe set defines the set of gene products
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" %
                    (projectDir, organism),
                    refType="Fasta")

    # GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % projectDir,
                     force=False)

    # Functional annotations; the pipeline name doubles as the file type
    allPipeName = ["AFFY", "B2G", "AID"]
    allFileName = [
        "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
        "%s/Annotation/AID_%s.txt" % (projectDir, organism),
    ]
    pipeline = dict()
    for name, path in zip(allPipeName, allFileName):
        annotation = FuncAnnot(name, refSet, G, organism=organism)
        annotation.read(path, fileType=name)
        pipeline[name] = annotation
    everyFA = [pipeline[name] for name in allPipeName]

    randomizeFA = RandomizeFA()

    #-----------------------------------------------
    # Shuffle functional annotation
    batchExecute(["shuffleAnnotation"], randomizeFA, list(everyFA))

    # Analyse the shuffled annotations
    analyseFA = AnalyseFA()
    batchExecute(["coherence", "redundancy"], analyseFA, list(everyFA))

    # Export statistics to Excel
    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)
    exportList = ["coherence", "redundancy"]
    report = ReportFA(name="Randomize shuffle",
                      outDir=outDir,
                      organism=organism)
    report.printStatistics(list(everyFA), exportList)
    report.saveStatistics(list(everyFA), exportList)

    #-----------------------------------------------
    # Resample functional annotation
    batchExecute(["sampleAnnotation"], randomizeFA, list(everyFA))

    # Analyse the resampled annotations
    statistics = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "redundancy", "specificity", "informationContent"
    ]
    batchExecute(statistics, analyseFA, list(everyFA))

    # Export statistics to Excel
    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)
    exportList = [
        "coverage", "numberAnnot", "richness", "specificity",
        "informationContent", "redundancy"
    ]
    report = ReportFA(name="Randomize sample",
                      outDir=outDir,
                      organism=organism)
    report.printStatistics(list(everyFA), exportList)
    report.saveStatistics(list(everyFA), exportList)

    logger.info(banner)
    logger.info("")
def compareBovinePipelines(projectDir):
    """
    Compare the properties of 3 functional annotations (AFFY, B2G and
    AID) for a Bovine array.

    The annotations are analysed, the size of their largest annotation
    set is logged, then they are plotted, compared with each other and
    their statistics exported.

    projectDir -- root directory holding the ReferenceSet, OBO and
                  Annotation inputs; Graph and Export outputs are written
                  below it
    """
    projectName = "bovinePipeline"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    for message in (
            banner,
            "This function compare the properties of 3 functional annotations for a Bovine array.",
            banner,
            "name of the project : %s " % projectName,
    ):
        logger.info(message)

    # Bovine microarray probe set defines the set of gene products
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" %
                    (projectDir, organism),
                    refType="Fasta")

    # GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % projectDir,
                     force=False)

    # Functional annotations; the pipeline name doubles as the file type
    allPipeName = ["AFFY", "B2G", "AID"]
    allFileName = [
        "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
        "%s/Annotation/AID_%s.txt" % (projectDir, organism),
    ]
    pipeline = dict()
    for name, path in zip(allPipeName, allFileName):
        annotation = FuncAnnot(name, refSet, G, organism=organism)
        annotation.read(path, fileType=name)
        pipeline[name] = annotation
    everyFA = [pipeline[name] for name in allPipeName]

    #-----------------------------------------------
    # Analyse Functional annotations
    analyseFA = AnalyseFA()
    statistics = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "redundancy", "specificity", "informationContent"
    ]
    batchExecute(statistics, analyseFA, list(everyFA))

    # How big are the largest annotation sets ?
    analyseFA.largestSet(list(everyFA))
    logger.info("The largest sets of annotations are :")
    for name in allPipeName:
        FA = pipeline[name]
        largest = FA['largestSet']['All_aspects_of_GO']
        logger.info("\t%d for %s" % (largest, FA.name))

    # Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Annotation pipelines",
                    outDir=outDir,
                    name=projectName,
                    organism=organism,
                    ext="png")
    batchExecute(statistics, plotFA, list(everyFA), doGrid=True)
    batchExecute(["numberAnnotHisto2D"],
                 plotFA,
                 list(everyFA),
                 doGrid=True,
                 tit="")

    #-----------------------------------------------
    # Compare Functional annotations, then plot the comparison
    compareFA = CompareFA()
    batchExecute(["venn", "funcSim"], compareFA, list(everyFA))
    batchExecute(["venn", "funcSymSim"],
                 plotFA,
                 compareFA,
                 list(everyFA),
                 tit="")

    #-----------------------------------------------
    # Export statistics to Excel
    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)
    exportList = [
        "unconnected", "coverage", "richness", "numberAnnot", "specificity",
        "informationContent", "redundancy"
    ]
    reportFA = ReportFA(outDir=outDir, name=projectName, organism=organism)
    reportFA.printStatistics(list(everyFA), exportList)
    reportFA.saveStatistics(list(everyFA), exportList)

    logger.info(banner)
    logger.info("")
#!/usr/bin/env python
# Example script: read the platypus functional annotation from a GAF file,
# log the size of its largest annotation set and run a batch of analyses.

from AIGO import logger

from AIGO.ReferenceSet import RefSet
from AIGO.FunctionalAnnotation import FuncAnnot
from AIGO.go.OBO import readGOoboXML

from AIGO.Analyse import AnalyseFA
from AIGO.Report import ReportFA

from AIGO.utils.Execute import batchExecute

# Reference set and ontology the annotation is built on
refSet = RefSet(organism="platypus",
                fileName="platypus.refSet",
                refType="Text")
G = readGOoboXML("go_daily-termdb.obo-xml")

# Functional annotation read from the GAF file
FA = FuncAnnot("platypusProject", refSet, G, organism="platypus")
FA.read("platypus.gaf", "GAF")

analyseFA = AnalyseFA()

# Size of the largest annotation set over all aspects
analyseFA.largestSet([FA])
largestSetSize = FA['largestSet']['All_aspects_of_GO']
logger.info("Largest sets of annotations:")
logger.info("\t%d for %s" % (largestSetSize, FA.name))

# Remaining analyses, executed in the listed order
batchList = [
    "coverage",
    "richness",
    "numberAnnot",
    "redundancy",
    "specificity",
    "informationContent",
    "hPrecision",
]
batchExecute(batchList, analyseFA, [FA])