def test_consensusEcGraph_difference(self):
    """Compare two ways of finding EC numbers present only in the sub-group.

    Route 1 takes ``consensusEcGraph(noMultifunctional=True).getECs()`` of
    the Enterobacteriales group and subtracts the same set computed for the
    Gammaproteobacteria group (which includes the Enterobacteriales
    organisms).

    Route 2 takes ``collectiveEnzymeGraphByEcConsensus(noMultifunctional=True)``
    of both groups, calls ``removeMultifunctionalEnzymes()`` on each, collects
    the ``ecNumbers`` of every remaining enzyme and subtracts the sets.

    The test asserts that route 1 yields 87 EC numbers, that route 2 yields
    the same count, and that the symmetric difference of both results is
    empty.
    """
    FEV_KEGG.startProcessPool()

    # Both abbreviation lists are built up front, as independent list objects,
    # so neither Organism.Group call can observe changes made to the other.
    enterobacteriales_organisms_abbreviations = [
        'eco', 'ses', 'sfl', 'ent', 'esa', 'kpn', 'cko', 'ype', 'spe', 'buc'
    ]
    # extend with the sub-set, because they are also part of the set
    gammaproteobacteria_organisms_abbreviations = [
        'hin', 'mht', 'xcc', 'vch', 'pae', 'acb', 'son', 'pha', 'amc', 'lpn',
        'ftu', 'aha'
    ] + enterobacteriales_organisms_abbreviations

    enterobacteriales_organisms = Organism.Group(
        organismAbbreviations=enterobacteriales_organisms_abbreviations)
    gammaproteobacteria_organisms = Organism.Group(
        organismAbbreviations=gammaproteobacteria_organisms_abbreviations)

    # --- Route 1: difference of the consensus EC graphs' EC numbers ---
    enterobacteriales_EC_graph = enterobacteriales_organisms.consensusEcGraph(
        noMultifunctional=True)
    gammaproteobacteria_EC_graph = gammaproteobacteria_organisms.consensusEcGraph(
        noMultifunctional=True)

    enterobacteriales_EC_set = enterobacteriales_EC_graph.getECs()
    gammaproteobacteria_EC_set = gammaproteobacteria_EC_graph.getECs()

    only_enterobacteriales_EC_set = enterobacteriales_EC_set.difference(
        gammaproteobacteria_EC_set)

    result = len(only_enterobacteriales_EC_set)
    print(str(result) + ' results')
    # NOTE(review): 87 is a pinned expectation; presumably it depends on the
    # KEGG data the test runs against -- confirm when the data changes.
    self.assertEqual(result, 87)

    # --- Route 2: difference of EC numbers collected from enzyme graphs ---
    enterobacteriales_enzyme_graph = enterobacteriales_organisms.collectiveEnzymeGraphByEcConsensus(
        noMultifunctional=True)
    gammaproteobacteria_enzyme_graph = gammaproteobacteria_organisms.collectiveEnzymeGraphByEcConsensus(
        noMultifunctional=True)

    enterobacteriales_enzyme_graph.removeMultifunctionalEnzymes()
    gammaproteobacteria_enzyme_graph.removeMultifunctionalEnzymes()

    enterobacteriales_enzymes = enterobacteriales_enzyme_graph.getEnzymes()
    gammaproteobacteria_enzymes = gammaproteobacteria_enzyme_graph.getEnzymes()

    enterobacteriales_EC_set_2 = set()
    for enzyme in enterobacteriales_enzymes:
        enterobacteriales_EC_set_2.update(enzyme.ecNumbers)

    gammaproteobacteria_EC_set_2 = set()
    for enzyme in gammaproteobacteria_enzymes:
        gammaproteobacteria_EC_set_2.update(enzyme.ecNumbers)

    only_enterobacteriales_EC_set_2 = enterobacteriales_EC_set_2.difference(
        gammaproteobacteria_EC_set_2)

    result2 = len(only_enterobacteriales_EC_set_2)
    print(str(result2) + ' results')
    self.assertEqual(result2, result)

    # --- Both routes must agree on the exact EC numbers, not just the count ---
    differing_ECs = only_enterobacteriales_EC_set_2.symmetric_difference(
        only_enterobacteriales_EC_set)
    result3 = len(differing_ECs)
    print(str(result3) + ' results')

    # Print the offending EC numbers BEFORE asserting: once the assertion
    # fails nothing after it runs, so printing afterwards would only ever
    # happen when there is nothing to print.
    for ecString in sorted(str(ec) for ec in differing_ECs):
        print(ecString)

    self.assertEqual(result3, 0)
""" from FEV_KEGG.Evolution.Events import SimpleGeneDuplication, ChevronGeneDuplication, NeofunctionalisedECs,\ NeofunctionalisedEnzymes from FEV_KEGG.Evolution.Taxonomy import NCBI import FEV_KEGG.KEGG.Organism as Organism if __name__ == '__main__': output = [] #- get NCBI taxonomy tree taxonomy = NCBI.getTaxonomy() #- get group of organisms 'Archaea/Thaumarchaeota' group = Organism.Group( taxonomy.getOrganismAbbreviationsByPath('Archaea/Thaumarchaeota', oneOrganismPerSpecies=False)) #- get supergroup of organisms 'Archaea' supergroup = Organism.Group( taxonomy.getOrganismAbbreviationsByPath('Archaea', oneOrganismPerSpecies=False)) #- calculate new EC numbers occuring in group's core metabolism compared to supergroup's core metabolism newECs = group.consensusEcGraph( noMultifunctional=True).getECs().difference( supergroup.consensusEcGraph(noMultifunctional=True).getECs()) output.append('new EC numbers: ' + str(len(newECs))) #- calculate neofunctionalised EC numbers in group's core metabolism descendantEnzymeGraph = group.collectiveEnzymeGraphByEcConsensus(
'pha', 'pin', 'plu', 'ppr', 'rma', 'saz', 'sde', 'sdn', 'shm', 'tcx', 'vfi', 'vvu', 'xca' ] organisms = representativeOrganisms output.append('Representative:') elif i == 2: #- 2. get group of organisms 'Gammaproteobacteria', excluding unclassified organisms = taxonomy.getOrganismAbbreviationsByPath( 'Gammaproteobacteria', exceptPaths='unclassified', oneOrganismPerSpecies=False) output.append('\nGammaproteobacteria without unclassified:') group = Organism.Group(organisms) #- REPEAT for varying majority-percentages: for percentage in [100, 90, 80, 70, 60, 50, 40, 30, 20, 10, 1]: #- calculate EC numbers occuring in group's core metabolism ourECnumbers = group.majorityEcGraph( majorityPercentage=percentage, noMultifunctional=False).getECs() #- reduce set of EC numbers to first three levels ourECnumbers = EcNumber.insertWildcards(ourECnumbers, keepLevels=3, allowHigherWildcards=False) #- overlap Poot-Hernandez' set with ours and print amount of EC numbers inside the intersection and falling off either side
'2.7.4.6', '2.7.4.8', '2.7.4.9', '2.7.6.3', '2.7.7.18', '2.7.7.2', '2.7.7.23', '2.7.7.27', '2.7.7.3', '2.7.7.38', '2.7.7.41', '2.7.8.5', '2.7.8.8', '3.1.3.45', '3.5.4.16', '3.5.4.25', '3.5.4.26', '3.6.1.1', '3.6.1.34', '3.6.1.45', '4.1.1.36', '4.1.1.65', '4.1.2.13', '4.1.2.16', '4.1.2.25', '4.2.1.10', '4.2.1.11', '4.6.1.3', '4.6.1.4', '5.1.1.3', '5.3.1.1', '5.3.1.13', '6.3.2.12', '6.3.2.13', '6.3.2.15', '6.3.2.4', '6.3.2.5', '6.3.2.8', '6.3.2.9' ] theirECnumbers = set() for string in theirECnumberStrings: theirECnumbers.add(EcNumber(string)) #- get group of organisms 'Escherichia coli' taxonomy = NCBI.getTaxonomy() group = Organism.Group( taxonomy.getOrganismAbbreviationsByPath('Escherichia coli', oneOrganismPerSpecies=False)) #- REPEAT for varying majority-percentages: for percentage in [100, 90, 80, 70, 60, 50, 40, 30, 20, 10, 1]: #- calculate EC numbers occuring in group's core metabolism ourECnumbersWithWildcard = group.majorityEcGraph( majorityPercentage=percentage, noMultifunctional=True).getECs() ourECnumbers = EcNumber.removeWildcards(ourECnumbersWithWildcard) #- overlap Almaas' set with ours and print amount of EC numbers inside the intersection and falling off either side onlyInTheirs = theirECnumbers.difference(ourECnumbers) inBoth = theirECnumbers.intersection(ourECnumbers) onlyInOurs = ourECnumbers.difference(theirECnumbers)