def test_consensusEcGraph_difference(self):
        """Check that two routes to the Enterobacteriales-only EC numbers agree.

        Route 1 takes the EC numbers of each group's ``consensusEcGraph`` and
        subtracts the Gammaproteobacteria set from the Enterobacteriales set.
        Route 2 collects the ``ecNumbers`` of every enzyme in each group's
        ``collectiveEnzymeGraphByEcConsensus`` and forms the same difference.

        The test asserts that route 1 yields 87 EC numbers, that route 2
        yields the same count, and that the symmetric difference between the
        two resulting sets is empty.
        """
        FEV_KEGG.startProcessPool()

        enterobacteriales_organisms_abbreviations = [
            'eco', 'ses', 'sfl', 'ent', 'esa', 'kpn', 'cko', 'ype', 'spe',
            'buc'
        ]
        enterobacteriales_organisms = Organism.Group(
            organismAbbreviations=enterobacteriales_organisms_abbreviations)

        gammaproteobacteria_organisms_abbreviations = [
            'hin', 'mht', 'xcc', 'vch', 'pae', 'acb', 'son', 'pha', 'amc',
            'lpn', 'ftu', 'aha'
        ]
        # Extend with the sub-set, because they are also part of the set.
        gammaproteobacteria_organisms_abbreviations.extend(
            enterobacteriales_organisms_abbreviations)
        gammaproteobacteria_organisms = Organism.Group(
            organismAbbreviations=gammaproteobacteria_organisms_abbreviations)

        # Route 1: difference of the consensus EC graphs' EC numbers.
        enterobacteriales_EC_graph = enterobacteriales_organisms.consensusEcGraph(
            noMultifunctional=True)
        gammaproteobacteria_EC_graph = gammaproteobacteria_organisms.consensusEcGraph(
            noMultifunctional=True)
        enterobacteriales_EC_set = enterobacteriales_EC_graph.getECs()
        gammaproteobacteria_EC_set = gammaproteobacteria_EC_graph.getECs()
        only_enterobacteriales_EC_set = enterobacteriales_EC_set.difference(
            gammaproteobacteria_EC_set)

        result = len(only_enterobacteriales_EC_set)
        print(str(result) + ' results')
        # NOTE(review): 87 is a hard-coded expectation; presumably it depends
        # on the underlying KEGG data snapshot -- confirm when data changes.
        self.assertEqual(result, 87)

        # Route 2: EC numbers gathered from the collective enzyme graphs.
        enterobacteriales_enzyme_graph = enterobacteriales_organisms.collectiveEnzymeGraphByEcConsensus(
            noMultifunctional=True)
        gammaproteobacteria_enzyme_graph = gammaproteobacteria_organisms.collectiveEnzymeGraphByEcConsensus(
            noMultifunctional=True)
        # NOTE(review): the graphs were already requested with
        # noMultifunctional=True; these calls are kept from the original --
        # confirm whether they are redundant.
        enterobacteriales_enzyme_graph.removeMultifunctionalEnzymes()
        gammaproteobacteria_enzyme_graph.removeMultifunctionalEnzymes()
        enterobacteriales_enzymes = enterobacteriales_enzyme_graph.getEnzymes()
        gammaproteobacteria_enzymes = gammaproteobacteria_enzyme_graph.getEnzymes()

        enterobacteriales_EC_set_2 = set()
        for enzyme in enterobacteriales_enzymes:
            enterobacteriales_EC_set_2.update(enzyme.ecNumbers)

        gammaproteobacteria_EC_set_2 = set()
        for enzyme in gammaproteobacteria_enzymes:
            gammaproteobacteria_EC_set_2.update(enzyme.ecNumbers)

        only_enterobacteriales_EC_set_2 = enterobacteriales_EC_set_2.difference(
            gammaproteobacteria_EC_set_2)

        result2 = len(only_enterobacteriales_EC_set_2)
        print(str(result2) + ' results')
        self.assertEqual(result2, result)

        # EC numbers found by exactly one of the two routes.
        mismatched_ec_strings = sorted(
            str(ec)
            for ec in only_enterobacteriales_EC_set_2.symmetric_difference(
                only_enterobacteriales_EC_set))
        result3 = len(mismatched_ec_strings)
        print(str(result3) + ' results')

        # Report the offending EC numbers BEFORE asserting: printing them
        # after the assertion would only ever run when the list is empty.
        for ecString in mismatched_ec_strings:
            print(ecString)

        self.assertEqual(
            result3, 0,
            'EC numbers differing between both routes: ' +
            ', '.join(mismatched_ec_strings))
Example #2
0
                'pha', 'pin', 'plu', 'ppr', 'rma', 'saz', 'sde', 'sdn', 'shm',
                'tcx', 'vfi', 'vvu', 'xca'
            ]
            organisms = representativeOrganisms
            output.append('Representative:')

        elif i == 2:

            #-     2. get group of organisms 'Gammaproteobacteria', excluding unclassified
            organisms = taxonomy.getOrganismAbbreviationsByPath(
                'Gammaproteobacteria',
                exceptPaths='unclassified',
                oneOrganismPerSpecies=False)
            output.append('\nGammaproteobacteria without unclassified:')

        group = Organism.Group(organisms)

        #-     REPEAT for varying majority-percentages:
        for percentage in [100, 90, 80, 70, 60, 50, 40, 30, 20, 10, 1]:

            #-         calculate EC numbers occurring in group's core metabolism
            ourECnumbers = group.majorityEcGraph(
                majorityPercentage=percentage,
                noMultifunctional=False).getECs()

            #-         reduce set of EC numbers to first three levels
            ourECnumbers = EcNumber.insertWildcards(ourECnumbers,
                                                    keepLevels=3,
                                                    allowHigherWildcards=False)

            #-         overlap Poot-Hernandez' set with ours and print amount of EC numbers inside the intersection and falling off either side
Example #3
0
"""
from FEV_KEGG.Evolution.Events import SimpleGeneDuplication, ChevronGeneDuplication, NeofunctionalisedECs,\
    NeofunctionalisedEnzymes
from FEV_KEGG.Evolution.Taxonomy import NCBI
import FEV_KEGG.KEGG.Organism as Organism

if __name__ == '__main__':

    output = []

    #- get NCBI taxonomy tree
    taxonomy = NCBI.getTaxonomy()

    #- get group of organisms 'Archaea/Thaumarchaeota'
    group = Organism.Group(
        taxonomy.getOrganismAbbreviationsByPath('Archaea/Thaumarchaeota',
                                                oneOrganismPerSpecies=False))

    #- get supergroup of organisms 'Archaea'
    supergroup = Organism.Group(
        taxonomy.getOrganismAbbreviationsByPath('Archaea',
                                                oneOrganismPerSpecies=False))

    #- calculate new EC numbers occurring in group's core metabolism compared to supergroup's core metabolism
    newECs = group.consensusEcGraph(
        noMultifunctional=True).getECs().difference(
            supergroup.consensusEcGraph(noMultifunctional=True).getECs())
    output.append('new EC numbers: ' + str(len(newECs)))

    #- calculate neofunctionalised EC numbers in group's core metabolism
    descendantEnzymeGraph = group.collectiveEnzymeGraphByEcConsensus(
Example #4
0
        '2.5.1.15', '2.5.1.19', '2.5.1.7', '2.5.1.9', '2.6.1.16', '2.7.1.107',
        '2.7.1.130', '2.7.1.23', '2.7.1.24', '2.7.1.26', '2.7.1.33', '2.7.2.3',
        '2.7.4.6', '2.7.4.8', '2.7.4.9', '2.7.6.3', '2.7.7.18', '2.7.7.2',
        '2.7.7.23', '2.7.7.27', '2.7.7.3', '2.7.7.38', '2.7.7.41', '2.7.8.5',
        '2.7.8.8', '3.1.3.45', '3.5.4.16', '3.5.4.25', '3.5.4.26', '3.6.1.1',
        '3.6.1.34', '3.6.1.45', '4.1.1.36', '4.1.1.65', '4.1.2.13', '4.1.2.16',
        '4.1.2.25', '4.2.1.10', '4.2.1.11', '4.6.1.3', '4.6.1.4', '5.1.1.3',
        '5.3.1.1', '5.3.1.13', '6.3.2.12', '6.3.2.13', '6.3.2.15', '6.3.2.4',
        '6.3.2.5', '6.3.2.8', '6.3.2.9'
    ]
    theirECnumbers = set()
    for string in theirECnumberStrings:
        theirECnumbers.add(EcNumber(string))

    #- get group of organisms 'Escherichia coli'
    eco = Organism.Organism('eco')

    #- calculate EC numbers occurring in eco's core metabolism
    ourECnumbersWithWildcard = eco.substanceEcGraph(
        noMultifunctional=True).getECs()
    ourECnumbers = EcNumber.removeWildcards(ourECnumbersWithWildcard)

    #- overlap Almaas' set with ours and print amount of EC numbers inside the intersection and falling off either side
    onlyInTheirs = theirECnumbers.difference(ourECnumbers)
    inBoth = theirECnumbers.intersection(ourECnumbers)
    onlyInOurs = ourECnumbers.difference(theirECnumbers)

    output.append(
        str(len(onlyInTheirs)) + '\t' + str(len(inBoth)) + '\t' +
        str(len(onlyInOurs)))
Example #5
0
        '2.7.4.6', '2.7.4.8', '2.7.4.9', '2.7.6.3', '2.7.7.18', '2.7.7.2',
        '2.7.7.23', '2.7.7.27', '2.7.7.3', '2.7.7.38', '2.7.7.41', '2.7.8.5',
        '2.7.8.8', '3.1.3.45', '3.5.4.16', '3.5.4.25', '3.5.4.26', '3.6.1.1',
        '3.6.1.34', '3.6.1.45', '4.1.1.36', '4.1.1.65', '4.1.2.13', '4.1.2.16',
        '4.1.2.25', '4.2.1.10', '4.2.1.11', '4.6.1.3', '4.6.1.4', '5.1.1.3',
        '5.3.1.1', '5.3.1.13', '6.3.2.12', '6.3.2.13', '6.3.2.15', '6.3.2.4',
        '6.3.2.5', '6.3.2.8', '6.3.2.9'
    ]
    theirECnumbers = set()
    for string in theirECnumberStrings:
        theirECnumbers.add(EcNumber(string))

    #- get group of organisms 'Escherichia coli'
    taxonomy = NCBI.getTaxonomy()
    group = Organism.Group(
        taxonomy.getOrganismAbbreviationsByPath('Escherichia coli',
                                                oneOrganismPerSpecies=False))

    #- REPEAT for varying majority-percentages:
    for percentage in [100, 90, 80, 70, 60, 50, 40, 30, 20, 10, 1]:

        #-    calculate EC numbers occurring in group's core metabolism
        ourECnumbersWithWildcard = group.majorityEcGraph(
            majorityPercentage=percentage, noMultifunctional=True).getECs()
        ourECnumbers = EcNumber.removeWildcards(ourECnumbersWithWildcard)

        #-    overlap Almaas' set with ours and print amount of EC numbers inside the intersection and falling off either side
        onlyInTheirs = theirECnumbers.difference(ourECnumbers)
        inBoth = theirECnumbers.intersection(ourECnumbers)
        onlyInOurs = ourECnumbers.difference(theirECnumbers)