Example #1
0
    def substanceReactionGraph(self) -> SubstanceReactionGraph:
        """
        Build the substance-reaction graph of NUKA.

        The graph is assembled from the metabolic pathways of the pseudo-organism 'ec', with overview maps excluded.

        Returns
        -------
        SubstanceReactionGraph
            Contains all substrates/products and all reactions known to KEGG's metabolic pathways. Its `name` is set to 'Substance-Reaction NUKA'.

        Raises
        ------
        HTTPError
            If any underlying organism, pathway, or gene does not exist.
        URLError
            If connection to KEGG fails.

        Note
        ----
        This SubstanceReactionGraph can **NOT** be converted into a SubstanceGeneGraph, as the pathways do not contain gene information!
        """
        # 'ec' is not an organism abbreviation; it merely describes that the pathways shall contain EC numbers as edges.
        # This yields the full pathways, not specific to any species.
        allPathways = Organism('ec').getMetabolicPathways(
            includeOverviewMaps=False)

        graph = SubstanceGraphs.Conversion.KeggPathwaySet2SubstanceReactionGraph(
            allPathways, localVerbosity=0)
        graph.name = 'Substance-Reaction NUKA'

        # Progress output is controlled by the module-level verbosity setting.
        if init_verbosity > 0:
            print('calculated ' + graph.name)

        return graph
Example #2
0
    def getTransientMatches(
            self, relevantOrganisms: Iterable[str]) -> List[TransientMatch]:
        """
        Get full transient matches, considering only relevant orthologous organisms.

        Considering only relevant organisms is necessary, because a gene can have several thousand orthologs, including ones from organisms completely out of scope, while calculating the E-value for each of those matches is rather slow and involves several downloads.

        Parameters
        ----------
        relevantOrganisms : Iterable[str]
            Iterable of organism abbreviations, for each organism to be considered relevant.
            Any iterable is accepted, including one-shot iterators and generators; it is consumed exactly once.
            A single bare string is treated as one organism abbreviation, not as an iterable of characters.

        Returns
        -------
        List[TransientMatch]
            List of transient matches, in the order of `self.bestMatches`. These include E-values, which are slow to calculate, which is why only `relevantOrganisms` are considered.
            This means that only matches found in `self.bestMatches` which come from relevant organisms are actually converted to transient matches.
        """
        from FEV_KEGG.KEGG.Organism import Organism

        # Materialise the relevant organisms once. Testing `in` directly against a
        # one-shot iterator would exhaust it on the first lookup and silently drop
        # all later matches; a set also makes each lookup O(1).
        if isinstance(relevantOrganisms, str):
            relevantAbbreviations = {relevantOrganisms}
        else:
            relevantAbbreviations = set(relevantOrganisms)

        # organism abbreviation -> number of genes; looked up at most once per
        # organism, because several matches may stem from the same organism.
        databaseSizes = {}

        transientMatches = []

        for match in self.bestMatches:
            organismAbbreviation = match.foundGeneID.organismAbbreviation
            if organismAbbreviation not in relevantAbbreviations:
                continue  # Match is not relevant

            # fetch relevant organism's info, re-using an earlier lookup if possible
            if organismAbbreviation not in databaseSizes:
                databaseSizes[organismAbbreviation] = Organism(
                    organismAbbreviation).getNumberOfGenes()
            databaseSize = databaseSizes[organismAbbreviation]

            eValue = SequenceComparison.getExpectationValue(
                match.bitScore, self.queryLength, match.length,
                databaseSize)

            # calculate Transient Match
            transientMatches.append(TransientMatch.fromMatch(match, eValue))

        return transientMatches
Example #3
0
    b2366 dsdA [4.3.1.18] D-serine dehydratase
    b3616 tdh [1.1.1.103] threonine 3-dehydrogenase
    b3617 kbl [2.3.1.29] glycine C-acetyltransferase


Conclusion
----------
Global map 01100 does not necessarily contain all enzymes for an organism.
"""
from FEV_KEGG.Graph.SubstanceGraphs import SubstanceReactionGraph, SubstanceGeneGraph, SubstanceEnzymeGraph
from FEV_KEGG.KEGG.Organism import Organism

# Script entry point: the experiment below runs only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':

    #- Download pathway definition as KGML.
    # All pathways below are requested from the organism abbreviated 'eco'.
    eco = Organism('eco')

    # Pathway 00260 is a single pathway; 01100 is the global map referred to in
    # the module docstring's conclusion.
    eco00260 = eco.getPathway('00260')
    eco01100 = eco.getPathway('01100')

    #- Convert to substance-reaction graph.
    # Each pathway is converted into its own, separate graph.
    eco00260_reactionGraph = SubstanceReactionGraph.fromPathway(eco00260)
    eco01100_reactionGraph = SubstanceReactionGraph.fromPathway(eco01100)

    #- Convert to substance-gene graph
    # Each gene graph is derived from the matching substance-reaction graph.
    eco00260_geneGraph = SubstanceGeneGraph.fromSubstanceReactionGraph(
        eco00260_reactionGraph)
    eco01100_geneGraph = SubstanceGeneGraph.fromSubstanceReactionGraph(
        eco01100_reactionGraph)

    #- Convert to substance-enzyme graph
Example #4
0
- Some EC numbers themselves are missing.

The combination of all single pathways has the following shortcomings:

- Some EC numbers themselves are missing, both due to missing "reaction tags" (fixable without 01100) and due to missing gene entries (not fixable without 01100).

Solution A: Use combined single pathways, ignore missing EC numbers.
Solution B: Use combined single pathways, repair by comparison with overview map 01100, adding missing "reaction tags" and missing gene entries.
"""
from FEV_KEGG.Graph.SubstanceGraphs import SubstanceReactionGraph, SubstanceGeneGraph, SubstanceEcGraph
from FEV_KEGG.KEGG.Organism import Organism

# Script entry point: the experiment below runs only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':

    #- Download pathway description as KGML.
    # All pathways below are requested from the organism abbreviated 'eco'.
    eco = Organism('eco')

    # Overview map 01100 on one side, and all metabolic pathways with overview
    # maps excluded on the other side.
    eco01100 = eco.getPathway('01100')
    allNonOverviewPathways = eco.getMetabolicPathways(
        includeOverviewMaps=False)

    #- Convert to substance-reaction graph.
    # NOTE(review): fromPathway receives the whole result of
    # getMetabolicPathways here, not a single pathway -- presumably it accepts
    # both and combines them into one graph; confirm.
    eco01100_reactionGraph = SubstanceReactionGraph.fromPathway(eco01100)
    allNonOverviewPathways_reactionGraph = SubstanceReactionGraph.fromPathway(
        allNonOverviewPathways)

    #- Convert to substance-gene graph.
    # Each gene graph is derived from the matching substance-reaction graph.
    eco01100_geneGraph = SubstanceGeneGraph.fromSubstanceReactionGraph(
        eco01100_reactionGraph)
    allNonOverviewPathways_geneGraph = SubstanceGeneGraph.fromSubstanceReactionGraph(
        allNonOverviewPathways_reactionGraph)