예제 #1
0
def adjust_protons(formula, protons):
    """
    Add hydrogens to, or remove them from, a single-component formula.

    @param formula: chemical formula as string (no '.'-separated components)
    @param protons: number of hydrogens to add (positive) or remove
                    (negative); anything int() accepts
    @return: tuple (formula string, notes string). Notes is empty unless the
             formula had several components or the hydrogen count went
             below zero.
    """
    # Nothing to do for 0, None or an empty string
    if not protons:
        return (formula, "")
    delta = int(protons)

    # The proton layer only ever has a single component, so a formula that
    # still has '.'-separated components has to be merged by the caller first
    if formula.count('.') > 0:
        print(
            "Error: you must merge the formula components into a single formula string"
        )
        print("You can do so using Compounds.mergeFormula()")
        return formula, "Unadjustable due to multiple components"

    note = ""
    atoms = Compounds.parseFormula(formula)
    if len(atoms) == 0:
        # special case for the proton: an empty formula receives the hydrogens
        atoms['H'] = delta
    elif "H" in atoms:
        hydrogens = atoms['H'] + delta
        atoms['H'] = hydrogens
        if hydrogens < 0:
            note = 'Too Many Protons adjusted!'
        elif hydrogens == 0:
            del atoms['H']

    return (Compounds.buildFormula(atoms), note)
예제 #2
0
    def __init__(self,
                 biochem_root='../../Biochemistry/',
                 rxns_file='reactions.tsv'):
        """
        Record the biochemistry file locations, read the header of the
        reactions table and load compounds through the Compounds helper.

        @param biochem_root: directory prefix (including trailing separator)
                             that the file names are appended to
        @param rxns_file: name of the reactions table under biochem_root
        """
        self.BiochemRoot = biochem_root
        self.RxnsFile = biochem_root + rxns_file
        self.AliasFile = biochem_root + "Aliases/Reactions_Aliases.tsv"

        # Only the header row is needed. DictReader reads it lazily on the
        # first access to fieldnames, so touch it while the file is open and
        # let the context manager close the handle afterwards.
        with open(self.RxnsFile) as rxns_handle:
            self.Headers = DictReader(rxns_handle,
                                      dialect='excel-tab').fieldnames

        from BiochemPy import Compounds
        self.CompoundsHelper = Compounds()
        self.Compounds_Dict = self.CompoundsHelper.loadCompounds()
예제 #3
0
def parse(inchi, merge_formula=False):
    """
    Split an InChI string into its formula and its layers.

    @param inchi: InChI string
    @param merge_formula: bool, pass the formula through
                          Compounds.mergeFormula before returning it
    @return:
    formula string and dictionary of layers where key is layer code and value
    is layer contents. Every code in InChI_Layers is present; layers missing
    from the InChI map to an empty string. A string with nothing after the
    version prefix yields an empty formula and all-empty layers.
    """
    layer_dict = {code: "" for code in InChI_Layers}

    # special case for proton: there is no formula, only a 'p' layer
    m = re.match(r'^InChI=1S/p([-+]\d*)', inchi)
    if m:
        layer_dict['p'] = m.group(1)
        return "", layer_dict

    layers = inchi.split("/")[1:]
    if not layers:
        # No '/' at all (e.g. an empty string): nothing to parse
        return "", layer_dict

    formula = layers.pop(0)
    if merge_formula:
        formula = Compounds.mergeFormula(formula)

    for layer in layers:
        # A doubled or trailing '/' produces an empty segment with no code
        if layer:
            layer_dict[layer[0]] = layer[1:]

    return formula, layer_dict
예제 #4
0
    def __init__(self,
                 biochem_root='../../../Biochemistry/',
                 rxns_file='reactions.tsv'):
        """
        Record the biochemistry file locations (relative to this module's
        directory), read the header of the reactions table and load compounds
        through the Compounds helper.

        @param biochem_root: path, relative to the directory of this file and
                             including a trailing separator, that the file
                             names are appended to
        @param rxns_file: name of the reactions table under biochem_root
        """

        self.BiochemRoot = os.path.dirname(__file__) + '/' + biochem_root
        self.RxnsFile = self.BiochemRoot + rxns_file
        self.AliasFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Aliases.txt"
        self.NameFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Names.txt"
        self.PwyFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Pathways.txt"
        self.ECFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_ECs.txt"

        # Only the header row is needed. DictReader reads it lazily on the
        # first access to fieldnames, so touch it while the file is open and
        # let the context manager close the handle afterwards.
        with open(self.RxnsFile) as rxns_handle:
            self.Headers = DictReader(rxns_handle,
                                      dialect='excel-tab').fieldnames

        from BiochemPy import Compounds
        self.CompoundsHelper = Compounds()
        self.Compounds_Dict = self.CompoundsHelper.loadCompounds()
예제 #5
0
def build(formula, layers, remove=(), merge_formula=False):
    """
    Assemble an InChI string from a formula and a layers dictionary.

    I use 'remove' to strip p, q, and stereochemical layers depending on how I
    want to compare InChI strings
    @param formula: Formula string
    @param layers: layers dictionary keyed by layer code, values without the
                   code (the shape produced by parse()); must hold every code
                   in InChI_Layers
    @param remove: a collection of layer codes that have to be left out of
                   the InChI string
    @param merge_formula: bool, pass the formula through
                          Compounds.mergeFormula first
    @return: InChI string, or a blank string when there is neither a formula
             nor any layer to write
    """
    if merge_formula:
        formula = Compounds.mergeFormula(formula)

    # parse() stores each layer without its one-letter code, so the code has
    # to be put back in front of the contents here.
    kept_layers = [
        code + layers[code] for code in InChI_Layers
        if layers[code] and code not in remove
    ]

    # if no valid layers return blank string
    if not formula and not kept_layers:
        return ""

    # An empty formula (the proton) is skipped rather than written as an
    # empty segment, giving "InChI=1S/p+1" and not "InChI=1S//p+1".
    parts = ["InChI=1S"]
    if formula:
        parts.append(formula)
    return "/".join(parts + kept_layers)
#!/usr/bin/env python
import os, sys, re
temp = list()
header = 1

from BiochemPy import Compounds

import pybel
from rdkit.Chem import AllChem
from rdkit import RDLogger
# Raise the RDKit logger threshold to ERROR
lg = RDLogger.logger()
lg.setLevel(RDLogger.ERROR)

#Load Structures and Aliases
CompoundsHelper = Compounds()
Structures_Dict = CompoundsHelper.loadStructures(["SMILE", "InChI"],
                                                 ["KEGG", "MetaCyc"])

# Open one output file per (source, structure type, stage) combination.
# Handles are stored under the key "<source>_<type>_<stage>" and the files
# live in <Structures_Root>/<source>/<type>_<stage>_Formulas_Charges.txt.
# The handles are opened for writing here and are not closed in this part of
# the script.
Structures_Root = os.path.dirname(__file__) + "/../../Biochemistry/Structures/"
file_handle_dict = dict()
for source in "KEGG", "MetaCyc":
    for struct_type in "InChI", "SMILE":
        for struct_stage in "Charged", "Original":
            file_string = "_".join((source, struct_type, struct_stage))
            file_name = Structures_Root + source + "/" + struct_type + "_" + struct_stage + "_Formulas_Charges.txt"
            file_handle_dict[file_string] = open(file_name, "w")

# Report files, created in the current working directory
resolved_structures = open('Resolved_Structures.txt', 'w')
unresolved_structures = open('Unresolved_Structures.txt', 'w')
for struct_type in sorted(Structures_Dict.keys()):
예제 #7
0
#!/usr/bin/env python
import os, sys
from csv import DictReader
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

# Load the compounds table plus the alias and name tables
CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()
Aliases_Dict = CompoundsHelper.loadMSAliases()
Names_Dict = CompoundsHelper.loadNames()

# Source_Classes[<Source Type>][<Source ID>] = 1 for every row of the
# classifier table. The file is read inside a context manager so the handle
# is closed as soon as the table has been consumed.
Source_Classes = dict()
with open('../../../Biochemistry/Aliases/Source_Classifiers.txt'
          ) as classifiers_handle:
    reader = DictReader(classifiers_handle, dialect='excel-tab')
    for line in reader:
        Source_Classes.setdefault(line['Source Type'],
                                  dict())[line['Source ID']] = 1

for cpd in sorted(Compounds_Dict.keys()):
    if (cpd not in Aliases_Dict):
        continue

    Cpd_Aliases = dict()
    Alias_Count = 0
    for source_type in 'Primary Database', 'Secondary Database', 'Published Model':
        for source in sorted(Aliases_Dict[cpd].keys()):
#!/usr/bin/env python
import os
import sys
import json
from BiochemPy import Compounds

#Load Compounds
CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()

# Structure files are located relative to this script's directory
Structures_Root = os.path.dirname(__file__) + "/../../Biochemistry/Structures/"
# Filled by the loop that follows, nested as source -> structure type ->
# stage -> first column of each line of the *_Formulas_Charges.txt files
Formulas_Dict = dict()
for source in "KEGG", "MetaCyc":
    if (source not in Formulas_Dict):
        Formulas_Dict[source] = dict()

    for struct_type in "InChI", "SMILE":
        if (struct_type not in Formulas_Dict[source]):
            Formulas_Dict[source][struct_type] = dict()

        for struct_stage in "Charged", "Original":
            if (struct_stage not in Formulas_Dict[source][struct_type]):
                Formulas_Dict[source][struct_type][struct_stage] = dict()

            file_name = Structures_Root + source + "/" + struct_type + "_" + struct_stage + "_Formulas_Charges.txt"
            with open(file_name) as file_handle:
                for line in file_handle.readlines():
                    line = line.strip()
                    array = line.split('\t')
                    Formulas_Dict[source][struct_type][struct_stage][
                        array[0]] = {
#!/usr/bin/env python
import os, sys, re
temp = list()
from BiochemPy import Reactions, Compounds
compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
# NOTE(review): the Reactions.generateCodes visible elsewhere in this file
# only accepts (rxns_dict, check_obsolete); confirm that the BiochemPy version
# this script runs against takes the stoich/transport keywords.
reactions_codes = reactions_helper.generateCodes(reactions_dict,
                                                 stoich=False,
                                                 transport=False)

# Map each search name to a compound id. Ids are visited in sorted order and
# the first id to produce a given search name keeps it.
names_dict = compounds_helper.loadNames()
searchnames_dict = dict()
for msid in sorted(names_dict):
    for name in names_dict[msid]:
        searchname = compounds_helper.searchname(name)
        #Avoid redundancy where possible
        if (searchname not in searchnames_dict):
            searchnames_dict[searchname] = msid

# Output file, opened for writing here and used further down the script
mhc = open('Mishit_Compound_Names.txt', 'w')
with open('Parsed_Enzyme_Equations.txt') as fh:
    for line in fh.readlines():
        line = line.strip()
        (id, old_equation) = line.split('\t')
        array = re.split(' (<?=>?|\+) ', old_equation)

        new_array = list()
        mishit = False
        for i in range(len(array)):
예제 #10
0
#!/usr/bin/env python
import os, sys

temp = list()
header = 1

# Make the BiochemPy package importable; the path is relative to the current
# working directory
sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()

# Compound ids for the merge; they are only defined here, not used in the
# part of the script shown below
Compound_To_Merge_From = "cpd00013"
Compound_To_Merge_To = "cpd19013"

# Cpds_Rxns_Dict[compound id][reaction id] = 1 for every compound that occurs
# in the stoichiometry of a reaction whose status is not "EMPTY".
# Rxns_Cpds_Dict is created here but not filled in this loop.
Cpds_Rxns_Dict = dict()
Rxns_Cpds_Dict = dict()
for rxn in Reactions_Dict.keys():
    if (Reactions_Dict[rxn]["status"] == "EMPTY"):
        continue

    # Stoichiometry entries are ';'-separated; each entry is
    # coefficient:compound:compartment:index:name (the name may itself contain
    # ':', hence the maxsplit of 4)
    for rgt in Reactions_Dict[rxn]["stoichiometry"].split(";"):
        (coeff, cpd, cpt, index, name) = rgt.split(":", 4)

        if (cpd not in Cpds_Rxns_Dict):
            Cpds_Rxns_Dict[cpd] = dict()
        Cpds_Rxns_Dict[cpd][rxn] = 1
예제 #11
0
#!/usr/bin/env python
import os, sys

temp = list()
header = 1

# Make the BiochemPy package importable; the path is relative to the current
# working directory
sys.path.append('../../Libs/Python')
from BiochemPy import Compounds

CompoundsHelper = Compounds()
Structures_Dict = CompoundsHelper.loadStructures(["SMILE", "InChIKey"],
                                                 ["ModelSEED"])
Compounds_Dict = CompoundsHelper.loadCompounds()

# Copy the InChIKey and SMILES of every compound into the compounds table;
# compounds without a structure entry get empty strings.
for cpd in sorted(Compounds_Dict.keys()):
    if (cpd not in Structures_Dict):
        Compounds_Dict[cpd]['inchikey'] = ""
        Compounds_Dict[cpd]['smiles'] = ""
    else:
        Compounds_Dict[cpd]['inchikey'] = Structures_Dict[cpd].get(
            'InChIKey', "")
        Compounds_Dict[cpd]['smiles'] = Structures_Dict[cpd].get('SMILE', "")

# Function-call form prints the same text on Python 2 and Python 3
print("Saving compounds")
CompoundsHelper.saveCompounds(Compounds_Dict)
#!/usr/bin/env python
from BiochemPy import Reactions, Compounds
import sys

remove_index = 0
remove_string = 'ontology'

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()

# Drop the column from the header list only when it is actually there.
# Deleting by a default index of 0 would remove the first column of a table
# that never had the column in the first place.
if remove_string in compounds_helper.Headers:
    remove_index = compounds_helper.Headers.index(remove_string)
    del compounds_helper.Headers[remove_index]

# pop() with a default tolerates rows that do not carry the column
for cpd in compounds_dict:
    compounds_dict[cpd].pop(remove_string, None)

compounds_helper.saveCompounds(compounds_dict)

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()

# Look the column up again for the reactions table: its position is
# independent of where it was found in the compounds table.
if remove_string in reactions_helper.Headers:
    remove_index = reactions_helper.Headers.index(remove_string)
    del reactions_helper.Headers[remove_index]
for rxn in reactions_dict:
예제 #13
0
class Reactions:
    def __init__(self,
                 biochem_root='../../../Biochemistry/',
                 rxns_file='reactions.tsv'):
        """
        Record the biochemistry file locations (relative to this module's
        directory), read the header of the reactions table and load compounds
        through the Compounds helper.

        @param biochem_root: path, relative to the directory of this file and
                             including a trailing separator, that the file
                             names are appended to
        @param rxns_file: name of the reactions table under biochem_root
        """

        self.BiochemRoot = os.path.dirname(__file__) + '/' + biochem_root
        self.RxnsFile = self.BiochemRoot + rxns_file
        self.AliasFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Aliases.txt"
        self.NameFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Names.txt"
        self.PwyFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Pathways.txt"
        self.ECFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_ECs.txt"

        # Only the header row is needed. DictReader reads it lazily on the
        # first access to fieldnames, so touch it while the file is open and
        # let the context manager close the handle afterwards.
        with open(self.RxnsFile) as rxns_handle:
            self.Headers = DictReader(rxns_handle,
                                      dialect='excel-tab').fieldnames

        from BiochemPy import Compounds
        self.CompoundsHelper = Compounds()
        self.Compounds_Dict = self.CompoundsHelper.loadCompounds()

    def loadReactions(self):
        reader = DictReader(open(self.RxnsFile), dialect='excel-tab')
        type_mapping = {
            "is_transport": int,
            "is_obsolete": int,
            "deltag": float,
            "deltagerr": float
        }
        lists = ["aliases", "pathways", "ec_numbers", "notes"]
        dicts = []

        rxns_dict = dict()
        for line in reader:
            for list_type in lists:
                if (line[list_type] != "null"):
                    line[list_type] = line[list_type].split("|")
            for dict_type in dicts:
                if (line[dict_type] != "null"):
                    entries = line[dict_type].split('|')
                    line[dict_type] = dict()
                    for entry in entries:
                        (type, list) = entry.split(':')
                        line[dict_type][type] = list
            for heading, target_type in type_mapping.items():
                try:
                    line[heading] = target_type(line[heading])
                except ValueError:  # Generally caused by "null" strings
                    line[heading] = None
            rxns_dict[line['id']] = line

        return rxns_dict

    def parseEquation(self, equation_string):
        rxn_cpds_array = list()
        reagent = -1
        coeff = 1
        index = 0
        for text in equation_string.split(" "):
            if (text == "+"):
                continue

            match = re.search('^<?=>?$', text)
            if (match is not None):
                reagent = 1

            match = re.search('^\((\d+(?:\.\d+)?)\)$', text)
            if (match is not None):
                coeff = match.group(1)

                # Correct for redundant ".0" in floats
                coeff = float(coeff)
                if (str(coeff)[-2:] == ".0"):
                    coeff = int(round(coeff))

            match = re.search('^(cpd\d{5})\[(\d)\]$', text)
            if (match is not None):

                #Side of equation
                coeff = coeff * reagent

                (cpd, cpt) = (match.group(1), match.group(2))
                rgt_id = cpd + "_" + cpt + str(index)
                cpt = int(cpt)
                name = self.Compounds_Dict[cpd]["name"]
                formula = self.Compounds_Dict[cpd]["formula"]
                charge = self.Compounds_Dict[cpd]["charge"]

                rxn_cpds_array.append({
                    "reagent": rgt_id,
                    "coefficient": coeff,
                    "compound": cpd,
                    "compartment": cpt,
                    "index": index,
                    "name": name,
                    "formula": formula,
                    "charge": charge
                })

                #Need to reset coeff for next compound
                coeff = 1

        return rxn_cpds_array

    def parseStoich(self, stoichiometry):
        rxn_cpds_array = list()

        #For empty reaction
        if (stoichiometry == ""):
            return rxn_cpds_array

        for rgt in stoichiometry.split(";"):
            (coeff, cpd, cpt, index, name) = rgt.split(":", 4)
            rgt_id = cpd + "_" + cpt + index

            coeff = float(coeff)

            # Correct for redundant ".0" in floats
            if (str(coeff)[-2:] == ".0"):
                coeff = int(round(coeff))

            cpt = int(cpt)
            index = int(index)

            rxn_cpds_array.append({
                "reagent":
                rgt_id,
                "coefficient":
                coeff,
                "compound":
                cpd,
                "compartment":
                cpt,
                "index":
                index,
                "name":
                name,
                "formula":
                self.Compounds_Dict[cpd]["formula"],
                "charge":
                self.Compounds_Dict[cpd]["charge"]
            })
        return rxn_cpds_array

    def parseStoichOnt(self, stoichiometry):
        rxn_cpds_dict = dict()

        #For empty reaction
        if (stoichiometry == ""):
            return rxn_cpds_array

        for rgt in stoichiometry.split(";"):
            (coeff, cpd, cpt, index, name) = rgt.split(":", 4)
            cpd_cpt_tuple = (cpd, cpt)
            rxn_cpds_dict[cpd_cpt_tuple] = coeff

        return rxn_cpds_dict

    # The basis for this code, and producing combinations of ontologically related reactions
    # was found in Filipe's code (see commit: 92db86)
    def generateOntologyReactionCodes(self, rxn_id, rxn_cpds, cpds_neighbors):

        # returns list of reaction codes to match with biochemistry
        new_codes = dict()

        replacements = list()
        for cpd_cpt_tuple in rxn_cpds:
            replace_list = list()
            cpd_id = cpd_cpt_tuple[0]
            if cpd_id in cpds_neighbors:
                for neighbor_id in cpds_neighbors[cpd_id]:
                    replace_list.append((cpd_id, neighbor_id))

            if len(replace_list) > 0:
                replacements.append(replace_list)

        # Iterate through different numbers of compounds to replace
        # i.e. replace 1 compound, replace 2 compounds etc.
        # The output is a list of all the possible combination of replacements to explore
        replacement_product = list()
        for n_cpds in range(1, len(replacements) + 1):
            combination = list(itertools.combinations(replacements, n_cpds))
            for entry in combination:
                product_list = list(itertools.product(*entry))
                replacement_product += product_list

        if (len(replacements) == 0):
            return new_codes

        for entry in replacement_product:

            # Old code assumed that all "new" compounds were unique
            # cpd_swap_dict = {x:y for x, y in entry}
            # new_swapped_rxn_cpds = { (x if not x in cpd_swap_dict else cpd_swap_dict[x], c):y
            #                          for (x, c), y in rxn_cpds.items() }

            # Regenerate array of cpd dicts for use with generateCode()
            swapped_rxn_cpds_array = list()
            for (cpd, cpt), coeff in rxn_cpds.items():
                new_cpd = cpd
                for old, new in entry:
                    if (cpd == old):
                        new_cpd = new
                reagent = {
                    "reagent": new_cpd + '_' + cpt + '0',
                    "compartment": cpt,
                    "coefficient": float(coeff)
                }

                # Correct for redundant ".0" in floats
                if (str(reagent["coefficient"])[-2:] == ".0"):
                    reagent["coefficient"] = int(round(reagent["coefficient"]))

                swapped_rxn_cpds_array.append(reagent)
            new_code = self.generateCode(swapped_rxn_cpds_array)
            new_codes[new_code] = entry
        return new_codes

    @staticmethod
    def isTransport(rxn_cpds_array):
        compartments_dict = dict()
        for rgt in rxn_cpds_array:
            compartments_dict[rgt['compartment']] = 1
        if (len(compartments_dict.keys()) > 1):
            return 1
        else:
            return 0

    def generateCodes(self, rxns_dict, check_obsolete=True):
        codes_dict = dict()
        for rxn in rxns_dict:
            if (rxns_dict[rxn]['status'] == "EMPTY"):
                continue
            if (check_obsolete is False
                    and rxns_dict[rxn]['is_obsolete'] == 1):
                continue
            rxn_cpds_array = self.parseStoich(rxns_dict[rxn]['stoichiometry'])
            code = self.generateCode(rxn_cpds_array)
            if (code not in codes_dict):
                codes_dict[code] = dict()
            codes_dict[code][rxn] = 1
        return codes_dict

    def generateCode(self, rxn_cpds_array):

        #It matters if its a transport reaction, and we include protons when matching transport
        is_transport = self.isTransport(rxn_cpds_array)

        #It matters which side of the equation, so build reagents and products arrays
        reagents = list()
        products = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x: (x["reagent"], x["coefficient"])):
            #skip protons
            if ("cpd00067" in rgt["reagent"] and is_transport == 0):
                continue

            if (rgt["coefficient"] < 0):
                reagents.append(rgt["reagent"] + ":" +
                                str(abs(rgt["coefficient"])))
            if (rgt["coefficient"] > 0):
                products.append(rgt["reagent"] + ":" +
                                str(abs(rgt["coefficient"])))

        rgt_string = "|".join(reagents)
        pdt_string = "|".join(products)
        #Sorting the overall strings here helps with matching transporters
        rxn_string = "|=|".join(sorted([rgt_string, pdt_string]))
        return rxn_string

    @staticmethod
    def buildStoich(rxn_cpds_array):
        stoichiometry_array = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x:
                          (int(x["coefficient"] > 0), x["reagent"])):

            # Correct for redundant ".0" in floats
            if (str(rgt["coefficient"])[-2:] == ".0"):
                rgt["coefficient"] = int(round(rgt["coefficient"]))

            rgt["coefficient"] = str(rgt["coefficient"])
            rgt["compartment"] = str(rgt["compartment"])
            rgt["index"] = str(rgt["index"])

            rgt_string = ":".join([
                rgt["coefficient"], rgt["compound"], rgt["compartment"],
                rgt["index"], rgt["name"]
            ])
            stoichiometry_array.append(rgt_string)
        stoichiometry_string = ";".join(stoichiometry_array)
        return stoichiometry_string

    @staticmethod
    def removeCpdRedundancy(rgts_array):

        rgts_dict = dict()
        for rgt in rgts_array:
            if (rgt["reagent"] not in rgts_dict):
                rgts_dict[rgt["reagent"]] = 0
            rgts_dict[rgt["reagent"]] += float(rgt["coefficient"])

        new_rgts_array = list()
        for rgt in rgts_array:
            if (rgts_dict[rgt["reagent"]] == 0):
                continue

            rgt["coefficient"] = rgts_dict[rgt["reagent"]]

            # Correct for redundant ".0" in floats
            if (str(rgt["coefficient"])[-2:] == ".0"):
                rgt["coefficient"] = int(round(rgt["coefficient"]))

            new_rgts_array.append(rgt)

            #Trick to exclude reagent if it appears in array more than once
            rgts_dict[rgt["reagent"]] = 0

        return new_rgts_array

    def balanceReaction(self, rgts_array):
        """
        Check the mass and charge balance of a reaction.

        @param rgts_array: list of reagent dicts with 'reagent', 'compound',
                           'coefficient', 'formula' and 'charge' keys
        @return: status string, one of
                 "EMPTY"              - rgts_array is empty
                 "Duplicate reagents" - a reagent id occurs more than once
                 "CPDFORMERROR"       - a compound other than cpd11632 or
                                        cpd12713 has a formula that parses to
                                        no atoms
                 "OK"                 - no net mass or charge
                 "MI:<atom>:<net>/..." and/or "CI:<net>", joined by "|" when
                 both apply - net mass and/or charge remain
        """
        if (len(rgts_array) == 0):
            return "EMPTY"

        ########################################
        # Check that each reagent is either a
        # different compound or in a different
        # compartment, and report.
        ########################################
        rgts_dict = dict()
        for rgt in rgts_array:
            if (rgt["reagent"] not in rgts_dict):
                rgts_dict[rgt["reagent"]] = 0
            rgts_dict[rgt["reagent"]] += 1

        for rgt in rgts_dict.keys():
            if (rgts_dict[rgt] > 1):
                return "Duplicate reagents"

        ########################################
        # Check for duplicate compounds in
        # different compartments, these are
        # balanced directly.
        #######################################
        # Net coefficient of every compound, summed over all compartments
        cpds_coeff_dict = dict()
        for rgt in rgts_array:
            cpd = rgt["compound"]
            if (cpd not in cpds_coeff_dict):
                cpds_coeff_dict[cpd] = 0

            # Use float() because you can get real coefficients
            cpds_coeff_dict[cpd] += float(rgt["coefficient"])

        # Build dict of compounds
        cpds_dict = dict()
        for rgt in rgts_array:
            #Skip trans-compartmental compounds
            if (cpds_coeff_dict[rgt["compound"]] == 0):
                continue

            # Work on a copy carrying the net coefficient so the caller's
            # reagent dicts are left untouched
            proxy_rgt = copy.deepcopy(rgt)
            proxy_rgt["coefficient"] = cpds_coeff_dict[rgt["compound"]]
            cpds_dict[rgt["compound"]] = proxy_rgt

        ########################################
        # Check for duplicate elements, across
        # all compounds, these are balanced
        # directly.
        #######################################
        rxn_net_charge = 0.0
        rxn_net_mass = dict()
        cpdformerror = list()
        for cpd in cpds_dict.keys():
            cpd_atoms = self.CompoundsHelper.parseFormula(
                cpds_dict[cpd]["formula"])

            if (len(cpd_atoms.keys()) == 0):
                #Here we can skip photons and electrons
                #They are the valid compounds with no mass
                if (cpd == 'cpd11632' or cpd == 'cpd12713'):
                    pass
                else:
                    cpdformerror.append(cpd)

            # Charge contribution = compound charge * net coefficient
            cpd_coeff_charge = float(cpds_dict[cpd]["charge"]) * float(
                cpds_dict[cpd]["coefficient"])
            rxn_net_charge += cpd_coeff_charge

            # Mass contribution per atom = atom count * net coefficient
            for atom in cpd_atoms.keys():
                atom_coeff_mass = float(cpd_atoms[atom]) * float(
                    cpds_dict[cpd]["coefficient"])

                if (atom not in rxn_net_mass.keys()):
                    rxn_net_mass[atom] = 0.0

                rxn_net_mass[atom] += atom_coeff_mass

        if (len(cpdformerror) > 0):
            return "CPDFORMERROR"

        # Round out tiny numbers that occur because we add/substract floats
        # Threshold of 1e-6 found to capture all these instances without
        # removing actual small differences in mass.
        for atom in rxn_net_mass.keys():
            if (rxn_net_mass[atom] > -1e-6 and rxn_net_mass[atom] < 1e-6):
                rxn_net_mass[atom] = 0

        if (rxn_net_charge > -1e-6 and rxn_net_charge < 1e-6):
            rxn_net_charge = 0

        # Report any imbalance
        imbalanced_atoms_array = list()
        for atom in sorted(rxn_net_mass.keys()):
            if (rxn_net_mass[atom] == 0):
                continue

            # From here on the entry holds the formatted text, not the number
            rxn_net_mass[atom] = "{0:.2f}".format(rxn_net_mass[atom])

            # Correct for redundant ".00" in floats
            if (rxn_net_mass[atom][-3:] == ".00"):
                rxn_net_mass[atom] = str(int(float(rxn_net_mass[atom])))

            imbalanced_atoms_array.append(atom + ":" + rxn_net_mass[atom])

        # The net charge is likewise turned into text before being compared
        rxn_net_charge = "{0:.2f}".format(rxn_net_charge)

        # Correct for redundant ".00" in floats
        if (rxn_net_charge[-3:] == ".00"):
            rxn_net_charge = str(int(float(rxn_net_charge)))

        status = ""

        if (len(imbalanced_atoms_array) > 0):
            status = "MI:" + "/".join(imbalanced_atoms_array)

        if (rxn_net_charge != "0"):
            if (len(status) == 0):
                status = "CI:" + rxn_net_charge
            else:
                status += "|CI:" + rxn_net_charge

        if (status == ""):
            status = "OK"

        return status

    def adjustCompound(self,
                       rxn_cpds_array,
                       compound,
                       adjustment,
                       compartment=0):

        if (adjustment == 0):
            return rxn_cpds_array

        ######################################################################
        # We will always assume to adjust a compound automatically
        # in the compartment indexed as zero, unless otherwise specified.
        # This answers the question of how to handle transporters.
        ######################################################################

        # Check to see if it already exists
        cpd_exists = 0
        cpd_remove = {}
        for rgt in rxn_cpds_array:
            if (rgt["compound"] == compound
                    and rgt["compartment"] == compartment):
                rgt["coefficient"] -= adjustment
                cpd_exists = 1
                if (rgt["coefficient"] == 0):
                    cpd_remove = rgt

        if (cpd_exists != 1):
            rgt_id = compound + "_" + str(compartment) + "0"

            rxn_cpds_array.append({
                "reagent":
                rgt_id,
                "coefficient":
                0 - adjustment,
                "compound":
                compound,
                "compartment":
                compartment,
                "index":
                0,
                "name":
                self.Compounds_Dict[compound]["name"],
                "formula":
                self.Compounds_Dict[compound]["formula"],
                "charge":
                self.Compounds_Dict[compound]["charge"]
            })

        if (len(cpd_remove.keys()) > 0):
            rxn_cpds_array.remove(cpd_remove)

        #Got to adjust for floats
        for rgt in rxn_cpds_array:
            if (str(rgt["coefficient"])[-2:] == ".0"):
                rgt["coefficient"] = int(round(rgt["coefficient"]))

        return

    def replaceCompound(self, rxn_cpds_array, old_compound, new_compound):

        ######################################################################
        # We will always assume that we will maintain the coefficient.
        # We will always assume that we will replace in all compartments.
        # The adjustment will fail silently, returning an empty array
        # if the old_compound cannot be found.
        ######################################################################

        found_cpd = False
        for rgt in rxn_cpds_array:
            if (rgt["compound"] == old_compound):
                found_cpd = True
                rgt["compound"] = new_compound
                rgt["reagent"] = new_compound + "_" + str(
                    rgt["compartment"]) + "0"
                rgt["name"] = self.Compounds_Dict[new_compound]['name']

        return found_cpd

    def rebuildReaction(self, reaction_dict, stoichiometry=None):
        # Retrieve/Assign stoich
        if (stoichiometry is None):
            stoichiometry = reaction_dict['stoichiometry']
        else:
            reaction_dict["stoichiometry"] = stoichiometry

        # Build list of "reagents" and "products"
        rxn_cpds_array = self.parseStoich(stoichiometry)
        reagents_array = list()
        products_array = list()
        compound_ids_dict = dict()
        for rgt in rxn_cpds_array:
            compound_ids_dict[rgt["compound"]] = 1
            if (rgt["coefficient"] > 0):
                products_array.append(rgt)
            else:
                reagents_array.append(rgt)

        rgts_str__array = list()
        for rgt in reagents_array:
            id_string = "(" + str(abs(
                rgt["coefficient"])) + ") " + rgt["compound"] + "[" + str(
                    rgt["compartment"]) + "]"
            rgts_str__array.append(id_string)

        equation_array = list()
        code_array = list()
        definition_array = list()

        equation_array.append(" + ".join(rgts_str__array))
        definition_array.append(" + ".join(rgts_str__array))
        code_array.append(" + ".join(x for x in rgts_str__array
                                     if "cpd00067" not in x))

        code_array.append("<=>")
        if (reaction_dict["direction"] == "="):
            equation_array.append("<=>")
            definition_array.append("<=>")
        elif (reaction_dict["direction"] == "<"):
            equation_array.append("<=")
            definition_array.append("<=")
        else:
            equation_array.append("=>")
            definition_array.append("=>")

        pdts_str_array = list()
        for rgt in products_array:
            id_string = "(" + str(abs(
                rgt["coefficient"])) + ") " + rgt["compound"] + "[" + str(
                    rgt["compartment"]) + "]"
            pdts_str_array.append(id_string)

        equation_array.append(" + ".join(pdts_str_array))
        definition_array.append(" + ".join(pdts_str_array))
        code_array.append(" + ".join(x for x in pdts_str_array
                                     if "cpd00067" not in x))

        reaction_dict["code"] = " ".join(code_array)
        reaction_dict["equation"] = " ".join(equation_array)
        reaction_dict["definition"] = " ".join(definition_array)
        reaction_dict["compound_ids"] = ";".join(
            sorted(compound_ids_dict.keys()))

        # Replace ids with names in Definition
        for cpd_id in compound_ids_dict.keys():
            if (cpd_id in reaction_dict["definition"]):
                reaction_dict["definition"] = reaction_dict[
                    "definition"].replace(cpd_id,
                                          self.Compounds_Dict[cpd_id]["name"])

        # Define if transport?

        return

    def saveECs(self, ecs_dict):
        """Write reaction EC numbers to a tab-separated text file.

        The output path is ``self.ECFile`` with its extension replaced by
        ``.txt``. A header row is written first, then one row per
        (reaction, EC number) pair, sorted by reaction id and then by EC
        number. The third column is always the literal ``Enzyme Class``.

        @param ecs_dict: dict mapping reaction id to an iterable of EC strings
        @return: None
        """
        ecs_root = os.path.splitext(self.ECFile)[0]

        # Print to TXT; the context manager closes the handle even if a
        # write fails part-way through.
        with open(ecs_root + ".txt", 'w') as ecs_file:
            ecs_file.write(
                "\t".join(("ModelSEED ID", "External ID", "Source")) + "\n")
            for rxn in sorted(ecs_dict):
                for name in sorted(ecs_dict[rxn]):
                    ecs_file.write(
                        "\t".join((rxn, name, 'Enzyme Class')) + "\n")

    def saveNames(self, names_dict):
        """Write reaction names to a tab-separated text file.

        The output path is ``self.NameFile`` with its extension replaced by
        ``.txt``. A header row is written first, then one row per
        (reaction, name) pair, sorted by reaction id and then by name. The
        third column is always the literal ``name``.

        @param names_dict: dict mapping reaction id to an iterable of names
        @return: None
        """
        names_root = os.path.splitext(self.NameFile)[0]

        # Print to TXT; the context manager closes the handle even if a
        # write fails part-way through.
        with open(names_root + ".txt", 'w') as names_file:
            names_file.write(
                "\t".join(("ModelSEED ID", "External ID", "Source")) + "\n")
            for rxn in sorted(names_dict):
                for name in sorted(names_dict[rxn]):
                    names_file.write("\t".join((rxn, name, 'name')) + "\n")

    def saveAliases(self, alias_dict):
        """Write reaction aliases to a tab-separated text file.

        The output path is ``self.AliasFile`` with its extension replaced by
        ``.txt``. A header row is written first, then one row per
        (reaction, alias, source) triple, sorted by reaction id, then by
        source, then by alias.

        @param alias_dict: dict mapping reaction id to a dict that maps each
                           source name to an iterable of alias strings
        @return: None
        """
        alias_root = os.path.splitext(self.AliasFile)[0]

        # Print to TXT; the context manager closes the handle even if a
        # write fails part-way through.
        with open(alias_root + ".txt", 'w') as alias_file:
            alias_file.write(
                "\t".join(("ModelSEED ID", "External ID", "Source")) + "\n")
            for rxn in sorted(alias_dict):
                for source in sorted(alias_dict[rxn]):
                    for alias in sorted(alias_dict[rxn][source]):
                        alias_file.write(
                            "\t".join((rxn, alias, source)) + "\n")

    def saveReactions(self, reactions_dict):
        """Write all reactions to a TSV file and a JSON file.

        Both files share the stem of ``self.RxnsFile``. The TSV has one
        column per entry of ``self.Headers``; list values are joined with
        ``|`` and dict values are written as ``key:value`` pairs joined with
        ``|``. The JSON file is a list of reaction objects ordered by
        reaction id, in which every ``"null"`` string (top-level, or one
        level down inside a dict value) is emitted as JSON ``null``.

        ``reactions_dict`` itself is left untouched: the "null" -> None
        conversion is applied to copies, so saving twice produces the same
        TSV both times.

        @param reactions_dict: dict mapping reaction id to a reaction dict
                               that has a value for every header
        @return: None
        """
        rxns_root = os.path.splitext(self.RxnsFile)[0]

        # Print to TSV
        with open(rxns_root + ".tsv", 'w') as rxns_file:
            rxns_file.write("\t".join(self.Headers) + "\n")
            for rxn in sorted(reactions_dict):
                values_list = list()
                for header in self.Headers:
                    value = reactions_dict[rxn][header]
                    if isinstance(value, list):
                        value = "|".join(value)
                    elif isinstance(value, dict):
                        # str() keeps non-string entries (numbers, None)
                        # from raising TypeError during concatenation.
                        value = "|".join(
                            str(entry) + ':' + str(value[entry])
                            for entry in value)
                    values_list.append(str(value))
                rxns_file.write("\t".join(values_list) + "\n")

        # Re-configure for JSON on copies, so the caller's data keeps its
        # "null" markers.
        new_reactions_list = list()
        for rxn_id in sorted(reactions_dict):
            json_obj = dict()
            for key, value in reactions_dict[rxn_id].items():
                if isinstance(value, dict):
                    value = {
                        entry: (None if sub_value == "null" else sub_value)
                        for entry, sub_value in value.items()
                    }
                elif value == "null":
                    value = None
                json_obj[key] = value
            new_reactions_list.append(json_obj)

        # Print to JSON
        with open(rxns_root + ".json", 'w') as rxns_file:
            rxns_file.write(
                json.dumps(new_reactions_list, indent=4, sort_keys=True))

    def loadMSAliases(self, sources_array=None):
        """Load reaction aliases from ``self.AliasFile``.

        Rows whose ``ModelSEED ID`` does not contain ``rxn`` are ignored.
        When ``sources_array`` is empty or omitted, or contains ``"All"``,
        every source is accepted; otherwise only rows whose whole ``Source``
        field appears in ``sources_array`` are kept. A kept row's ``Source``
        field is split on ``|`` and the alias is recorded under each part.

        The caller's list is never modified.

        @param sources_array: optional iterable of source names to keep
        @return: dict mapping reaction id -> source -> list of external ids
        """
        # Work on a private copy: the original appended "All" to the
        # argument, which changed the caller's list and the shared default.
        sources = list(sources_array) if sources_array else ["All"]
        accept_all = "All" in sources

        aliases_dict = dict()
        with open(self.AliasFile) as alias_fh:
            for line in DictReader(alias_fh, dialect='excel-tab'):
                rxn_id = line['ModelSEED ID']
                if "rxn" not in rxn_id:
                    continue

                if not accept_all and line['Source'] not in sources:
                    continue

                rxn_aliases = aliases_dict.setdefault(rxn_id, dict())
                for source in line['Source'].split('|'):
                    rxn_aliases.setdefault(source, list()).append(
                        line['External ID'])

        return aliases_dict

    def loadNames(self):
        """Load reaction names from ``self.NameFile``.

        The file is read as tab-separated with a header row. Rows whose
        ``ModelSEED ID`` does not contain ``rxn`` are ignored.

        @return: dict mapping reaction id to the list of its ``External ID``
                 values, in file order
        """
        names_dict = dict()
        # The context manager closes the file once parsing is done; the
        # original left the handle open.
        with open(self.NameFile) as names_fh:
            for line in DictReader(names_fh, dialect='excel-tab'):
                rxn_id = line['ModelSEED ID']
                if "rxn" not in rxn_id:
                    continue

                names_dict.setdefault(rxn_id, list()).append(
                    line['External ID'])

        return names_dict

    def loadPathways(self):
        """Load reaction pathway assignments from ``self.PwyFile``.

        The file is read as tab-separated with a header row. Rows whose
        ``ModelSEED ID`` does not contain ``rxn`` are ignored.

        @return: dict mapping reaction id -> ``Source`` -> list of
                 ``External ID`` values, in file order
        """
        pathways_dict = dict()
        # The context manager closes the file once parsing is done; the
        # original left the handle open.
        with open(self.PwyFile) as pwy_fh:
            for line in DictReader(pwy_fh, dialect='excel-tab'):
                rxn_id = line['ModelSEED ID']
                if "rxn" not in rxn_id:
                    continue

                by_source = pathways_dict.setdefault(rxn_id, dict())
                by_source.setdefault(line['Source'], list()).append(
                    line['External ID'])

        return pathways_dict

    def loadECs(self):
        """Load reaction EC numbers from ``self.ECFile``.

        The file is read as tab-separated with a header row. Rows whose
        ``ModelSEED ID`` does not contain ``rxn`` are ignored.

        @return: dict mapping reaction id to the list of its ``External ID``
                 values, in file order
        """
        ecs_dict = dict()
        # The context manager closes the file once parsing is done; the
        # original left the handle open.
        with open(self.ECFile) as ecs_fh:
            for line in DictReader(ecs_fh, dialect='excel-tab'):
                rxn_id = line['ModelSEED ID']
                if "rxn" not in rxn_id:
                    continue

                ecs_dict.setdefault(rxn_id, list()).append(
                    line['External ID'])

        return ecs_dict
예제 #14
0
output = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD'],
                                 universal_newlines=True)
branch = output.strip()
Disambiguation_Object['metadata']['branch'] = branch

time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(time.time()))
Disambiguation_Object['metadata']['date_time'] = time_str

##########################################################
#
# Collect compound data
#
##########################################################
from BiochemPy import Reactions, Compounds

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()

if (disambiguating_cpd not in compounds_dict):
    print("Error: compound " + disambiguating_cpd +
          " is not found in the ModelSEED database")
    sys.exit()

if (compounds_dict[disambiguating_cpd]['is_obsolete'] == 1):
    print("Warning: compound " + disambiguating_cpd +
          " is obsolete, consider using the non-obsolete version")

Disambiguation_Object['from'] = {
    'id': disambiguating_cpd,
    'structures': {},
    'aliases': {},
예제 #15
0
#!/usr/bin/env python
# Script excerpt: loads compound aliases and reactions through BiochemPy,
# then starts reading a tab-separated table of per-compound overrides.
import os
import sys
import json
from BiochemPy import Compounds, Reactions

# Load Compounds
compounds_helper = Compounds()
aliases_dict = compounds_helper.loadMSAliases()

# Load Reactions
ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

# Load ACPs
# Overridden_Fields is keyed by the first column of each data row
# (presumably a compound id -- confirm against the data file).
# `header` holds the remaining columns of the first line read.
Overridden_Fields = dict()
header = list()
# The table is expected to sit next to this script.
with open(os.path.dirname(__file__) + '/ACPs_Master_Formula_Charge.txt') as fh:
    for line in fh.readlines():
        line = line.strip()
        array = line.split('\t')

        # Split off the first column; `array` keeps the remaining fields.
        cpd = array.pop(0)

        # An empty `header` means this is the first line: store it as the
        # column names and skip to the data rows.
        if (len(header) == 0):
            header = array
            continue

        if (cpd not in Overridden_Fields):
            Overridden_Fields[cpd] = dict()
예제 #16
0
#!/usr/bin/env python
# Script excerpt: loads compounds, their names and their aliases through
# BiochemPy, and builds a lookup from search-name to ModelSEED id.
import os, sys, re, copy
from csv import DictReader
from collections import OrderedDict
temp = list()
header = True

Biochem = "MetaCyc"

# Make the in-repo BiochemPy package importable (path is relative to the
# current working directory).
sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()

names_dict = compounds_helper.loadNames()
searchnames_dict = dict()  # searchname -> first ModelSEED id that produced it
all_names_dict = dict()    # used as a set of every raw name seen
new_name_count = dict()
# Ids are visited in sorted order, so a searchname shared by several
# compounds is assigned to the lowest-sorting id.
for msid in sorted(names_dict):
    for name in names_dict[msid]:
        all_names_dict[name] = 1

        searchname = compounds_helper.searchname(name)
        # Keep only the first id seen for each searchname
        if (searchname not in searchnames_dict):
            searchnames_dict[searchname] = msid

original_alias_dict = compounds_helper.loadMSAliases()
source_alias_dict = dict()
all_aliases = dict()
예제 #17
0
#!/usr/bin/env python
# Script excerpt: for compounds whose formula is the string "null", derive
# a formula from the compound's InChI structure and store it.
import os, sys
temp = list()
header = 1

# Make the in-repo BiochemPy package importable (path is relative to the
# current working directory).
sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()
Structures_Dict = CompoundsHelper.loadStructures(["InChI"], ["ModelSEED"])

# Counts the compounds whose formula was replaced.
Update_Compounds = 0
for cpd in sorted(Compounds_Dict.keys()):
    # Skip compounds without an InChI structure.
    if (cpd not in Structures_Dict or 'InChI' not in Structures_Dict[cpd]):
        continue

    current_formula = Compounds_Dict[cpd]['formula']

    # Only compounds that have no formula yet are filled in.
    if (current_formula != "null"):
        continue

    # NOTE(review): the value stored under 'InChI' is passed to parse()
    # as-is -- confirm it is an InChI string and not a container of them.
    (inchi_formula, inchi_layers) = InChIs.parse(Structures_Dict[cpd]['InChI'])
    # Merge multi-component formulas before the proton adjustment.
    # NOTE(review): mergeFormula is called on the Compounds class rather
    # than on CompoundsHelper, and its `notes` result is overwritten by the
    # next statement without being inspected.
    (inchi_formula, notes) = Compounds.mergeFormula(inchi_formula)
    (adjusted_inchi_formula,
     notes) = InChIs.adjust_protons(inchi_formula, inchi_layers['p'])

    if (adjusted_inchi_formula != current_formula):
        Compounds_Dict[cpd]['formula'] = adjusted_inchi_formula
        Update_Compounds += 1
예제 #18
0
#!/usr/bin/env python
import os, sys
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()
Structures_Dict = CompoundsHelper.loadStructures(["InChI"], ["ModelSEED"])

diff_file = open("Compound_Formula_Differences.txt", 'w')
for cpd in sorted(Compounds_Dict.keys()):
    if (cpd not in Structures_Dict):
        diff_file.write("Zero structures for " + cpd + "\n")
        continue

    if ('InChI' not in Structures_Dict[cpd]):
        diff_file.write("No InChI structure for " + cpd + "\n")
        continue

    current_formula = Compounds_Dict[cpd]['formula']

    #Parse out InChI formula
    (inchi_formula, inchi_layers) = InChIs.parse(Structures_Dict[cpd]['InChI'])

    #Make sure formula is merged appropriately before applying proton adjustment
    (inchi_formula, notes) = Compounds.mergeFormula(inchi_formula)
    if (notes != ""):
        diff_file.write("Notes from merging InChI formula for " + cpd + ": " +
예제 #19
0
#!/usr/bin/env python
import os, sys, re, copy
from csv import DictReader
from collections import OrderedDict
temp = list()
header = True

Biochem = "MetaCyc"
Biochem_Root = "../../Biochemistry/Aliases/Provenance/Primary_Databases/"

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
reactions_codes = reactions_helper.generateCodes(reactions_dict)

Default_Rxn = {
    "id": "cpd00001",
    "name": "null",
    "abbreviation": "null",
    "aliases": "null",
    "code": "null",
    "stoichiometry": "null",
    "equation": "null",
    "definition": "null",
    "reversibility": "=",
    "direction": "=",
    "deltag": "10000000",
예제 #20
0
# Script excerpt: reads pKa_Strings.txt for KEGG and MetaCyc, clears the
# stored pKa/pKb of every compound, and starts matching structures to the
# loaded values.
# Directory holding the per-database structure files, relative to this
# script.
Structures_Root=os.path.dirname(__file__)+"/../../Biochemistry/Structures/"

# Load pKas and pKbs
# cpd_pKab_dict: column 0 -> {column 1: column 2} for every line read.
# NOTE(review): the meaning of the three columns is not visible here.
cpd_pKab_dict=dict()
for DB in ["KEGG","MetaCyc"]:
    with open(Structures_Root+DB+'/pKa_Strings.txt') as fh:
        for line in fh.readlines():
            line=line.strip()
            array=line.split('\t')
            # MetaCyc is read second, so its value wins when both files
            # share the same (column 0, column 1) pair.
            if(array[0] not in cpd_pKab_dict):
                cpd_pKab_dict[array[0]]={array[1]:array[2]}
            else:
                cpd_pKab_dict[array[0]][array[1]]=array[2]

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
structures_dict = compounds_helper.loadStructures(["SMILE","InChI","InChIKey"],["ModelSEED"])
aliases_dict = compounds_helper.loadMSAliases()

# We're removing all pKa and pKb before loading new ones
for cpd in compounds_dict:
    compounds_dict[cpd]['pka']=""
    compounds_dict[cpd]['pkb']=""

# We're only loading pKa/pKb for compounds that have an accepted unique structure in ModelSEED
for cpd in structures_dict:
    found=False
    for DB in ["KEGG","MetaCyc"]:
        # Skip this database once a match was found, or when the compound
        # has no alias from it.
        # NOTE(review): aliases_dict[cpd] raises KeyError if a compound
        # with a structure has no alias entry at all -- confirm.
        if(found is True or DB not in aliases_dict[cpd]):
            continue
예제 #21
0
#!/usr/bin/env python
# Script excerpt: loads the compound tables through BiochemPy and reports
# ids that appear in the names, aliases or structures tables but not in
# the main compounds table.
import os
import sys
import subprocess
import time
import copy
import re
import json
from collections import OrderedDict
from BiochemPy import Reactions, Compounds

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
cpds_aliases_dict = compounds_helper.loadMSAliases()
cpds_names_dict = compounds_helper.loadNames()
structures_dict = compounds_helper.loadStructures(["InChI","SMILE"],["ModelSEED"])

# Each check below only prints a message; nothing is removed or repaired.
for cpd in cpds_names_dict:
    if(cpd not in compounds_dict):
        print(cpd+" shouldn't be in names_dict")

for cpd in cpds_aliases_dict:
    if(cpd not in compounds_dict):
        print(cpd+" shouldn't be in aliases_dict")

for cpd in structures_dict:
    if(cpd not in compounds_dict):
        print(cpd+" shouldn't be in structures_dict")

# Load Reactions
reactions_helper = Reactions()
#!/usr/bin/env python
import os, sys
temp=list();
header=1;

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
cpds_aliases_dict = compounds_helper.loadMSAliases()
cpds_names_dict = compounds_helper.loadNames()

# We actually don't want obsolete reactions and compounds in our database
# So we're striving to remove any 'new' ones that are obsolete
# Any information attached to them should be associated with their linked counterpart
# We need to retain older compounds that are now obsolete as these may be present in prior published models

# The number used here is the last compound entered before we re-integrated updates from KEGG and MetaCyc
# In the fall of 2018, so after this point, we'll take out obsolete compounds
last_cpd_str='cpd31000'
last_cpd_int=int(last_cpd_str[3:])

delete_cpds=list()
for cpd in compounds_dict:
    cpd_int = int(cpd[3:])
    if(cpd_int > last_cpd_int and compounds_dict[cpd]['is_obsolete']):
        delete_cpds.append(cpd)

for cpd in delete_cpds:
#!/usr/bin/env python
# Script excerpt: loads compounds and reactions, prints a section banner,
# and collects InChI-containing entries from two structure files.
import os, sys
from BiochemPy import Compounds, Reactions, InChIs

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()

print("\n================")
print(
    "For Section: \"Computation of thermodynamic properties of ModelSEED compounds and reaction\"\n"
)

# Used as a set: keys are the sixth column of every line whose sixth
# column contains the text "InChI".
# NOTE(review): a line with fewer than six tab-separated fields raises
# IndexError -- confirm the file has a fixed layout.
MS_Complete_Structures = dict()
with open("../../../Biochemistry/Structures/Unique_ModelSEED_Structures.txt"
          ) as fh:
    for line in fh.readlines():
        line = line.strip()
        array = line.split('\t')

        if ("InChI" in array[5]):
            MS_Complete_Structures[array[5]] = 1

MNX_Complete_Structures = dict()
with open("../../../Biochemistry/Structures/MetaNetX/chem_prop.tsv") as fh:
    header = 1
    for line in fh.readlines():
        # Lines starting with '#' are skipped.
        if (line[0] == "#"):
            continue
예제 #24
0
class Reactions:
    """Load, analyse and save the ModelSEED reactions table.

    A "reagent" throughout this class is a dict with the keys ``reagent``
    (``<compound>_<compartment><index>``), ``coefficient``, ``compound``,
    ``compartment``, ``index``, ``name``, ``formula`` and ``charge``, as
    produced by :meth:`parseStoich`. Negative coefficients mark the
    left-hand side of a reaction, positive ones the right-hand side.
    """

    def __init__(self,
                 biochem_root='../../Biochemistry/',
                 rxns_file='reactions.tsv'):
        """Record file locations, read the column headers, load compounds.

        @param biochem_root: directory prefix (used by plain string
                             concatenation, so it should end with a slash)
        @param rxns_file: name of the reactions TSV inside ``biochem_root``
        """
        self.BiochemRoot = biochem_root
        self.RxnsFile = biochem_root + rxns_file
        self.AliasFile = biochem_root + "Aliases/Reactions_Aliases.tsv"

        # Only the header row is needed here; close the file right away.
        with open(self.RxnsFile) as rxns_fh:
            self.Headers = DictReader(rxns_fh, dialect='excel-tab').fieldnames

        # Imported here rather than at module level, as in the original.
        from BiochemPy import Compounds
        self.CompoundsHelper = Compounds()
        self.Compounds_Dict = self.CompoundsHelper.loadCompounds()

    def loadReactions(self):
        """Read the reactions TSV into a dict keyed by reaction id.

        The ``is_transport`` and ``is_obsolete`` columns are converted to
        int; every other column is kept as the string read from the file.

        @return: dict mapping reaction id to its row dict
        """
        rxns_dict = dict()
        with open(self.RxnsFile) as rxns_fh:
            for line in DictReader(rxns_fh, dialect='excel-tab'):
                for header in ["is_transport", "is_obsolete"]:
                    line[header] = int(line[header])
                rxns_dict[line['id']] = line

        return rxns_dict

    def parseStoich(self, stoichiometry):
        """Parse a stoichiometry string into a list of reagent dicts.

        The string is a ``;``-separated list of
        ``coefficient:compound:compartment:index:name`` entries. The name is
        the last field and may itself contain colons.

        @param stoichiometry: stoichiometry string; an empty string yields
                              an empty list
        @return: list of reagent dicts, in the order found in the string
        """
        rxn_cpds_array = list()
        if not stoichiometry:
            # "".split(";") would yield [""], which cannot be unpacked.
            return rxn_cpds_array

        for rgt in stoichiometry.split(";"):
            (coeff, cpd, cpt, index, name) = rgt.split(":", 4)
            # Built from the raw strings, before the int conversion below.
            rgt_id = cpd + "_" + cpt + index

            coeff = float(coeff)

            # Correct for redundant ".0" in floats
            if (str(coeff)[-2:] == ".0"):
                coeff = int(round(coeff))

            rxn_cpds_array.append({
                "reagent": rgt_id,
                "coefficient": coeff,
                "compound": cpd,
                "compartment": int(cpt),
                "index": int(index),
                "name": name,
                "formula": self.Compounds_Dict[cpd]["formula"],
                "charge": self.Compounds_Dict[cpd]["charge"]
            })
        return rxn_cpds_array

    @staticmethod
    def isTransport(rxn_cpds_array):
        """Return 1 if the reagents span more than one compartment, else 0.

        @param rxn_cpds_array: list of reagent dicts
        @return: 1 or 0
        """
        compartments = {rgt['compartment'] for rgt in rxn_cpds_array}
        return 1 if len(compartments) > 1 else 0

    def generateCodes(self, rxns_dict):
        """Group reactions by their stoichiometry code.

        Reactions whose ``status`` is ``EMPTY`` are skipped.

        @param rxns_dict: dict mapping reaction id to reaction dict
        @return: dict mapping code -> {reaction id: 1}
        """
        codes_dict = dict()
        for rxn in rxns_dict:
            if (rxns_dict[rxn]['status'] == "EMPTY"):
                continue
            code = self.generateCode(rxns_dict[rxn]['stoichiometry'])
            codes_dict.setdefault(code, dict())[rxn] = 1
        return codes_dict

    def generateCode(self, stoichiometry):
        """Build a direction-independent code for a stoichiometry string.

        Each side is rendered as ``reagent:abs(coefficient)`` entries joined
        with ``|``; the two sides are then sorted and joined with ``|=|``,
        so a reaction and its reverse get the same code. Protons
        (``cpd00067``) are left out unless the reaction is a transporter.

        @param stoichiometry: stoichiometry string
        @return: code string
        """
        rxn_cpds_array = self.parseStoich(stoichiometry)

        # It matters if it's a transport reaction: protons are included
        # when matching transporters.
        is_transport = self.isTransport(rxn_cpds_array)

        # It matters which side of the equation, so build reagents and
        # products arrays
        reagents = list()
        products = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x: (x["reagent"], x["coefficient"])):
            # skip protons
            if ("cpd00067" in rgt["reagent"] and is_transport == 0):
                continue

            entry = rgt["reagent"] + ":" + str(abs(rgt["coefficient"]))
            if (rgt["coefficient"] < 0):
                reagents.append(entry)
            if (rgt["coefficient"] > 0):
                products.append(entry)

        rgt_string = "|".join(reagents)
        pdt_string = "|".join(products)
        # Sorting the overall strings here helps with matching transporters
        return "|=|".join(sorted([rgt_string, pdt_string]))

    @staticmethod
    def buildStoich(rxn_cpds_array):
        """Serialise reagent dicts back into a stoichiometry string.

        Entries are ordered with non-positive coefficients first, then by
        reagent id. The input dicts are not modified, so the same array can
        be serialised repeatedly or used afterwards.

        @param rxn_cpds_array: list of reagent dicts with numeric
                               coefficients
        @return: ``;``-joined ``coefficient:compound:compartment:index:name``
        """
        stoichiometry_array = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x:
                          (int(x["coefficient"] > 0), x["reagent"])):

            # Work on local values; the original overwrote the caller's
            # dicts with strings, which broke later numeric comparisons.
            coeff = rgt["coefficient"]

            # Correct for redundant ".0" in floats
            if (str(coeff)[-2:] == ".0"):
                coeff = int(round(coeff))

            stoichiometry_array.append(":".join([
                str(coeff), rgt["compound"],
                str(rgt["compartment"]),
                str(rgt["index"]), rgt["name"]
            ]))
        return ";".join(stoichiometry_array)

    def balanceReaction(self, rgts_array):
        """Report the mass and charge balance of a reaction.

        The input reagent dicts are not modified.

        @param rgts_array: list of reagent dicts
        @return: one of
                 ``"EMPTY"`` (no reagents),
                 ``"ERROR: Duplicate reagents"`` (a reagent id repeats),
                 ``"CPDFORMERROR"`` (a formula parsed to no atoms),
                 ``"OK"`` (balanced), or a status such as
                 ``"MI:C:1/H:-2"``, ``"CI:1"`` or ``"MI:H:1|CI:1"`` listing
                 the net atoms and/or net charge
        """
        if (len(rgts_array) == 0):
            return "EMPTY"

        ########################################
        # Check that each reagent is either a
        # different compound or in a different
        # compartment, and report.
        ########################################
        seen_reagents = set()
        for rgt in rgts_array:
            if rgt["reagent"] in seen_reagents:
                return "ERROR: Duplicate reagents"
            seen_reagents.add(rgt["reagent"])

        ########################################
        # Check for duplicate compounds in
        # different compartments, these are
        # balanced directly.
        #######################################
        cpds_coeff_dict = dict()
        for rgt in rgts_array:
            cpd = rgt["compound"]
            # Use float() because you can get real coefficients
            cpds_coeff_dict[cpd] = cpds_coeff_dict.get(cpd, 0) + float(
                rgt["coefficient"])

        # Build dict of compounds, one entry per compound carrying its net
        # coefficient. Copies are used so the caller's reagents keep their
        # own per-compartment coefficients.
        cpds_dict = dict()
        for rgt in rgts_array:
            cpd_entry = dict(rgt)
            cpd_entry["coefficient"] = cpds_coeff_dict[rgt["compound"]]
            cpds_dict[rgt["compound"]] = cpd_entry

        ########################################
        # Check for duplicate elements, across
        # all compounds, these are balanced
        # directly.
        #######################################
        rxn_net_charge = 0.0
        rxn_net_mass = dict()
        for cpd in cpds_dict:
            cpd_atoms = self.CompoundsHelper.parseFormula(
                cpds_dict[cpd]["formula"])

            if (len(cpd_atoms) == 0):
                return "CPDFORMERROR"

            cpd_coeff = float(cpds_dict[cpd]["coefficient"])
            rxn_net_charge += float(cpds_dict[cpd]["charge"]) * cpd_coeff

            for atom in cpd_atoms:
                rxn_net_mass[atom] = rxn_net_mass.get(
                    atom, 0.0) + float(cpd_atoms[atom]) * cpd_coeff

        # Round out tiny numbers that occur because we add/subtract floats
        # Threshold of 1e-6 found to capture all these instances without
        # removing actual small differences in mass.
        for atom in rxn_net_mass:
            if (-1e-6 < rxn_net_mass[atom] < 1e-6):
                rxn_net_mass[atom] = 0

        if (-1e-6 < rxn_net_charge < 1e-6):
            rxn_net_charge = 0

        # Report any imbalance
        imbalanced_atoms_array = list()
        for atom in sorted(rxn_net_mass):
            if (rxn_net_mass[atom] == 0):
                continue

            # Correct for redundant ".0" in floats
            if (str(rxn_net_mass[atom])[-2:] == ".0"):
                rxn_net_mass[atom] = int(round(rxn_net_mass[atom]))

            imbalanced_atoms_array.append(atom + ":" + str(rxn_net_mass[atom]))

        # Correct for redundant ".0" in floats
        if (str(rxn_net_charge)[-2:] == ".0"):
            rxn_net_charge = int(rxn_net_charge)

        status_parts = list()
        if (len(imbalanced_atoms_array) > 0):
            status_parts.append("MI:" + "/".join(imbalanced_atoms_array))
        if (rxn_net_charge != 0):
            status_parts.append("CI:" + str(rxn_net_charge))

        if not status_parts:
            return "OK"
        return "|".join(status_parts)

    def adjustCompound(self,
                       rxn_cpds_array,
                       compound,
                       adjustment,
                       compartment=0):
        """Subtract ``adjustment`` from a compound's coefficient, in place.

        If the compound is not yet present in the given compartment, a new
        reagent with coefficient ``-adjustment`` and index 0 is appended.
        A matching reagent whose coefficient reaches exactly zero is removed.

        @param rxn_cpds_array: list of reagent dicts; modified in place
        @param compound: compound id to adjust
        @param adjustment: amount subtracted from the coefficient
        @param compartment: compartment to adjust in (default 0)
        @return: ``rxn_cpds_array`` (the same list object). The original
                 returned the list only when ``adjustment`` was 0 and None
                 otherwise; it is now returned on every path.
        """
        if (adjustment == 0):
            return rxn_cpds_array

        ######################################################################
        # We will always assume to adjust a compound automatically
        # in the compartment indexed as zero, unless otherwise specified.
        # This answers the question of how to handle transporters.
        ######################################################################

        # Check to see if it already exists
        cpd_exists = False
        cpd_remove = None
        for rgt in rxn_cpds_array:
            if (rgt["compound"] == compound
                    and rgt["compartment"] == compartment):
                rgt["coefficient"] -= adjustment
                cpd_exists = True
                if (rgt["coefficient"] == 0):
                    cpd_remove = rgt

        if not cpd_exists:
            rxn_cpds_array.append({
                "reagent": compound + "_" + str(compartment) + "0",
                "coefficient": 0 - adjustment,
                "compound": compound,
                "compartment": compartment,
                "index": 0,
                "name": self.Compounds_Dict[compound]["name"],
                "formula": self.Compounds_Dict[compound]["formula"],
                "charge": self.Compounds_Dict[compound]["charge"]
            })

        if cpd_remove is not None:
            rxn_cpds_array.remove(cpd_remove)

        return rxn_cpds_array

    @staticmethod
    def _formatReagents(rgts):
        """Render reagents as ``(abs coefficient) compound[compartment]``."""
        return [
            "(" + str(abs(rgt["coefficient"])) + ") " + rgt["compound"] +
            "[" + str(rgt["compartment"]) + "]" for rgt in rgts
        ]

    def rebuildReaction(self, reaction_dict, stoichiometry):
        """Recompute the derived text fields of a reaction, in place.

        Sets ``stoichiometry``, ``code``, ``equation``, ``definition`` and
        ``compound_ids`` on ``reaction_dict``. ``reaction_dict["direction"]``
        selects the arrow used in ``equation`` and ``definition``: ``=``
        gives ``<=>``, ``<`` gives ``<=``, anything else gives ``=>``.
        ``code`` always uses ``<=>`` and omits protons (``cpd00067``).
        ``definition`` is the equation with compound ids replaced by names.

        @param reaction_dict: reaction dict with a ``direction`` key
        @param stoichiometry: stoichiometry string
        @return: None
        """
        # Assign stoich
        reaction_dict["stoichiometry"] = stoichiometry

        # Build list of "reagents" and "products"; a zero coefficient
        # counts as a reagent.
        rxn_cpds_array = self.parseStoich(stoichiometry)
        reagents_array = list()
        products_array = list()
        compound_ids_dict = dict()
        for rgt in rxn_cpds_array:
            compound_ids_dict[rgt["compound"]] = 1
            if (rgt["coefficient"] > 0):
                products_array.append(rgt)
            else:
                reagents_array.append(rgt)

        rgts_str_array = self._formatReagents(reagents_array)
        pdts_str_array = self._formatReagents(products_array)

        if (reaction_dict["direction"] == "="):
            arrow = "<=>"
        elif (reaction_dict["direction"] == "<"):
            arrow = "<="
        else:
            arrow = "=>"

        equation = " ".join(
            [" + ".join(rgts_str_array), arrow, " + ".join(pdts_str_array)])

        code = " ".join([
            " + ".join(x for x in rgts_str_array if "cpd00067" not in x),
            "<=>",
            " + ".join(x for x in pdts_str_array if "cpd00067" not in x)
        ])

        # Replace ids with names in Definition
        definition = equation
        for cpd_id in compound_ids_dict:
            if (cpd_id in definition):
                definition = definition.replace(
                    cpd_id, self.Compounds_Dict[cpd_id]["name"])

        reaction_dict["code"] = code
        reaction_dict["equation"] = equation
        reaction_dict["definition"] = definition
        reaction_dict["compound_ids"] = ";".join(sorted(compound_ids_dict))

        return

    def saveReactions(self, reactions_dict):
        """Write all reactions to a TSV file and a JSON file.

        Both files share the stem of ``self.RxnsFile``. The TSV has one
        column per entry of ``self.Headers`` with rows sorted by reaction
        id; the JSON file is ``reactions_dict`` dumped with sorted keys.

        @param reactions_dict: dict mapping reaction id to reaction dict
        @return: None
        """
        rxns_root = os.path.splitext(self.RxnsFile)[0]

        # Print to TSV
        with open(rxns_root + ".tsv", 'w') as rxns_file:
            rxns_file.write("\t".join(self.Headers) + "\n")
            for rxn in sorted(reactions_dict):
                rxns_file.write("\t".join(
                    str(reactions_dict[rxn][header])
                    for header in self.Headers) + "\n")

        # Print to JSON
        with open(rxns_root + ".json", 'w') as rxns_file:
            rxns_file.write(
                json.dumps(reactions_dict, indent=4, sort_keys=True))

    def loadMSAliases(self, sources_array=None):
        """Load reaction aliases for the requested sources.

        Rows whose ``MS ID`` does not contain ``rxn`` or whose ``Source`` is
        not in ``sources_array`` are ignored.

        @param sources_array: iterable of source names to keep; when empty
                              or omitted an empty dict is returned
        @return: dict mapping reaction id -> source -> list of external ids
        """
        if not sources_array:
            return {}

        aliases_dict = dict()
        with open(self.AliasFile) as alias_fh:
            for line in DictReader(alias_fh, dialect='excel-tab'):
                if ("rxn" not in line['MS ID']):
                    continue

                if (line['Source'] not in sources_array):
                    continue

                by_source = aliases_dict.setdefault(line['MS ID'], dict())
                by_source.setdefault(line['Source'], list()).append(
                    line['External ID'])

        return aliases_dict
예제 #25
0
#!/usr/bin/env python
# Merge multi-component compound formulas and save the compounds table
# when at least one formula or note changed.
import os, sys
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds

CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()

# Flag: 1 once any compound record has been touched.
Update_Compounds = 0
for cpd in sorted(Compounds_Dict):
    cpd_record = Compounds_Dict[cpd]
    old_formula = cpd_record["formula"]
    new_formula, notes = CompoundsHelper.mergeFormula(old_formula)

    # Keep whatever the merge step reported alongside the compound.
    if notes != "":
        cpd_record["notes"] = notes
        Update_Compounds = 1

    if new_formula == old_formula:
        continue

    print("Updating " + cpd + ": " + old_formula + " --> " + new_formula)
    cpd_record["formula"] = new_formula
    Update_Compounds = 1

if Update_Compounds == 1:
    print("Saving compounds")
    CompoundsHelper.saveCompounds(Compounds_Dict)
예제 #26
0
#!/usr/bin/env python
import os
import sys
import json
from BiochemPy import Compounds, Reactions

#Load Compounds
CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()
MS_Aliases_Dict = CompoundsHelper.loadMSAliases(["MetaCyc", "PlantCyc"])

for cpd in MS_Aliases_Dict:
    if ('PlantCyc' not in MS_Aliases_Dict[cpd]):
        MS_Aliases_Dict[cpd]['PlantCyc'] = []
    if ('MetaCyc' not in MS_Aliases_Dict[cpd]):
        MS_Aliases_Dict[cpd]['MetaCyc'] = []

    print("\t".join([
        cpd, "|".join(MS_Aliases_Dict[cpd]['PlantCyc']),
        "|".join(MS_Aliases_Dict[cpd]["MetaCyc"])
    ]))

#Load Reactions
ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()
MS_Aliases_Dict = ReactionsHelper.loadMSAliases(["MetaCyc", "PlantCyc"])

for rxn in MS_Aliases_Dict:
    if ('PlantCyc' not in MS_Aliases_Dict[rxn]):
        MS_Aliases_Dict[rxn]['PlantCyc'] = []
    if ('MetaCyc' not in MS_Aliases_Dict[rxn]):
예제 #27
0
#!/usr/bin/env python
import os, sys
from BiochemPy import Compounds, Reactions, InChIs

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()

# Two-way index over the ontology 'parent_class' links:
#   parents_children[parent][child_id] = 1
#   children_parents[child_id][parent] = 1
# The inner dicts are used as sets; the value 1 carries no meaning.
parents_children = dict()
children_parents = dict()

# Despite the name, this acts as an inclusion filter: only compounds whose
# InChIKey contains this string are processed below.
inchi_exception = "WQZGKKKJIJFFOK"
# NOTE(review): not referenced anywhere in the visible script.
cpd_exception = "cpd00027"

for cpd in compounds_dict:
    cpd_obj = compounds_dict[cpd]

    # Skip compounds whose ontology field is not a dict.
    if not isinstance(cpd_obj['ontology'], dict):
        continue

    if inchi_exception not in cpd_obj['inchikey']:
        continue

    child_id = cpd_obj['id']
    print(child_id, cpd_obj['inchikey'])

    if 'parent_class' in cpd_obj['ontology']:
        # 'parent_class' is a ';'-separated string of parent identifiers.
        # The loop variable is deliberately not called `cpd`, so it does not
        # clobber the outer loop variable.
        for parent in cpd_obj['ontology']['parent_class'].split(";"):
            children_parents.setdefault(child_id, dict())[parent] = 1
            parents_children.setdefault(parent, dict())[child_id] = 1

print(parents_children)
예제 #28
0
#!/usr/bin/env python
import os, sys
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

# Compare each compound's stored charge against the charge derived from its
# ModelSEED InChI structure.
CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()
Structures_Dict = CompoundsHelper.loadStructures(["InChI"], ["ModelSEED"])

# NOTE(review): diff_file is opened for writing, but in the visible code it is
# never written to (the writes below are commented out) and never closed.
diff_file = open("Compound_Charge_Differences.txt", 'w')
for cpd in sorted(Compounds_Dict.keys()):
    # Compounds without any structure entry are skipped silently.
    if (cpd not in Structures_Dict):
        #diff_file.write("Zero structures for "+cpd+"\n")
        continue

    # Compounds without an InChI structure are skipped silently.
    if ('InChI' not in Structures_Dict[cpd]):
        #diff_file.write("No InChI structure for "+cpd+"\n")
        continue

    current_charge = float(Compounds_Dict[cpd]['charge'])

    #Parse out InChI formula and layers
    # Only the first key of the InChI entry is used.
    inchi = list(Structures_Dict[cpd]['InChI'].keys())[0]
    (inchi_formula, inchi_layers) = InChIs.parse(inchi)

    # Charge is computed from the 'q' and 'p' layers of the parsed InChI.
    inchi_charge = InChIs.charge(inchi_layers['q'], inchi_layers['p'])

    if (inchi_charge != current_charge):
        # NOTE(review): inchikey, mnx_inchikey_dict and mnx are not assigned
        # anywhere in the visible script, and file_handle (closed below) is
        # never opened. This looks like the tail of a different, truncated
        # example spliced in here -- confirm against the original scripts.
        #
        # The lines below register mnx under three progressively shorter
        # forms of the key, keeping whichever value was stored first: the
        # full key, its first two '-'-separated parts, and its first part.
        if (inchikey not in mnx_inchikey_dict):
            mnx_inchikey_dict[inchikey] = mnx

        inchikey = "-".join(inchikey.split('-')[0:2])
        if (inchikey not in mnx_inchikey_dict):
            mnx_inchikey_dict[inchikey] = mnx

        inchikey = inchikey.split('-')[0]
        if (inchikey not in mnx_inchikey_dict):
            mnx_inchikey_dict[inchikey] = mnx

file_handle.close()

# Cross-check the structures held in ModelSEED to find the ones that also
# have a match in eQuilibrator.
compounds_helper = Compounds()
structures_dict = compounds_helper.loadStructures(["InChIKey"], ["ModelSEED"])
seed_mnx_structural_map = {}
for cpd, cpd_structures in structures_dict.items():
    structure_type = 'InChIKey'
    # loadStructures returns every compound, so confirm this one actually
    # carries a structure of the requested type before using it.
    if structure_type not in cpd_structures:
        continue

    # Structures are unique (1-1 with the compound id), so each compound has
    # exactly one entry; take it.
    structure_keys = list(cpd_structures[structure_type].keys())
    structure = structure_keys[0]

    # Matching is attempted on three levels: the full string first, then the
    # deprotonated string, and finally the structure part alone.
예제 #30
0
#!/usr/bin/env python
import os, sys
temp=list();
header=1;

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

# For every compound with an InChIKey and a ModelSEED structure, derive a
# formula from the InChI and, where it disagrees with the stored formula,
# build hydrogen-free versions of both.
CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()
Structures_Dict = CompoundsHelper.loadStructures(["InChI"], ["ModelSEED"])

for cpd in sorted(Compounds_Dict):
    if Compounds_Dict[cpd]['inchikey'] == '' or cpd not in Structures_Dict:
        continue

    # NOTE(review): the whole Structures_Dict[cpd]['InChI'] entry is handed to
    # InChIs.parse -- confirm it is an InChI string for this loadStructures.
    inchi_formula, inchi_layers = InChIs.parse(Structures_Dict[cpd]['InChI'])
    merged_inchi_formula = CompoundsHelper.mergeFormula(inchi_formula)[0]
    # Apply the InChI proton layer to the merged formula; only the formula
    # (first element of the returned pair) is kept.
    adjusted_inchi_formula = InChIs.adjust_protons(
        merged_inchi_formula, inchi_layers['p'])[0]

    if adjusted_inchi_formula != Compounds_Dict[cpd]['formula']:
        # Rebuild both formulas without hydrogen.
        adjusted_inchi_atoms_dict = CompoundsHelper.parseFormula(
            adjusted_inchi_formula)
        adjusted_inchi_atoms_dict.pop('H', None)
        adjusted_inchi_protonfree_formula = CompoundsHelper.buildFormula(
            adjusted_inchi_atoms_dict)

        original_formula_atoms_dict = CompoundsHelper.parseFormula(
            Compounds_Dict[cpd]['formula'])
        original_formula_atoms_dict.pop('H', None)
        original_formula_protonfree_formula = CompoundsHelper.buildFormula(
            original_formula_atoms_dict)