mnx_inchikey_dict[inchikey] = mnx

        inchikey = "-".join(inchikey.split('-')[0:2])
        if (inchikey not in mnx_inchikey_dict):
            mnx_inchikey_dict[inchikey] = mnx

        inchikey = inchikey.split('-')[0]
        if (inchikey not in mnx_inchikey_dict):
            mnx_inchikey_dict[inchikey] = mnx

file_handle.close()

#Here we can cross-check the structures that are in ModelSEED to find ones where
#there is a match in eQuilibrator
#Only the InChIKey structures from the ModelSEED source are loaded for this.
compounds_helper = Compounds()
structures_dict = compounds_helper.loadStructures(["InChIKey"], ["ModelSEED"])
#NOTE(review): presumably filled in by the matching code further down in
#this loop (not visible here) -- confirm
seed_mnx_structural_map = dict()
for cpd in structures_dict:
    structure_type = 'InChIKey'
    if (structure_type not in structures_dict[cpd]):
        #loadStructures returns an entry for every compound, so we have to
        #check that an InChIKey is actually present before using it
        continue

    #As these are unique structures, i.e. 1-1 mapping with compound id,
    #there's only ever one in each list for each compound, so taking the
    #first key is enough
    structure = list(structures_dict[cpd][structure_type].keys())[0]

    #Here we check on three levels, we check the full string
    #Then the deprotonated string, then the structure alone
    #As per email from Elad and Moritz, we should not expect
#!/usr/bin/env python
import os, sys, re
temp = list()
header = 1

from BiochemPy import Compounds

import pybel
from rdkit.Chem import AllChem
from rdkit import RDLogger
lg = RDLogger.logger()
lg.setLevel(RDLogger.ERROR)

#Load the SMILE and InChI structures from the KEGG and MetaCyc sources
CompoundsHelper = Compounds()
Structures_Dict = CompoundsHelper.loadStructures(["SMILE", "InChI"],
                                                 ["KEGG", "MetaCyc"])

#Anchor the output directory on the absolute location of this script.
#os.path.dirname(__file__) is an empty string when the script is started by
#bare file name with a relative __file__ (e.g. `python script.py`), which
#would turn the path below into "/../../Biochemistry/Structures/", i.e. a
#directory under the filesystem root instead of one relative to the script.
Structures_Root = (os.path.dirname(os.path.abspath(__file__)) +
                   "/../../Biochemistry/Structures/")

#One output file per (source, structure type, stage) combination.
#Handles are keyed as "<source>_<type>_<stage>", e.g. "KEGG_InChI_Charged".
file_handle_dict = dict()
for source in "KEGG", "MetaCyc":
    for struct_type in "InChI", "SMILE":
        for struct_stage in "Charged", "Original":
            file_string = "_".join((source, struct_type, struct_stage))
            file_name = (Structures_Root + source + "/" + struct_type + "_" +
                         struct_stage + "_Formulas_Charges.txt")
            file_handle_dict[file_string] = open(file_name, "w")

#These two reports are written to the current working directory.
#NOTE(review): none of the handles opened here are closed in the visible
#part of the script -- confirm they are closed further down.
resolved_structures = open('Resolved_Structures.txt', 'w')
unresolved_structures = open('Unresolved_Structures.txt', 'w')
for struct_type in sorted(Structures_Dict.keys()):

    #    if(struct_type != 'InChI'):
# Example no. 3
# 0
#!/usr/bin/env python
import os, sys
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

#Load every compound, plus the InChI structures from the ModelSEED source
CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()
Structures_Dict = CompoundsHelper.loadStructures(["InChI"], ["ModelSEED"])

#Report file, opened in the current working directory
diff_file = open("Compound_Charge_Differences.txt", 'w')
#Walk the compounds in sorted id order so the report order is stable
for cpd in sorted(Compounds_Dict.keys()):
    #Compounds with no structure entry at all are skipped silently
    if (cpd not in Structures_Dict):
        #diff_file.write("Zero structures for "+cpd+"\n")
        continue

    #Likewise for compounds that have structures but no InChI among them
    if ('InChI' not in Structures_Dict[cpd]):
        #diff_file.write("No InChI structure for "+cpd+"\n")
        continue

    #Charge currently stored for the compound, as a float for comparison
    current_charge = float(Compounds_Dict[cpd]['charge'])

    #Parse out InChI formula and layers
    #Only the first InChI key listed for the compound is used
    inchi = list(Structures_Dict[cpd]['InChI'].keys())[0]
    (inchi_formula, inchi_layers) = InChIs.parse(inchi)

    #Charge derived from the 'q' and 'p' entries of the parsed layers
    inchi_charge = InChIs.charge(inchi_layers['q'], inchi_layers['p'])

    if (inchi_charge != current_charge):
# Example no. 4
# 0
    print("Warning: compound " + disambiguating_cpd +
          " is obsolete, consider using the non-obsolete version")

#Seed the 'from' side of the disambiguation record with the compound's id
#and its formula/charge/mass copied from compounds_dict; the structures,
#aliases and names sub-dicts start out empty here.
Disambiguation_Object['from'] = {
    'id': disambiguating_cpd,
    'structures': {},
    'aliases': {},
    'names': {},
    'formula': compounds_dict[disambiguating_cpd]['formula'],
    'charge': compounds_dict[disambiguating_cpd]['charge'],
    'mass': compounds_dict[disambiguating_cpd]['mass']
}

#Load the aliases, the names, and the InChI/SMILE structures from the
#KEGG and MetaCyc sources
Aliases_Dict = compounds_helper.loadMSAliases()
Names_Dict = compounds_helper.loadNames()
Structures_Dict = compounds_helper.loadStructures(["InChI", "SMILE"],
                                                  ["KEGG", "MetaCyc"])

#Reverse lookup: alias -> source -> compound id -> 1
#(Aliases_Dict is indexed the other way round: compound id -> source -> aliases)
reverse_aliases_dict = {}
for cpd in Aliases_Dict:
    for source in Aliases_Dict[cpd]:
        for alias in Aliases_Dict[cpd][source]:
            #setdefault creates the nested dicts the first time an alias,
            #or an alias/source pair, is encountered
            by_source = reverse_aliases_dict.setdefault(alias, {})
            by_source.setdefault(source, {})[cpd] = 1

reverse_structures_dict = dict()
for type in Structures_Dict:
    for alias in Structures_Dict[type]:
# Example no. 5
# 0
#!/usr/bin/env python
import os, sys

temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Compounds

#Load the SMILE and InChIKey structures from the ModelSEED source, and the
#compounds that are to be annotated with them
CompoundsHelper = Compounds()
Structures_Dict = CompoundsHelper.loadStructures(["SMILE", "InChIKey"],
                                                 ["ModelSEED"])
Compounds_Dict = CompoundsHelper.loadCompounds()

#Every compound gets both fields; a compound with no structure entry, or
#with no entry of the given type, gets an empty string.
#NOTE(review): the stored value is whatever Structures_Dict holds under
#'InChIKey'/'SMILE' for the compound -- confirm that is the plain string
#expected in the compounds table and not a nested container.
for cpd in sorted(Compounds_Dict.keys()):
    cpd_structures = Structures_Dict.get(cpd, {})
    Compounds_Dict[cpd]['inchikey'] = cpd_structures.get('InChIKey', "")
    Compounds_Dict[cpd]['smiles'] = cpd_structures.get('SMILE', "")

#print() call form: the former print statement is a SyntaxError on Python 3
print("Saving compounds")
CompoundsHelper.saveCompounds(Compounds_Dict)
        cpd = array.pop(0)

        if (len(header) == 0):
            header = array
            continue

        if (cpd not in Overridden_Fields):
            Overridden_Fields[cpd] = dict()

        for i in range(len(array)):
            if (array[i] == 'null' or array[i] == 10000000):
                continue
            Overridden_Fields[cpd][header[i]] = array[i]

#Load the ModelSEED structures, the KEGG/MetaCyc structures (bound to
#ignoring_structures_dict), the source aliases and the compounds
compounds_helper = Compounds()
structures_dict = compounds_helper.loadStructures(
    ["SMILE", "InChI", "InChIKey"], ["ModelSEED"])
ignoring_structures_dict = compounds_helper.loadStructures(
    ["SMILE", "InChIKey"], ["KEGG", "MetaCyc"])
aliases_dict = compounds_helper.loadSourceAliases()
compounds_dict = compounds_helper.loadCompounds()

#Anchor the structures directory on the absolute location of this script.
#os.path.dirname(__file__) is an empty string when the script is started by
#bare file name with a relative __file__ (e.g. `python script.py`), which
#would turn the path below into "/../../Biochemistry/Structures/", i.e. a
#directory under the filesystem root instead of one relative to the script.
Structures_Root = (os.path.dirname(os.path.abspath(__file__)) +
                   "/../../Biochemistry/Structures/")

#Reverse indexes from structure string to the compounds carrying it.
#inchikey_dict maps each InChIKey to the list of compound ids that have it;
#smiles_dict is only created here.
inchikey_dict = {}
smiles_dict = {}
for cpd in structures_dict:
    if ('InChIKey' in structures_dict[cpd]):
        for struct in structures_dict[cpd]['InChIKey']:
            #setdefault starts the list the first time a key is encountered
            inchikey_dict.setdefault(struct, []).append(cpd)
    if ('SMILE' in structures_dict[cpd]):