mnx_inchikey_dict[inchikey] = mnx inchikey = "-".join(inchikey.split('-')[0:2]) if (inchikey not in mnx_inchikey_dict): mnx_inchikey_dict[inchikey] = mnx inchikey = inchikey.split('-')[0] if (inchikey not in mnx_inchikey_dict): mnx_inchikey_dict[inchikey] = mnx file_handle.close() #Here we can cross-check the structures that are in ModelSEED to find ones where #there is a match in eQuilibrator compounds_helper = Compounds() structures_dict = compounds_helper.loadStructures(["InChIKey"], ["ModelSEED"]) seed_mnx_structural_map = dict() for cpd in structures_dict: structure_type = 'InChIKey' if (structure_type not in structures_dict[cpd]): #The load structures function will return all compounds, so have to #Check that the structure is there continue #As these are unique structures, i.e. 1-1 mapping with compound id, #there's only ever one in each list for each compound structure = list(structures_dict[cpd][structure_type].keys())[0] #Here we check on three levels, we check the full string #Then the deprotonated string, then the structure alone #As per email from Elad and Moritz, we should not expect
#!/usr/bin/env python import os, sys, re temp = list() header = 1 from BiochemPy import Compounds import pybel from rdkit.Chem import AllChem from rdkit import RDLogger lg = RDLogger.logger() lg.setLevel(RDLogger.ERROR) #Load Structures and Aliases CompoundsHelper = Compounds() Structures_Dict = CompoundsHelper.loadStructures(["SMILE", "InChI"], ["KEGG", "MetaCyc"]) Structures_Root = os.path.dirname(__file__) + "/../../Biochemistry/Structures/" file_handle_dict = dict() for source in "KEGG", "MetaCyc": for struct_type in "InChI", "SMILE": for struct_stage in "Charged", "Original": file_string = "_".join((source, struct_type, struct_stage)) file_name = Structures_Root + source + "/" + struct_type + "_" + struct_stage + "_Formulas_Charges.txt" file_handle_dict[file_string] = open(file_name, "w") resolved_structures = open('Resolved_Structures.txt', 'w') unresolved_structures = open('Unresolved_Structures.txt', 'w') for struct_type in sorted(Structures_Dict.keys()): # if(struct_type != 'InChI'):
#!/usr/bin/env python import os, sys temp = list() header = 1 sys.path.append('../../Libs/Python') from BiochemPy import Reactions, Compounds, InChIs CompoundsHelper = Compounds() Compounds_Dict = CompoundsHelper.loadCompounds() Structures_Dict = CompoundsHelper.loadStructures(["InChI"], ["ModelSEED"]) diff_file = open("Compound_Charge_Differences.txt", 'w') for cpd in sorted(Compounds_Dict.keys()): if (cpd not in Structures_Dict): #diff_file.write("Zero structures for "+cpd+"\n") continue if ('InChI' not in Structures_Dict[cpd]): #diff_file.write("No InChI structure for "+cpd+"\n") continue current_charge = float(Compounds_Dict[cpd]['charge']) #Parse out InChI formula and layers inchi = list(Structures_Dict[cpd]['InChI'].keys())[0] (inchi_formula, inchi_layers) = InChIs.parse(inchi) inchi_charge = InChIs.charge(inchi_layers['q'], inchi_layers['p']) if (inchi_charge != current_charge):
print("Warning: compound " + disambiguating_cpd + " is obsolete, consider using the non-obsolete version") Disambiguation_Object['from'] = { 'id': disambiguating_cpd, 'structures': {}, 'aliases': {}, 'names': {}, 'formula': compounds_dict[disambiguating_cpd]['formula'], 'charge': compounds_dict[disambiguating_cpd]['charge'], 'mass': compounds_dict[disambiguating_cpd]['mass'] } Aliases_Dict = compounds_helper.loadMSAliases() Names_Dict = compounds_helper.loadNames() Structures_Dict = compounds_helper.loadStructures(["InChI", "SMILE"], ["KEGG", "MetaCyc"]) #For reverse lookup reverse_aliases_dict = dict() for cpd in Aliases_Dict: for source in Aliases_Dict[cpd]: for alias in Aliases_Dict[cpd][source]: if (alias not in reverse_aliases_dict): reverse_aliases_dict[alias] = dict() if (source not in reverse_aliases_dict[alias]): reverse_aliases_dict[alias][source] = dict() reverse_aliases_dict[alias][source][cpd] = 1 reverse_structures_dict = dict() for type in Structures_Dict: for alias in Structures_Dict[type]:
#!/usr/bin/env python
"""Attach ModelSEED InChIKey and SMILES structures to every compound record.

Loads the ModelSEED "SMILE" and "InChIKey" structures and the compounds
table through BiochemPy's ``Compounds`` helper, fills the ``inchikey`` and
``smiles`` fields of each compound (empty string when nothing is known for
that compound), and saves the compounds back through the same helper.
"""
import os
import sys


def merge_structures(compounds_dict, structures_dict):
    """Fill the ``inchikey`` and ``smiles`` fields of each compound in place.

    Args:
        compounds_dict: mapping of compound id -> mutable compound record.
        structures_dict: mapping of compound id -> mapping of structure type
            (``'InChIKey'``, ``'SMILE'``) -> structure value.

    Returns:
        The same ``compounds_dict`` object, for convenience.
    """
    for cpd in sorted(compounds_dict.keys()):
        # Compounds without any structure entry get empty strings for both
        # fields, exactly as compounds missing only one structure type do.
        cpd_structures = structures_dict.get(cpd, {})
        # NOTE(review): the stored value is whatever loadStructures maps the
        # structure type to. Confirm it is a plain string here and not a
        # nested mapping keyed by structure string.
        compounds_dict[cpd]['inchikey'] = cpd_structures.get('InChIKey', "")
        compounds_dict[cpd]['smiles'] = cpd_structures.get('SMILE', "")
    return compounds_dict


def main():
    """Load structures and compounds, merge them, and save the compounds."""
    # BiochemPy is not on the default module search path, so the library
    # directory is appended before the import.
    sys.path.append('../../Libs/Python')
    from BiochemPy import Compounds

    compounds_helper = Compounds()
    structures_dict = compounds_helper.loadStructures(["SMILE", "InChIKey"],
                                                      ["ModelSEED"])
    compounds_dict = compounds_helper.loadCompounds()

    merge_structures(compounds_dict, structures_dict)

    # The call form of print is valid on both Python 2 and Python 3; the
    # original statement form was a SyntaxError on Python 3.
    print("Saving compounds")
    compounds_helper.saveCompounds(compounds_dict)


if __name__ == "__main__":
    main()
cpd = array.pop(0) if (len(header) == 0): header = array continue if (cpd not in Overridden_Fields): Overridden_Fields[cpd] = dict() for i in range(len(array)): if (array[i] == 'null' or array[i] == 10000000): continue Overridden_Fields[cpd][header[i]] = array[i] compounds_helper = Compounds() structures_dict = compounds_helper.loadStructures( ["SMILE", "InChI", "InChIKey"], ["ModelSEED"]) ignoring_structures_dict = compounds_helper.loadStructures( ["SMILE", "InChIKey"], ["KEGG", "MetaCyc"]) aliases_dict = compounds_helper.loadSourceAliases() compounds_dict = compounds_helper.loadCompounds() Structures_Root = os.path.dirname(__file__) + "/../../Biochemistry/Structures/" inchikey_dict = dict() smiles_dict = dict() for cpd in structures_dict: if ('InChIKey' in structures_dict[cpd]): for struct in structures_dict[cpd]['InChIKey'].keys(): if (struct not in inchikey_dict): inchikey_dict[struct] = list() inchikey_dict[struct].append(cpd) if ('SMILE' in structures_dict[cpd]):