def adjust_protons(formula, protons):
    """
    Apply an InChI proton-layer adjustment to a single-component formula.

    @param formula: chemical formula as string (must not contain '.')
    @param protons: number of hydrogens to add (positive) or remove
                    (negative); any value accepted by int(). A falsy value
                    (None, "", 0) leaves the formula untouched.
    @return: tuple (formula, notes) where notes is "" unless the adjustment
             could not be applied cleanly
    """
    # Nothing to adjust
    if not protons:
        return (formula, "")

    proton_count = int(protons)

    # The whole function assumes that there is a single formula string.
    # If the formula can be broken into components, it must first be merged,
    # because the proton layer only ever has a single component.
    components = formula.split('.')
    if len(components) > 1:
        print(
            "Error: you must merge the formula components into a single formula string"
        )
        print("You can do so using Compounds.mergeFormula()")
        return formula, "Unadjustable due to multiple components"

    notes = ""
    atoms = Compounds.parseFormula(formula)

    if "H" not in atoms:
        # Special case for the proton itself: an empty formula becomes H<n>.
        # A non-empty formula without hydrogens is left as it is.
        if len(atoms) == 0:
            atoms['H'] = proton_count
    else:
        atoms['H'] += proton_count
        if atoms['H'] < 0:
            # More hydrogens were removed than the formula contains
            notes = 'Too Many Protons adjusted!'
        if atoms['H'] == 0:
            del atoms['H']

    return (Compounds.buildFormula(atoms), notes)
def __init__(self, biochem_root='../../Biochemistry/', rxns_file='reactions.tsv'):
    """
    Set up file locations and load the compounds needed to parse reactions.

    @param biochem_root: path of the Biochemistry directory (with trailing
                         slash); used as a plain string prefix
    @param rxns_file: name of the tab-separated reactions file inside
                      biochem_root
    """
    self.BiochemRoot = biochem_root
    self.RxnsFile = biochem_root + rxns_file
    self.AliasFile = biochem_root + "Aliases/Reactions_Aliases.tsv"

    # Only the header row is needed here, so read it and close the file
    # immediately instead of leaving the handle open.
    with open(self.RxnsFile) as rxns_fh:
        self.Headers = DictReader(rxns_fh, dialect='excel-tab').fieldnames

    # Imported here rather than at module level, as in the original code
    from BiochemPy import Compounds
    self.CompoundsHelper = Compounds()
    self.Compounds_Dict = self.CompoundsHelper.loadCompounds()
def parse(inchi, merge_formula=False):
    """
    Split an InChI string into its formula and its layers.

    @param inchi: InChI string
    @param merge_formula: bool, pass the formula through
                          Compounds.mergeFormula() before returning it
    @return: formula string and dictionary of layers where key is layer code
             and value is layer contents (without the leading layer code);
             every code in InChI_Layers is present, "" when absent
    """
    layer_dict = {code: "" for code in InChI_Layers}

    # special case for proton: it has no formula, only a 'p' layer
    proton_match = re.match(r'^InChI=1S/p([-+]\d*)', inchi)
    if proton_match:
        layer_dict['p'] = proton_match.group(1)
        return "", layer_dict

    # Drop the "InChI=1S" version prefix; what follows is formula then layers
    layers = inchi.split("/")[1:]
    # A string without any '/' has no formula at all
    formula = layers.pop(0) if layers else ""

    if merge_formula:
        # NOTE(review): scripts elsewhere in this file unpack
        # Compounds.mergeFormula() as (formula, notes); if that is the
        # current signature this assigns a tuple -- confirm before relying
        # on merge_formula=True.
        formula = Compounds.mergeFormula(formula)

    for layer in layers:
        # Skip empty segments (e.g. from a trailing or doubled '/') instead
        # of failing on layer[0]
        if not layer:
            continue
        layer_dict[layer[0]] = layer[1:]

    return formula, layer_dict
def __init__(self, biochem_root='../../../Biochemistry/', rxns_file='reactions.tsv'):
    """
    Set up file locations and load the compounds needed to parse reactions.

    @param biochem_root: path of the Biochemistry directory, relative to the
                         directory containing this module (trailing slash
                         expected; paths are built by string concatenation)
    @param rxns_file: name of the tab-separated reactions file inside
                      biochem_root
    """
    self.BiochemRoot = os.path.dirname(__file__) + '/' + biochem_root
    self.RxnsFile = self.BiochemRoot + rxns_file
    self.AliasFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Aliases.txt"
    self.NameFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Names.txt"
    self.PwyFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Pathways.txt"
    self.ECFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_ECs.txt"

    # Only the header row is needed here, so read it and close the file
    # immediately instead of leaving the handle open.
    with open(self.RxnsFile) as rxns_fh:
        self.Headers = DictReader(rxns_fh, dialect='excel-tab').fieldnames

    # Imported here rather than at module level, as in the original code
    from BiochemPy import Compounds
    self.CompoundsHelper = Compounds()
    self.Compounds_Dict = self.CompoundsHelper.loadCompounds()
def build(formula, layers, remove=(), merge_formula=False):
    """
    Assemble an InChI string from a formula and a layers dictionary.

    I use 'remove' to strip p, q, and stereochemical layers depending on
    how I want to compare InChI strings

    @param formula: Formula string
    @param layers: layers dictionary; must have an entry for every code in
                   InChI_Layers
    @param remove: container (tuple, list, set, dict, ...) of layer codes
                   that have to be left out of the InChI string
    @param merge_formula: bool, pass the formula through
                          Compounds.mergeFormula() first
    @return: InChI string, or "" when there is neither a formula nor any
             layer left to write
    """
    if merge_formula:
        formula = Compounds.mergeFormula(formula)

    # Layers are written in the canonical InChI_Layers order.
    # NOTE(review): the layer contents are joined as stored; if 'layers'
    # comes from a parser that strips the leading layer code, the codes are
    # not re-added here -- confirm that this is intended.
    kept_layers = [
        layers[code] for code in InChI_Layers
        if layers[code] and code not in remove
    ]

    # The previous length check (> 8) could never fail because the join
    # always produces at least "InChI=1S/"; test for emptiness directly.
    if not formula and not kept_layers:
        return ""

    return "/".join(["InChI=1S"] + [formula] + kept_layers)
#!/usr/bin/env python import os, sys, re temp = list() header = 1 from BiochemPy import Compounds import pybel from rdkit.Chem import AllChem from rdkit import RDLogger lg = RDLogger.logger() lg.setLevel(RDLogger.ERROR) #Load Structures and Aliases CompoundsHelper = Compounds() Structures_Dict = CompoundsHelper.loadStructures(["SMILE", "InChI"], ["KEGG", "MetaCyc"]) Structures_Root = os.path.dirname(__file__) + "/../../Biochemistry/Structures/" file_handle_dict = dict() for source in "KEGG", "MetaCyc": for struct_type in "InChI", "SMILE": for struct_stage in "Charged", "Original": file_string = "_".join((source, struct_type, struct_stage)) file_name = Structures_Root + source + "/" + struct_type + "_" + struct_stage + "_Formulas_Charges.txt" file_handle_dict[file_string] = open(file_name, "w") resolved_structures = open('Resolved_Structures.txt', 'w') unresolved_structures = open('Unresolved_Structures.txt', 'w') for struct_type in sorted(Structures_Dict.keys()):
#!/usr/bin/env python import os, sys from csv import DictReader temp = list() header = 1 sys.path.append('../../Libs/Python') from BiochemPy import Reactions, Compounds, InChIs CompoundsHelper = Compounds() Compounds_Dict = CompoundsHelper.loadCompounds() Aliases_Dict = CompoundsHelper.loadMSAliases() Names_Dict = CompoundsHelper.loadNames() Source_Classes = dict() reader = DictReader( open('../../../Biochemistry/Aliases/Source_Classifiers.txt'), dialect='excel-tab') for line in reader: if (line['Source Type'] not in Source_Classes): Source_Classes[line['Source Type']] = dict() Source_Classes[line['Source Type']][line['Source ID']] = 1 for cpd in sorted(Compounds_Dict.keys()): if (cpd not in Aliases_Dict): continue Cpd_Aliases = dict() Alias_Count = 0 for source_type in 'Primary Database', 'Secondary Database', 'Published Model': for source in sorted(Aliases_Dict[cpd].keys()):
#!/usr/bin/env python import os import sys import json from BiochemPy import Compounds #Load Compounds CompoundsHelper = Compounds() Compounds_Dict = CompoundsHelper.loadCompounds() Structures_Root = os.path.dirname(__file__) + "/../../Biochemistry/Structures/" Formulas_Dict = dict() for source in "KEGG", "MetaCyc": if (source not in Formulas_Dict): Formulas_Dict[source] = dict() for struct_type in "InChI", "SMILE": if (struct_type not in Formulas_Dict[source]): Formulas_Dict[source][struct_type] = dict() for struct_stage in "Charged", "Original": if (struct_stage not in Formulas_Dict[source][struct_type]): Formulas_Dict[source][struct_type][struct_stage] = dict() file_name = Structures_Root + source + "/" + struct_type + "_" + struct_stage + "_Formulas_Charges.txt" with open(file_name) as file_handle: for line in file_handle.readlines(): line = line.strip() array = line.split('\t') Formulas_Dict[source][struct_type][struct_stage][ array[0]] = {
#!/usr/bin/env python import os, sys, re temp = list() from BiochemPy import Reactions, Compounds compounds_helper = Compounds() compounds_dict = compounds_helper.loadCompounds() reactions_helper = Reactions() reactions_dict = reactions_helper.loadReactions() reactions_codes = reactions_helper.generateCodes(reactions_dict, stoich=False, transport=False) names_dict = compounds_helper.loadNames() searchnames_dict = dict() for msid in sorted(names_dict): for name in names_dict[msid]: searchname = compounds_helper.searchname(name) #Avoid redundancy where possible if (searchname not in searchnames_dict): searchnames_dict[searchname] = msid mhc = open('Mishit_Compound_Names.txt', 'w') with open('Parsed_Enzyme_Equations.txt') as fh: for line in fh.readlines(): line = line.strip() (id, old_equation) = line.split('\t') array = re.split(' (<?=>?|\+) ', old_equation) new_array = list() mishit = False for i in range(len(array)):
#!/usr/bin/env python import os, sys temp = list() header = 1 sys.path.append('../../Libs/Python') from BiochemPy import Reactions, Compounds ReactionsHelper = Reactions() Reactions_Dict = ReactionsHelper.loadReactions() CompoundsHelper = Compounds() Compounds_Dict = CompoundsHelper.loadCompounds() Compound_To_Merge_From = "cpd00013" Compound_To_Merge_To = "cpd19013" Cpds_Rxns_Dict = dict() Rxns_Cpds_Dict = dict() for rxn in Reactions_Dict.keys(): if (Reactions_Dict[rxn]["status"] == "EMPTY"): continue for rgt in Reactions_Dict[rxn]["stoichiometry"].split(";"): (coeff, cpd, cpt, index, name) = rgt.split(":", 4) if (cpd not in Cpds_Rxns_Dict): Cpds_Rxns_Dict[cpd] = dict() Cpds_Rxns_Dict[cpd][rxn] = 1
#!/usr/bin/env python
# Copy the accepted ModelSEED InChIKey and SMILES structures into the
# 'inchikey' and 'smiles' fields of every compound, then save the compounds.
import os, sys
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Compounds

CompoundsHelper = Compounds()
Structures_Dict = CompoundsHelper.loadStructures(["SMILE", "InChIKey"],
                                                 ["ModelSEED"])
Compounds_Dict = CompoundsHelper.loadCompounds()

for cpd in sorted(Compounds_Dict.keys()):
    if (cpd not in Structures_Dict):
        # No accepted structure: blank out both fields
        Compounds_Dict[cpd]['inchikey'] = ""
        Compounds_Dict[cpd]['smiles'] = ""
    else:
        # A compound may have only one of the two structure types
        Compounds_Dict[cpd]['inchikey'] = Structures_Dict[cpd].get(
            'InChIKey', "")
        Compounds_Dict[cpd]['smiles'] = Structures_Dict[cpd].get('SMILE', "")

# Function-call form works on both Python 2 and Python 3
print("Saving compounds")
CompoundsHelper.saveCompounds(Compounds_Dict)
#!/usr/bin/env python from BiochemPy import Reactions, Compounds import sys remove_index = 0 remove_string = 'ontology' compounds_helper = Compounds() compounds_dict = compounds_helper.loadCompounds() for header in range(len(compounds_helper.Headers)): if (compounds_helper.Headers[header] == remove_string): remove_index = header del compounds_helper.Headers[remove_index] for cpd in compounds_dict: del compounds_dict[cpd][remove_string] compounds_helper.saveCompounds(compounds_dict) reactions_helper = Reactions() reactions_dict = reactions_helper.loadReactions() for header in range(len(reactions_helper.Headers)): if (reactions_helper.Headers[header] == remove_string): remove_index = header del reactions_helper.Headers[remove_index] for rxn in reactions_dict:
class Reactions:
    """
    Helper for loading, parsing, balancing and saving ModelSEED reactions.

    A "reagent" throughout this class is a dictionary with the keys
    "reagent" (id built as <compound>_<compartment><index>), "coefficient"
    (negative on the left-hand side, positive on the right-hand side),
    "compound", "compartment", "index", "name", "formula" and "charge".
    """

    def __init__(self,
                 biochem_root='../../../Biochemistry/',
                 rxns_file='reactions.tsv'):
        """
        Set up file locations and load the compounds used to parse reactions.

        @param biochem_root: path of the Biochemistry directory, relative to
                             the directory containing this module (trailing
                             slash expected)
        @param rxns_file: name of the tab-separated reactions file
        """
        self.BiochemRoot = os.path.dirname(__file__) + '/' + biochem_root
        self.RxnsFile = self.BiochemRoot + rxns_file
        self.AliasFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Aliases.txt"
        self.NameFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Names.txt"
        self.PwyFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Pathways.txt"
        self.ECFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_ECs.txt"

        # Only the header row is needed here; close the file right away
        with open(self.RxnsFile) as rxns_fh:
            self.Headers = DictReader(rxns_fh, dialect='excel-tab').fieldnames

        from BiochemPy import Compounds
        self.CompoundsHelper = Compounds()
        self.Compounds_Dict = self.CompoundsHelper.loadCompounds()

    @staticmethod
    def _trimCoefficient(value):
        """Return value as an int when its string form ends in a redundant ".0"."""
        if (str(value)[-2:] == ".0"):
            return int(round(value))
        return value

    @staticmethod
    def _formatReagent(rgt):
        """Format one reagent as "(<abs coefficient>) <compound>[<compartment>]"."""
        return "(" + str(abs(rgt["coefficient"])) + ") " + rgt["compound"] \
            + "[" + str(rgt["compartment"]) + "]"

    def loadReactions(self):
        """
        Read the reactions file into a dictionary keyed by reaction id.

        List-valued columns are split on "|" unless they hold "null";
        numeric columns are converted, becoming None when conversion fails.

        @return: dictionary of reaction id -> row dictionary
        """
        type_mapping = {
            "is_transport": int,
            "is_obsolete": int,
            "deltag": float,
            "deltagerr": float
        }
        lists = ["aliases", "pathways", "ec_numbers", "notes"]
        dicts = []

        rxns_dict = dict()
        with open(self.RxnsFile) as rxns_fh:
            reader = DictReader(rxns_fh, dialect='excel-tab')
            for line in reader:
                for list_type in lists:
                    if (line[list_type] != "null"):
                        line[list_type] = line[list_type].split("|")

                for dict_type in dicts:
                    if (line[dict_type] != "null"):
                        entries = line[dict_type].split('|')
                        line[dict_type] = dict()
                        for entry in entries:
                            (entry_key, entry_value) = entry.split(':')
                            line[dict_type][entry_key] = entry_value

                for heading, target_type in type_mapping.items():
                    try:
                        line[heading] = target_type(line[heading])
                    except ValueError:  # Generally caused by "null" strings
                        line[heading] = None

                rxns_dict[line['id']] = line

        return rxns_dict

    def parseEquation(self, equation_string):
        """
        Parse an equation such as "(2) cpd00001[0] <=> (1) cpd00002[0]".

        Tokens are separated by single spaces. Compounds before the arrow get
        negative coefficients, compounds after it positive ones; a missing
        "(n)" token means a coefficient of 1. The reagent index is always 0.

        @param equation_string: equation using ModelSEED compound ids
        @return: list of reagent dictionaries
        @raise KeyError: if a compound id is not in self.Compounds_Dict
        """
        rxn_cpds_array = list()
        reagent = -1
        coeff = 1
        index = 0
        for text in equation_string.split(" "):
            if (text == "+"):
                continue

            # The arrow switches from the left to the right-hand side
            match = re.search(r'^<?=>?$', text)
            if (match is not None):
                reagent = 1

            match = re.search(r'^\((\d+(?:\.\d+)?)\)$', text)
            if (match is not None):
                # Correct for redundant ".0" in floats
                coeff = self._trimCoefficient(float(match.group(1)))

            match = re.search(r'^(cpd\d{5})\[(\d)\]$', text)
            if (match is not None):
                # Side of equation
                coeff = coeff * reagent

                (cpd, cpt) = (match.group(1), match.group(2))
                rgt_id = cpd + "_" + cpt + str(index)
                cpt = int(cpt)
                cpd_entry = self.Compounds_Dict[cpd]

                rxn_cpds_array.append({
                    "reagent": rgt_id,
                    "coefficient": coeff,
                    "compound": cpd,
                    "compartment": cpt,
                    "index": index,
                    "name": cpd_entry["name"],
                    "formula": cpd_entry["formula"],
                    "charge": cpd_entry["charge"]
                })

                # Need to reset coeff for next compound
                coeff = 1

        return rxn_cpds_array

    def parseStoich(self, stoichiometry):
        """
        Parse a stoichiometry string into reagent dictionaries.

        The string is a ";"-separated list of
        "coefficient:compound:compartment:index:name" entries (the name may
        itself contain ":").

        @param stoichiometry: stoichiometry string, "" for an empty reaction
        @return: list of reagent dictionaries
        @raise KeyError: if a compound id is not in self.Compounds_Dict
        """
        rxn_cpds_array = list()

        # For empty reaction
        if (stoichiometry == ""):
            return rxn_cpds_array

        for rgt in stoichiometry.split(";"):
            (coeff, cpd, cpt, index, name) = rgt.split(":", 4)
            rgt_id = cpd + "_" + cpt + index

            # Correct for redundant ".0" in floats
            coeff = self._trimCoefficient(float(coeff))

            rxn_cpds_array.append({
                "reagent": rgt_id,
                "coefficient": coeff,
                "compound": cpd,
                "compartment": int(cpt),
                "index": int(index),
                "name": name,
                "formula": self.Compounds_Dict[cpd]["formula"],
                "charge": self.Compounds_Dict[cpd]["charge"]
            })
        return rxn_cpds_array

    def parseStoichOnt(self, stoichiometry):
        """
        Parse a stoichiometry string into a compact dictionary.

        @param stoichiometry: stoichiometry string, "" for an empty reaction
        @return: dictionary of (compound, compartment) -> coefficient, with
                 compartment and coefficient kept as strings
        """
        rxn_cpds_dict = dict()

        # For empty reaction (this used to return an undefined name)
        if (stoichiometry == ""):
            return rxn_cpds_dict

        for rgt in stoichiometry.split(";"):
            (coeff, cpd, cpt, index, name) = rgt.split(":", 4)
            rxn_cpds_dict[(cpd, cpt)] = coeff
        return rxn_cpds_dict

    # The basis for this code, and producing combinations of ontologically related reactions
    # was found in Filipe's code (see commit: 92db86)
    def generateOntologyReactionCodes(self, rxn_id, rxn_cpds, cpds_neighbors):
        """
        Build reaction codes for every combination of neighbor substitutions.

        @param rxn_id: reaction id (not used by the computation)
        @param rxn_cpds: dictionary as returned by parseStoichOnt()
        @param cpds_neighbors: dictionary of compound id -> iterable of
                               compound ids that may replace it
        @return: dictionary of reaction code -> tuple of (old, new)
                 replacements that produced it; empty if nothing can be
                 replaced
        """
        new_codes = dict()

        replacements = list()
        for cpd_cpt_tuple in rxn_cpds:
            replace_list = list()
            cpd_id = cpd_cpt_tuple[0]
            if cpd_id in cpds_neighbors:
                for neighbor_id in cpds_neighbors[cpd_id]:
                    replace_list.append((cpd_id, neighbor_id))
            if len(replace_list) > 0:
                replacements.append(replace_list)

        if (len(replacements) == 0):
            return new_codes

        # Iterate through different numbers of compounds to replace
        # i.e. replace 1 compound, replace 2 compounds etc.
        # The output is a list of all the possible combination of replacements to explore
        replacement_product = list()
        for n_cpds in range(1, len(replacements) + 1):
            for entry in itertools.combinations(replacements, n_cpds):
                replacement_product += list(itertools.product(*entry))

        for entry in replacement_product:
            # Regenerate array of cpd dicts for use with generateCode().
            # A dict-based swap is not used because the "new" compounds are
            # not guaranteed to be unique.
            swapped_rxn_cpds_array = list()
            for (cpd, cpt), coeff in rxn_cpds.items():
                new_cpd = cpd
                for old, new in entry:
                    if (cpd == old):
                        new_cpd = new

                swapped_rxn_cpds_array.append({
                    "reagent": new_cpd + '_' + cpt + '0',
                    "compartment": cpt,
                    # Correct for redundant ".0" in floats
                    "coefficient": self._trimCoefficient(float(coeff))
                })

            new_code = self.generateCode(swapped_rxn_cpds_array)
            new_codes[new_code] = entry

        return new_codes

    @staticmethod
    def isTransport(rxn_cpds_array):
        """
        Tell whether the reagents span more than one compartment.

        @param rxn_cpds_array: list of reagent dictionaries
        @return: 1 if more than one distinct compartment occurs, else 0
        """
        compartments_dict = dict()
        for rgt in rxn_cpds_array:
            compartments_dict[rgt['compartment']] = 1
        if (len(compartments_dict.keys()) > 1):
            return 1
        else:
            return 0

    def generateCodes(self, rxns_dict, check_obsolete=True):
        """
        Group reactions by their reaction code.

        Reactions whose status is "EMPTY" are skipped. Obsolete reactions
        are skipped only when check_obsolete is False.
        NOTE(review): the flag name reads as the opposite of that -- confirm
        the intended meaning with callers before changing it.

        @param rxns_dict: dictionary as returned by loadReactions()
        @param check_obsolete: see above
        @return: dictionary of code -> dictionary of reaction id -> 1
        """
        codes_dict = dict()
        for rxn in rxns_dict:
            if (rxns_dict[rxn]['status'] == "EMPTY"):
                continue
            if (check_obsolete is False and rxns_dict[rxn]['is_obsolete'] == 1):
                continue
            rxn_cpds_array = self.parseStoich(rxns_dict[rxn]['stoichiometry'])
            code = self.generateCode(rxn_cpds_array)
            if (code not in codes_dict):
                codes_dict[code] = dict()
            codes_dict[code][rxn] = 1
        return codes_dict

    def generateCode(self, rxn_cpds_array):
        """
        Build a direction-independent string identifying a reaction.

        @param rxn_cpds_array: list of reagent dictionaries (only "reagent",
                               "coefficient" and "compartment" are read)
        @return: the two sides, each a "|"-joined list of
                 "<reagent>:<abs coefficient>", sorted and joined by "|=|"
        """
        # It matters if its a transport reaction, and we include protons when matching transport
        is_transport = self.isTransport(rxn_cpds_array)

        # It matters which side of the equation, so build reagents and products arrays
        reagents = list()
        products = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x: (x["reagent"], x["coefficient"])):
            # skip protons
            if ("cpd00067" in rgt["reagent"] and is_transport == 0):
                continue

            rgt_entry = rgt["reagent"] + ":" + str(abs(rgt["coefficient"]))
            if (rgt["coefficient"] < 0):
                reagents.append(rgt_entry)
            if (rgt["coefficient"] > 0):
                products.append(rgt_entry)

        rgt_string = "|".join(reagents)
        pdt_string = "|".join(products)
        # Sorting the overall strings here helps with matching transporters
        return "|=|".join(sorted([rgt_string, pdt_string]))

    @staticmethod
    def buildStoich(rxn_cpds_array):
        """
        Serialize reagent dictionaries back into a stoichiometry string.

        Left-hand-side reagents come first, each side ordered by reagent id.
        Note that the "coefficient", "compartment" and "index" values of the
        given dictionaries are converted to strings in place.

        @param rxn_cpds_array: list of reagent dictionaries
        @return: stoichiometry string as understood by parseStoich()
        """
        stoichiometry_array = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x: (int(x["coefficient"] > 0), x["reagent"])):
            # Correct for redundant ".0" in floats
            rgt["coefficient"] = str(Reactions._trimCoefficient(rgt["coefficient"]))
            rgt["compartment"] = str(rgt["compartment"])
            rgt["index"] = str(rgt["index"])

            stoichiometry_array.append(":".join([
                rgt["coefficient"], rgt["compound"], rgt["compartment"],
                rgt["index"], rgt["name"]
            ]))
        return ";".join(stoichiometry_array)

    @staticmethod
    def removeCpdRedundancy(rgts_array):
        """
        Merge reagents that share the same reagent id.

        Coefficients of duplicates are summed onto the first occurrence
        (updated in place); reagents whose sum is zero are dropped.

        @param rgts_array: list of reagent dictionaries
        @return: new list with one entry per remaining reagent id
        """
        rgts_dict = dict()
        for rgt in rgts_array:
            if (rgt["reagent"] not in rgts_dict):
                rgts_dict[rgt["reagent"]] = 0
            rgts_dict[rgt["reagent"]] += float(rgt["coefficient"])

        new_rgts_array = list()
        for rgt in rgts_array:
            if (rgts_dict[rgt["reagent"]] == 0):
                continue

            # Correct for redundant ".0" in floats
            rgt["coefficient"] = Reactions._trimCoefficient(
                rgts_dict[rgt["reagent"]])
            new_rgts_array.append(rgt)

            # Trick to exclude reagent if it appears in array more than once
            rgts_dict[rgt["reagent"]] = 0

        return new_rgts_array

    def balanceReaction(self, rgts_array):
        """
        Check mass and charge balance of a reaction.

        @param rgts_array: list of reagent dictionaries
        @return: one of "EMPTY", "Duplicate reagents", "CPDFORMERROR", "OK",
                 or an imbalance report built from "MI:<atom>:<net>/..."
                 and/or "CI:<net charge>", joined by "|"
        """
        if (len(rgts_array) == 0):
            return "EMPTY"

        ########################################
        # Check that each reagent is either a
        # different compound or in a different
        # compartment, and report.
        ########################################
        rgts_dict = dict()
        for rgt in rgts_array:
            if (rgt["reagent"] not in rgts_dict):
                rgts_dict[rgt["reagent"]] = 0
            rgts_dict[rgt["reagent"]] += 1

        for rgt in rgts_dict.keys():
            if (rgts_dict[rgt] > 1):
                return "Duplicate reagents"

        ########################################
        # Check for duplicate compounds in
        # different compartments, these are
        # balanced directly.
        #######################################
        cpds_coeff_dict = dict()
        for rgt in rgts_array:
            cpd = rgt["compound"]
            if (cpd not in cpds_coeff_dict):
                cpds_coeff_dict[cpd] = 0

            # Use float() because you can get real coefficients
            cpds_coeff_dict[cpd] += float(rgt["coefficient"])

        # Build dict of compounds
        cpds_dict = dict()
        for rgt in rgts_array:
            # Skip trans-compartmental compounds
            if (cpds_coeff_dict[rgt["compound"]] == 0):
                continue
            # Copy so the caller's reagent keeps its own coefficient
            proxy_rgt = copy.deepcopy(rgt)
            proxy_rgt["coefficient"] = cpds_coeff_dict[rgt["compound"]]
            cpds_dict[rgt["compound"]] = proxy_rgt

        ########################################
        # Check for duplicate elements, across
        # all compounds, these are balanced
        # directly.
        #######################################
        rxn_net_charge = 0.0
        rxn_net_mass = dict()
        cpdformerror = list()
        for cpd in cpds_dict.keys():
            cpd_atoms = self.CompoundsHelper.parseFormula(
                cpds_dict[cpd]["formula"])

            if (len(cpd_atoms.keys()) == 0):
                # Here we can skip photons and electrons
                # They are the valid compounds with no mass
                if (cpd == 'cpd11632' or cpd == 'cpd12713'):
                    pass
                else:
                    cpdformerror.append(cpd)

            cpd_coeff_charge = float(cpds_dict[cpd]["charge"]) * float(
                cpds_dict[cpd]["coefficient"])
            rxn_net_charge += cpd_coeff_charge

            for atom in cpd_atoms.keys():
                atom_coeff_mass = float(cpd_atoms[atom]) * float(
                    cpds_dict[cpd]["coefficient"])
                if (atom not in rxn_net_mass.keys()):
                    rxn_net_mass[atom] = 0.0
                rxn_net_mass[atom] += atom_coeff_mass

        if (len(cpdformerror) > 0):
            return "CPDFORMERROR"

        # Round out tiny numbers that occur because we add/substract floats
        # Threshold of 1e-6 found to capture all these instances without
        # removing actual small differences in mass.
        for atom in rxn_net_mass.keys():
            if (rxn_net_mass[atom] > -1e-6 and rxn_net_mass[atom] < 1e-6):
                rxn_net_mass[atom] = 0

        if (rxn_net_charge > -1e-6 and rxn_net_charge < 1e-6):
            rxn_net_charge = 0

        # Report any imbalance
        imbalanced_atoms_array = list()
        for atom in sorted(rxn_net_mass.keys()):
            if (rxn_net_mass[atom] == 0):
                continue

            rxn_net_mass[atom] = "{0:.2f}".format(rxn_net_mass[atom])

            # Correct for redundant ".00" in floats
            if (rxn_net_mass[atom][-3:] == ".00"):
                rxn_net_mass[atom] = str(int(float(rxn_net_mass[atom])))

            imbalanced_atoms_array.append(atom + ":" + rxn_net_mass[atom])

        rxn_net_charge = "{0:.2f}".format(rxn_net_charge)

        # Correct for redundant ".00" in floats
        if (rxn_net_charge[-3:] == ".00"):
            rxn_net_charge = str(int(float(rxn_net_charge)))

        status = ""
        if (len(imbalanced_atoms_array) > 0):
            status = "MI:" + "/".join(imbalanced_atoms_array)

        if (rxn_net_charge != "0"):
            if (len(status) == 0):
                status = "CI:" + rxn_net_charge
            else:
                status += "|CI:" + rxn_net_charge

        if (status == ""):
            status = "OK"

        return status

    def adjustCompound(self,
                       rxn_cpds_array,
                       compound,
                       adjustment,
                       compartment=0):
        """
        Subtract 'adjustment' from a compound's coefficient, in place.

        The compound is added when it is not yet part of the reaction and
        removed when its coefficient becomes zero.

        @param rxn_cpds_array: list of reagent dictionaries, modified in place
        @param compound: compound id to adjust
        @param adjustment: amount subtracted from the coefficient
        @param compartment: compartment (int) in which to adjust
        @return: rxn_cpds_array when adjustment is 0, otherwise None
        """
        if (adjustment == 0):
            return rxn_cpds_array

        ######################################################################
        # We will always assume to adjust a compound automatically
        # in the compartment indexed as zero, unless otherwise specified.
        # This answers the question of how to handle transporters.
        ######################################################################

        # Check to see if it already exists
        cpd_exists = 0
        cpd_remove = {}
        for rgt in rxn_cpds_array:
            if (rgt["compound"] == compound
                    and rgt["compartment"] == compartment):
                rgt["coefficient"] -= adjustment
                cpd_exists = 1
                if (rgt["coefficient"] == 0):
                    cpd_remove = rgt

        if (cpd_exists != 1):
            rgt_id = compound + "_" + str(compartment) + "0"

            rxn_cpds_array.append({
                "reagent": rgt_id,
                "coefficient": 0 - adjustment,
                "compound": compound,
                "compartment": compartment,
                "index": 0,
                "name": self.Compounds_Dict[compound]["name"],
                "formula": self.Compounds_Dict[compound]["formula"],
                "charge": self.Compounds_Dict[compound]["charge"]
            })

        if (len(cpd_remove.keys()) > 0):
            rxn_cpds_array.remove(cpd_remove)

        # Got to adjust for floats
        for rgt in rxn_cpds_array:
            rgt["coefficient"] = self._trimCoefficient(rgt["coefficient"])

        return

    def replaceCompound(self, rxn_cpds_array, old_compound, new_compound):
        """
        Replace one compound by another in every compartment, in place.

        The coefficient is maintained. The reagent id is rebuilt with index
        "0", and the name, formula and charge are taken from the new
        compound so that later balancing uses the right data.

        @param rxn_cpds_array: list of reagent dictionaries, modified in place
        @param old_compound: compound id to look for
        @param new_compound: compound id to put in its place
        @return: True if old_compound was found (and replaced), else False
        @raise KeyError: if new_compound is not in self.Compounds_Dict
        """
        found_cpd = False
        for rgt in rxn_cpds_array:
            if (rgt["compound"] == old_compound):
                found_cpd = True
                new_entry = self.Compounds_Dict[new_compound]
                rgt["compound"] = new_compound
                rgt["reagent"] = new_compound + "_" + str(
                    rgt["compartment"]) + "0"
                rgt["name"] = new_entry['name']
                rgt["formula"] = new_entry['formula']
                rgt["charge"] = new_entry['charge']

        return found_cpd

    def rebuildReaction(self, reaction_dict, stoichiometry=None):
        """
        Regenerate the derived fields of a reaction from its stoichiometry.

        Sets "code", "equation", "definition" and "compound_ids" on
        reaction_dict (and "stoichiometry" when one is given).

        @param reaction_dict: reaction row; "direction" is read, and
                              "stoichiometry" too when none is passed in
        @param stoichiometry: stoichiometry string to store and use, or None
                              to use the one already in reaction_dict
        @return: None, reaction_dict is updated in place
        """
        # Retrieve/Assign stoich
        if (stoichiometry is None):
            stoichiometry = reaction_dict['stoichiometry']
        else:
            reaction_dict["stoichiometry"] = stoichiometry

        # Build list of "reagents" and "products"
        rxn_cpds_array = self.parseStoich(stoichiometry)
        reagents_array = list()
        products_array = list()
        compound_ids_dict = dict()
        for rgt in rxn_cpds_array:
            compound_ids_dict[rgt["compound"]] = 1
            if (rgt["coefficient"] > 0):
                products_array.append(rgt)
            else:
                reagents_array.append(rgt)

        rgts_str_array = [self._formatReagent(rgt) for rgt in reagents_array]
        pdts_str_array = [self._formatReagent(rgt) for rgt in products_array]

        if (reaction_dict["direction"] == "="):
            arrow = "<=>"
        elif (reaction_dict["direction"] == "<"):
            arrow = "<="
        else:
            arrow = "=>"

        equation_array = [
            " + ".join(rgts_str_array), arrow, " + ".join(pdts_str_array)
        ]
        # The definition starts out identical to the equation
        definition_array = list(equation_array)
        # The code ignores protons and direction
        code_array = [
            " + ".join(x for x in rgts_str_array if "cpd00067" not in x),
            "<=>",
            " + ".join(x for x in pdts_str_array if "cpd00067" not in x)
        ]

        reaction_dict["code"] = " ".join(code_array)
        reaction_dict["equation"] = " ".join(equation_array)
        reaction_dict["definition"] = " ".join(definition_array)
        reaction_dict["compound_ids"] = ";".join(
            sorted(compound_ids_dict.keys()))

        # Replace ids with names in Definition
        for cpd_id in compound_ids_dict.keys():
            if (cpd_id in reaction_dict["definition"]):
                reaction_dict["definition"] = reaction_dict[
                    "definition"].replace(cpd_id,
                                          self.Compounds_Dict[cpd_id]["name"])

        # Define if transport?

        return

    def saveECs(self, ecs_dict):
        """
        Write the reaction EC numbers file.

        @param ecs_dict: dictionary of reaction id -> iterable of EC numbers
        """
        ecs_root = os.path.splitext(self.ECFile)[0]

        # Print to TXT
        with open(ecs_root + ".txt", 'w') as ecs_file:
            ecs_file.write("\t".join(("ModelSEED ID", "External ID", "Source")) + "\n")
            for rxn in sorted(ecs_dict.keys()):
                for name in sorted(ecs_dict[rxn]):
                    ecs_file.write("\t".join((rxn, name, 'Enzyme Class')) + "\n")

    def saveNames(self, names_dict):
        """
        Write the reaction names file.

        @param names_dict: dictionary of reaction id -> iterable of names
        """
        names_root = os.path.splitext(self.NameFile)[0]

        # Print to TXT
        with open(names_root + ".txt", 'w') as names_file:
            names_file.write("\t".join(("ModelSEED ID", "External ID", "Source")) + "\n")
            for rxn in sorted(names_dict.keys()):
                for name in sorted(names_dict[rxn]):
                    names_file.write("\t".join((rxn, name, 'name')) + "\n")

    def saveAliases(self, alias_dict):
        """
        Write the reaction aliases file.

        @param alias_dict: dictionary of reaction id -> source -> iterable
                           of aliases
        """
        alias_root = os.path.splitext(self.AliasFile)[0]

        # Print to TXT
        with open(alias_root + ".txt", 'w') as alias_file:
            alias_file.write("\t".join(("ModelSEED ID", "External ID", "Source")) + "\n")
            for rxn in sorted(alias_dict.keys()):
                for source in sorted(alias_dict[rxn].keys()):
                    for alias in sorted(alias_dict[rxn][source]):
                        alias_file.write("\t".join((rxn, alias, source)) + "\n")

    def saveReactions(self, reactions_dict):
        """
        Write the reactions to the TSV file and to a JSON file next to it.

        Lists are joined with "|", dictionaries as "key:value" entries
        joined with "|", and None is written as "null" in the TSV so that
        the file round-trips through loadReactions(). For the JSON output
        the "null" strings in reactions_dict are replaced by None in place.

        @param reactions_dict: dictionary as returned by loadReactions()
        """
        rxns_root = os.path.splitext(self.RxnsFile)[0]

        # Print to TSV
        with open(rxns_root + ".tsv", 'w') as rxns_file:
            rxns_file.write("\t".join(self.Headers) + "\n")
            for rxn in sorted(reactions_dict.keys()):
                values_list = list()
                for header in self.Headers:
                    value = reactions_dict[rxn][header]
                    if (isinstance(value, list)):
                        value = "|".join(value)
                    if (isinstance(value, dict)):
                        entries = list()
                        for entry in value:
                            entries.append(entry + ':' + value[entry])
                        value = "|".join(entries)
                    if (value is None):
                        # The loader turns unparsable ("null") numbers into
                        # None; write them back as "null", not "None"
                        value = "null"
                    values_list.append(str(value))
                rxns_file.write("\t".join(values_list) + "\n")

        # Re-configure JSON
        new_reactions_dict = list()
        for rxn_id in sorted(reactions_dict):
            rxn_obj = reactions_dict[rxn_id]
            for key in rxn_obj:
                if (isinstance(rxn_obj[key], dict)):
                    for entry in rxn_obj[key]:
                        if (rxn_obj[key][entry] == "null"):
                            rxn_obj[key][entry] = None
                if (rxn_obj[key] == "null"):
                    rxn_obj[key] = None
            new_reactions_dict.append(rxn_obj)

        # Print to JSON
        with open(rxns_root + ".json", 'w') as rxns_file:
            rxns_file.write(
                json.dumps(new_reactions_dict, indent=4, sort_keys=True))

    def loadMSAliases(self, sources_array=None):
        """
        Read the reaction aliases file.

        @param sources_array: list of sources to keep; None, an empty list
                              or a list containing "All" keeps every source.
                              The list is not modified.
        @return: dictionary of reaction id -> source -> list of aliases
        """
        # Work on a private copy: never mutate the caller's list or a
        # shared default
        if (sources_array):
            sources = list(sources_array)
        else:
            sources = ["All"]

        aliases_dict = dict()
        with open(self.AliasFile) as alias_fh:
            reader = DictReader(alias_fh, dialect='excel-tab')
            for line in reader:
                if ("rxn" not in line['ModelSEED ID']):
                    continue

                if ("All" not in sources and line['Source'] not in sources):
                    continue

                if (line['ModelSEED ID'] not in aliases_dict):
                    aliases_dict[line['ModelSEED ID']] = dict()

                # One row may list several sources separated by "|"
                for source in line['Source'].split('|'):
                    if (source not in aliases_dict[line['ModelSEED ID']]):
                        aliases_dict[line['ModelSEED ID']][source] = list()

                    aliases_dict[line['ModelSEED ID']][source].append(
                        line['External ID'])

        return aliases_dict

    def loadNames(self):
        """
        Read the reaction names file.

        @return: dictionary of reaction id -> list of names
        """
        names_dict = dict()
        with open(self.NameFile) as names_fh:
            reader = DictReader(names_fh, dialect='excel-tab')
            for line in reader:
                if ("rxn" not in line['ModelSEED ID']):
                    continue

                if (line['ModelSEED ID'] not in names_dict):
                    names_dict[line['ModelSEED ID']] = list()

                names_dict[line['ModelSEED ID']].append(line['External ID'])

        return names_dict

    def loadPathways(self):
        """
        Read the reaction pathways file.

        @return: dictionary of reaction id -> source -> list of pathways
        """
        pathways_dict = dict()
        with open(self.PwyFile) as pwy_fh:
            reader = DictReader(pwy_fh, dialect='excel-tab')
            for line in reader:
                if ("rxn" not in line['ModelSEED ID']):
                    continue

                if (line['ModelSEED ID'] not in pathways_dict):
                    pathways_dict[line['ModelSEED ID']] = dict()

                if (line['Source'] not in pathways_dict[line['ModelSEED ID']]):
                    pathways_dict[line['ModelSEED ID']][line['Source']] = list()

                pathways_dict[line['ModelSEED ID']][line['Source']].append(
                    line['External ID'])

        return pathways_dict

    def loadECs(self):
        """
        Read the reaction EC numbers file.

        @return: dictionary of reaction id -> list of EC numbers
        """
        ecs_dict = dict()
        with open(self.ECFile) as ecs_fh:
            reader = DictReader(ecs_fh, dialect='excel-tab')
            for line in reader:
                if ("rxn" not in line['ModelSEED ID']):
                    continue

                if (line['ModelSEED ID'] not in ecs_dict):
                    ecs_dict[line['ModelSEED ID']] = list()

                ecs_dict[line['ModelSEED ID']].append(line['External ID'])

        return ecs_dict
output = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD'], universal_newlines=True) branch = output.strip() Disambiguation_Object['metadata']['branch'] = branch time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(time.time())) Disambiguation_Object['metadata']['date_time'] = time_str ########################################################## # # Collect compound data # ########################################################## from BiochemPy import Reactions, Compounds compounds_helper = Compounds() compounds_dict = compounds_helper.loadCompounds() if (disambiguating_cpd not in compounds_dict): print("Error: compound " + disambiguating_cpd + " is not found in the ModelSEED database") sys.exit() if (compounds_dict[disambiguating_cpd]['is_obsolete'] == 1): print("Warning: compound " + disambiguating_cpd + " is obsolete, consider using the non-obsolete version") Disambiguation_Object['from'] = { 'id': disambiguating_cpd, 'structures': {}, 'aliases': {},
#!/usr/bin/env python import os import sys import json from BiochemPy import Compounds, Reactions # Load Compounds compounds_helper = Compounds() aliases_dict = compounds_helper.loadMSAliases() # Load Reactions ReactionsHelper = Reactions() Reactions_Dict = ReactionsHelper.loadReactions() # Load ACPs Overridden_Fields = dict() header = list() with open(os.path.dirname(__file__) + '/ACPs_Master_Formula_Charge.txt') as fh: for line in fh.readlines(): line = line.strip() array = line.split('\t') cpd = array.pop(0) if (len(header) == 0): header = array continue if (cpd not in Overridden_Fields): Overridden_Fields[cpd] = dict()
#!/usr/bin/env python import os, sys, re, copy from csv import DictReader from collections import OrderedDict temp = list() header = True Biochem = "MetaCyc" sys.path.append('../../Libs/Python') from BiochemPy import Reactions, Compounds, InChIs compounds_helper = Compounds() compounds_dict = compounds_helper.loadCompounds() names_dict = compounds_helper.loadNames() searchnames_dict = dict() all_names_dict = dict() new_name_count = dict() for msid in sorted(names_dict): for name in names_dict[msid]: all_names_dict[name] = 1 searchname = compounds_helper.searchname(name) #Avoid redundancy where possible if (searchname not in searchnames_dict): searchnames_dict[searchname] = msid original_alias_dict = compounds_helper.loadMSAliases() source_alias_dict = dict() all_aliases = dict()
#!/usr/bin/env python import os, sys temp = list() header = 1 sys.path.append('../../Libs/Python') from BiochemPy import Reactions, Compounds, InChIs CompoundsHelper = Compounds() Compounds_Dict = CompoundsHelper.loadCompounds() Structures_Dict = CompoundsHelper.loadStructures(["InChI"], ["ModelSEED"]) Update_Compounds = 0 for cpd in sorted(Compounds_Dict.keys()): if (cpd not in Structures_Dict or 'InChI' not in Structures_Dict[cpd]): continue current_formula = Compounds_Dict[cpd]['formula'] if (current_formula != "null"): continue (inchi_formula, inchi_layers) = InChIs.parse(Structures_Dict[cpd]['InChI']) (inchi_formula, notes) = Compounds.mergeFormula(inchi_formula) (adjusted_inchi_formula, notes) = InChIs.adjust_protons(inchi_formula, inchi_layers['p']) if (adjusted_inchi_formula != current_formula): Compounds_Dict[cpd]['formula'] = adjusted_inchi_formula Update_Compounds += 1
#!/usr/bin/env python import os, sys temp = list() header = 1 sys.path.append('../../Libs/Python') from BiochemPy import Reactions, Compounds, InChIs CompoundsHelper = Compounds() Compounds_Dict = CompoundsHelper.loadCompounds() Structures_Dict = CompoundsHelper.loadStructures(["InChI"], ["ModelSEED"]) diff_file = open("Compound_Formula_Differences.txt", 'w') for cpd in sorted(Compounds_Dict.keys()): if (cpd not in Structures_Dict): diff_file.write("Zero structures for " + cpd + "\n") continue if ('InChI' not in Structures_Dict[cpd]): diff_file.write("No InChI structure for " + cpd + "\n") continue current_formula = Compounds_Dict[cpd]['formula'] #Parse out InChI formula (inchi_formula, inchi_layers) = InChIs.parse(Structures_Dict[cpd]['InChI']) #Make sure formula is merged appropriately before applying proton adjustment (inchi_formula, notes) = Compounds.mergeFormula(inchi_formula) if (notes != ""): diff_file.write("Notes from merging InChI formula for " + cpd + ": " +
#!/usr/bin/env python import os, sys, re, copy from csv import DictReader from collections import OrderedDict temp = list() header = True Biochem = "MetaCyc" Biochem_Root = "../../Biochemistry/Aliases/Provenance/Primary_Databases/" sys.path.append('../../Libs/Python') from BiochemPy import Reactions, Compounds, InChIs compounds_helper = Compounds() compounds_dict = compounds_helper.loadCompounds() reactions_helper = Reactions() reactions_dict = reactions_helper.loadReactions() reactions_codes = reactions_helper.generateCodes(reactions_dict) Default_Rxn = { "id": "cpd00001", "name": "null", "abbreviation": "null", "aliases": "null", "code": "null", "stoichiometry": "null", "equation": "null", "definition": "null", "reversibility": "=", "direction": "=", "deltag": "10000000",
Structures_Root=os.path.dirname(__file__)+"/../../Biochemistry/Structures/" # Load pKas and pKbs cpd_pKab_dict=dict() for DB in ["KEGG","MetaCyc"]: with open(Structures_Root+DB+'/pKa_Strings.txt') as fh: for line in fh.readlines(): line=line.strip() array=line.split('\t') if(array[0] not in cpd_pKab_dict): cpd_pKab_dict[array[0]]={array[1]:array[2]} else: cpd_pKab_dict[array[0]][array[1]]=array[2] compounds_helper = Compounds() compounds_dict = compounds_helper.loadCompounds() structures_dict = compounds_helper.loadStructures(["SMILE","InChI","InChIKey"],["ModelSEED"]) aliases_dict = compounds_helper.loadMSAliases() # We're removing all pKa and pKb before loading new ones for cpd in compounds_dict: compounds_dict[cpd]['pka']="" compounds_dict[cpd]['pkb']="" # We're only loading pKa/pKb for compounds that have an accepted unique structure in ModelSEED for cpd in structures_dict: found=False for DB in ["KEGG","MetaCyc"]: if(found is True or DB not in aliases_dict[cpd]): continue
#!/usr/bin/env python import os import sys import subprocess import time import copy import re import json from collections import OrderedDict from BiochemPy import Reactions, Compounds compounds_helper = Compounds() compounds_dict = compounds_helper.loadCompounds() cpds_aliases_dict = compounds_helper.loadMSAliases() cpds_names_dict = compounds_helper.loadNames() structures_dict = compounds_helper.loadStructures(["InChI","SMILE"],["ModelSEED"]) for cpd in cpds_names_dict: if(cpd not in compounds_dict): print(cpd+" shouldn't be in names_dict") for cpd in cpds_aliases_dict: if(cpd not in compounds_dict): print(cpd+" shouldn't be in aliases_dict") for cpd in structures_dict: if(cpd not in compounds_dict): print(cpd+" shouldn't be in structures_dict") # Load Reactions reactions_helper = Reactions()
#!/usr/bin/env python import os, sys temp=list(); header=1; sys.path.append('../../Libs/Python') from BiochemPy import Reactions, Compounds, InChIs compounds_helper = Compounds() compounds_dict = compounds_helper.loadCompounds() cpds_aliases_dict = compounds_helper.loadMSAliases() cpds_names_dict = compounds_helper.loadNames() # We actually don't want obsolete reactions and compounds in our database # So we're striving to remove any 'new' ones that are obsolete # Any information attached to them should be associated with their linked counterpart # We need to retain older compounds that are now obsolete as these may be present in prior published models # The number used here is the last compound entered before we re-integrated updates from KEGG and MetaCyc # In the fall of 2018, so after this point, we'll take out obsolete compounds last_cpd_str='cpd31000' last_cpd_int=int(last_cpd_str[3:]) delete_cpds=list() for cpd in compounds_dict: cpd_int = int(cpd[3:]) if(cpd_int > last_cpd_int and compounds_dict[cpd]['is_obsolete']): delete_cpds.append(cpd) for cpd in delete_cpds:
#!/usr/bin/env python import os, sys from BiochemPy import Compounds, Reactions, InChIs compounds_helper = Compounds() compounds_dict = compounds_helper.loadCompounds() reactions_helper = Reactions() reactions_dict = reactions_helper.loadReactions() print("\n================") print( "For Section: \"Computation of thermodynamic properties of ModelSEED compounds and reaction\"\n" ) MS_Complete_Structures = dict() with open("../../../Biochemistry/Structures/Unique_ModelSEED_Structures.txt" ) as fh: for line in fh.readlines(): line = line.strip() array = line.split('\t') if ("InChI" in array[5]): MS_Complete_Structures[array[5]] = 1 MNX_Complete_Structures = dict() with open("../../../Biochemistry/Structures/MetaNetX/chem_prop.tsv") as fh: header = 1 for line in fh.readlines(): if (line[0] == "#"): continue
class Reactions:
    """Load, parse, balance, rebuild and save reactions from a TSV file.

    A stoichiometry string is a ";"-separated list of reagents, each of
    the form "coefficient:compound:compartment:index:name".  Negative
    coefficients are reactants, positive coefficients are products.
    Parsed reagents are plain dicts with the keys "reagent",
    "coefficient", "compound", "compartment", "index", "name", "formula"
    and "charge".
    """

    def __init__(self, biochem_root='../../Biochemistry/',
                 rxns_file='reactions.tsv'):
        """Record file locations, read the TSV header and load compounds.

        @param biochem_root: directory prefix (string-concatenated, so it
            must end with a path separator)
        @param rxns_file: name of the reactions TSV file inside the root
        """
        self.BiochemRoot = biochem_root
        self.RxnsFile = biochem_root + rxns_file
        self.AliasFile = biochem_root + "Aliases/Reactions_Aliases.tsv"

        # Only the header row is needed here, so close the file right away.
        with open(self.RxnsFile) as rxns_fh:
            self.Headers = DictReader(rxns_fh, dialect='excel-tab').fieldnames

        # Imported locally as in the original code
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from BiochemPy import Compounds
        self.CompoundsHelper = Compounds()
        self.Compounds_Dict = self.CompoundsHelper.loadCompounds()

    @staticmethod
    def _dropRedundantZero(number):
        """Return `number` as an int when its string form ends in ".0".

        The check is done on the string form on purpose so that values
        printed in another notation (e.g. "1e+16") are left untouched.
        """
        if (str(number)[-2:] == ".0"):
            return int(round(number))
        return number

    def loadReactions(self):
        """Read the reactions TSV into a dict keyed by reaction id.

        @return: dict of reaction id -> row dict, with "is_transport" and
            "is_obsolete" converted to int
        """
        rxns_dict = dict()
        with open(self.RxnsFile) as rxns_fh:
            for line in DictReader(rxns_fh, dialect='excel-tab'):
                for header in ["is_transport", "is_obsolete"]:
                    line[header] = int(line[header])
                rxns_dict[line['id']] = line
        return rxns_dict

    def parseStoich(self, stoichiometry):
        """Parse a stoichiometry string into a list of reagent dicts.

        @param stoichiometry: ";"-separated "coeff:cpd:cpt:index:name"
        @return: list of reagent dicts; empty list for an empty string
            (which balanceReaction() reports as "EMPTY")
        @raise KeyError: if a compound id is not in self.Compounds_Dict
        """
        rxn_cpds_array = list()
        if (not stoichiometry):
            return rxn_cpds_array

        for rgt in stoichiometry.split(";"):
            # maxsplit=4 keeps any ":" inside the compound name intact
            (coeff, cpd, cpt, index, name) = rgt.split(":", 4)
            rgt_id = cpd + "_" + cpt + index

            # Correct for redundant ".0" in floats
            coeff = self._dropRedundantZero(float(coeff))

            rxn_cpds_array.append({
                "reagent": rgt_id,
                "coefficient": coeff,
                "compound": cpd,
                "compartment": int(cpt),
                "index": int(index),
                "name": name,
                "formula": self.Compounds_Dict[cpd]["formula"],
                "charge": self.Compounds_Dict[cpd]["charge"]
            })
        return rxn_cpds_array

    @staticmethod
    def isTransport(rxn_cpds_array):
        """Return 1 if the reagents span more than one compartment, else 0."""
        compartments = {rgt['compartment'] for rgt in rxn_cpds_array}
        return 1 if len(compartments) > 1 else 0

    def generateCodes(self, rxns_dict):
        """Group reactions by their code (see generateCode()).

        Reactions whose status is "EMPTY" are skipped.

        @param rxns_dict: dict of reaction id -> reaction dict
        @return: dict of code -> dict of reaction id -> 1
        """
        codes_dict = dict()
        for rxn in rxns_dict:
            if (rxns_dict[rxn]['status'] == "EMPTY"):
                continue
            code = self.generateCode(rxns_dict[rxn]['stoichiometry'])
            codes_dict.setdefault(code, dict())[rxn] = 1
        return codes_dict

    def generateCode(self, stoichiometry):
        """Build a direction-independent string identifying a reaction.

        @param stoichiometry: stoichiometry string (see parseStoich())
        @return: "<side>|=|<side>" where each side is a "|"-joined list of
            "reagent:abs(coefficient)" and the two sides are sorted
        """
        rxn_cpds_array = self.parseStoich(stoichiometry)

        # It matters if it's a transport reaction: protons are only
        # included when matching transporters
        is_transport = self.isTransport(rxn_cpds_array)

        # It matters which side of the equation, so build both sides
        reagents = list()
        products = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x: (x["reagent"], x["coefficient"])):
            # skip protons
            if ("cpd00067" in rgt["reagent"] and is_transport == 0):
                continue

            entry = rgt["reagent"] + ":" + str(abs(rgt["coefficient"]))
            if (rgt["coefficient"] < 0):
                reagents.append(entry)
            elif (rgt["coefficient"] > 0):
                products.append(entry)

        rgt_string = "|".join(reagents)
        pdt_string = "|".join(products)

        # Sorting the overall strings here helps with matching transporters
        return "|=|".join(sorted([rgt_string, pdt_string]))

    @staticmethod
    def buildStoich(rxn_cpds_array):
        """Serialise reagent dicts back into a stoichiometry string.

        Reactants (coefficient <= 0) come first, then products, each group
        ordered by reagent id.  The input dicts are NOT modified, so the
        array can safely be reused (or serialised again) afterwards.

        @param rxn_cpds_array: list of reagent dicts
        @return: ";"-separated "coeff:cpd:cpt:index:name" string
        """
        stoichiometry_array = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x: (int(x["coefficient"] > 0),
                                         x["reagent"])):
            # Correct for redundant ".0" in floats
            coefficient = Reactions._dropRedundantZero(rgt["coefficient"])
            stoichiometry_array.append(":".join([
                str(coefficient), rgt["compound"],
                str(rgt["compartment"]),
                str(rgt["index"]), rgt["name"]
            ]))
        return ";".join(stoichiometry_array)

    def balanceReaction(self, rgts_array):
        """Check mass and charge balance of a reaction.

        The input reagent dicts are NOT modified.

        @param rgts_array: list of reagent dicts (see parseStoich())
        @return: one of
            "EMPTY"                      no reagents,
            "ERROR: Duplicate reagents"  a reagent id occurs twice,
            "CPDFORMERROR"               a formula parsed to no atoms,
            "OK"                         balanced,
            "MI:<atom>:<n>/..."          mass imbalance,
            "CI:<n>"                     charge imbalance,
            or "MI:...|CI:<n>" when both are imbalanced
        """
        if (len(rgts_array) == 0):
            return "EMPTY"

        ########################################
        # Check that each reagent is either a
        # different compound or in a different
        # compartment, and report.
        ########################################
        seen_reagents = set()
        for rgt in rgts_array:
            if (rgt["reagent"] in seen_reagents):
                return "ERROR: Duplicate reagents"
            seen_reagents.add(rgt["reagent"])

        ########################################
        # Check for duplicate compounds in
        # different compartments, these are
        # balanced directly.
        #######################################
        cpds_coeff_dict = dict()
        cpds_dict = dict()
        for rgt in rgts_array:
            cpd = rgt["compound"]
            # Use float() because you can get real coefficients
            cpds_coeff_dict[cpd] = (cpds_coeff_dict.get(cpd, 0) +
                                    float(rgt["coefficient"]))
            # Only formula/charge are read from this entry; the net
            # coefficient is kept separately so the caller's reagent
            # dicts are left untouched.
            cpds_dict[cpd] = rgt

        ########################################
        # Check for duplicate elements, across
        # all compounds, these are balanced
        # directly.
        #######################################
        rxn_net_charge = 0.0
        rxn_net_mass = dict()
        for cpd, rgt in cpds_dict.items():
            cpd_atoms = self.CompoundsHelper.parseFormula(rgt["formula"])

            if (len(cpd_atoms) == 0):
                return "CPDFORMERROR"

            net_coeff = cpds_coeff_dict[cpd]
            rxn_net_charge += float(rgt["charge"]) * net_coeff

            for atom in cpd_atoms:
                atom_coeff_mass = float(cpd_atoms[atom]) * net_coeff
                rxn_net_mass[atom] = (rxn_net_mass.get(atom, 0.0) +
                                      atom_coeff_mass)

        # Round out tiny numbers that occur because we add/subtract floats
        # Threshold of 1e-6 found to capture all these instances without
        # removing actual small differences in mass.
        for atom in rxn_net_mass:
            if (-1e-6 < rxn_net_mass[atom] < 1e-6):
                rxn_net_mass[atom] = 0

        if (-1e-6 < rxn_net_charge < 1e-6):
            rxn_net_charge = 0

        # Report any imbalance
        imbalanced_atoms_array = list()
        for atom in sorted(rxn_net_mass):
            if (rxn_net_mass[atom] == 0):
                continue

            # Correct for redundant ".0" in floats
            rxn_net_mass[atom] = self._dropRedundantZero(rxn_net_mass[atom])
            imbalanced_atoms_array.append(atom + ":" +
                                          str(rxn_net_mass[atom]))

        # Correct for redundant ".0" in floats
        rxn_net_charge = self._dropRedundantZero(rxn_net_charge)

        status_parts = list()
        if (len(imbalanced_atoms_array) > 0):
            status_parts.append("MI:" + "/".join(imbalanced_atoms_array))
        if (rxn_net_charge != 0):
            status_parts.append("CI:" + str(rxn_net_charge))

        if (len(status_parts) == 0):
            return "OK"
        return "|".join(status_parts)

    def adjustCompound(self, rxn_cpds_array, compound, adjustment,
                       compartment=0):
        """Subtract `adjustment` from a compound's coefficient, in place.

        If the compound is not present in the given compartment, a new
        reagent with coefficient `-adjustment` is appended.  If the
        adjusted coefficient becomes zero, the reagent is removed.

        @param rxn_cpds_array: list of reagent dicts; modified in place
        @param compound: compound id to adjust
        @param adjustment: amount subtracted from the coefficient
        @param compartment: compartment (int) in which to adjust
        @return: the same `rxn_cpds_array` object (previously None was
            returned for a non-zero adjustment; callers that ignore the
            return value are unaffected)
        @raise KeyError: if a new reagent is needed and `compound` is not
            in self.Compounds_Dict
        """
        if (adjustment == 0):
            return rxn_cpds_array

        ######################################################################
        # We will always assume to adjust a compound automatically
        # in the compartment indexed as zero, unless otherwise specified.
        # This answers the question of how to handle transporters.
        ######################################################################

        # Check to see if it already exists
        cpd_exists = False
        cpd_remove = None
        for rgt in rxn_cpds_array:
            if (rgt["compound"] == compound and
                    rgt["compartment"] == compartment):
                rgt["coefficient"] -= adjustment
                cpd_exists = True
                if (rgt["coefficient"] == 0):
                    cpd_remove = rgt

        if (not cpd_exists):
            # Reagent index is fixed at 0 for newly added reagents
            rgt_id = compound + "_" + str(compartment) + "0"
            rxn_cpds_array.append({
                "reagent": rgt_id,
                "coefficient": 0 - adjustment,
                "compound": compound,
                "compartment": compartment,
                "index": 0,
                "name": self.Compounds_Dict[compound]["name"],
                "formula": self.Compounds_Dict[compound]["formula"],
                "charge": self.Compounds_Dict[compound]["charge"]
            })

        if (cpd_remove is not None):
            rxn_cpds_array.remove(cpd_remove)

        return rxn_cpds_array

    def rebuildReaction(self, reaction_dict, stoichiometry):
        """Recompute the derived text fields of a reaction, in place.

        Sets "stoichiometry", "code", "equation", "definition" and
        "compound_ids" on `reaction_dict`.  The arrow in equation and
        definition follows reaction_dict["direction"] ("=" -> "<=>",
        "<" -> "<=", anything else -> "=>"); the code always uses "<=>"
        and leaves out protons (cpd00067).

        @param reaction_dict: reaction dict; must contain "direction"
        @param stoichiometry: stoichiometry string (see parseStoich())
        @return: None
        """
        # Assign stoich
        reaction_dict["stoichiometry"] = stoichiometry

        # Build list of "reagents" and "products"
        rxn_cpds_array = self.parseStoich(stoichiometry)
        reagents_array = list()
        products_array = list()
        compound_ids_dict = dict()
        for rgt in rxn_cpds_array:
            compound_ids_dict[rgt["compound"]] = 1
            if (rgt["coefficient"] > 0):
                products_array.append(rgt)
            else:
                reagents_array.append(rgt)

        def rgt_label(rgt):
            # e.g. "(1) cpd00001[0]"
            return ("(" + str(abs(rgt["coefficient"])) + ") " +
                    rgt["compound"] + "[" + str(rgt["compartment"]) + "]")

        rgts_str_array = [rgt_label(rgt) for rgt in reagents_array]
        pdts_str_array = [rgt_label(rgt) for rgt in products_array]

        if (reaction_dict["direction"] == "="):
            arrow = "<=>"
        elif (reaction_dict["direction"] == "<"):
            arrow = "<="
        else:
            arrow = "=>"

        equation = " ".join([
            " + ".join(rgts_str_array), arrow, " + ".join(pdts_str_array)
        ])

        reaction_dict["code"] = " ".join([
            " + ".join(x for x in rgts_str_array if "cpd00067" not in x),
            "<=>",
            " + ".join(x for x in pdts_str_array if "cpd00067" not in x)
        ])
        reaction_dict["equation"] = equation
        reaction_dict["definition"] = equation
        reaction_dict["compound_ids"] = ";".join(
            sorted(compound_ids_dict.keys()))

        # Replace ids with names in Definition
        for cpd_id in compound_ids_dict:
            if (cpd_id in reaction_dict["definition"]):
                reaction_dict["definition"] = reaction_dict[
                    "definition"].replace(cpd_id,
                                          self.Compounds_Dict[cpd_id]["name"])

        return

    def saveReactions(self, reactions_dict):
        """Write reactions next to the source file as TSV and as JSON.

        Both output paths are derived from self.RxnsFile with its
        extension replaced by ".tsv" / ".json".

        @param reactions_dict: dict of reaction id -> reaction dict; each
            reaction must contain every column in self.Headers
        """
        rxns_root = os.path.splitext(self.RxnsFile)[0]

        # Print to TSV
        with open(rxns_root + ".tsv", 'w') as rxns_file:
            rxns_file.write("\t".join(self.Headers) + "\n")
            for rxn in sorted(reactions_dict.keys()):
                rxns_file.write("\t".join(
                    str(reactions_dict[rxn][header])
                    for header in self.Headers) + "\n")

        # Print to JSON
        with open(rxns_root + ".json", 'w') as rxns_file:
            rxns_file.write(
                json.dumps(reactions_dict, indent=4, sort_keys=True))

    def loadMSAliases(self, sources_array=None):
        """Load reaction aliases for the requested sources.

        @param sources_array: list of source names to keep; when empty or
            omitted nothing is read and {} is returned
        @return: dict of reaction id -> source -> list of external ids
        """
        if (not sources_array):
            return {}

        aliases_dict = dict()
        with open(self.AliasFile) as alias_fh:
            for line in DictReader(alias_fh, dialect='excel-tab'):
                # The alias file is filtered down to reaction rows here
                if ("rxn" not in line['MS ID']):
                    continue
                if (line['Source'] not in sources_array):
                    continue

                sources = aliases_dict.setdefault(line['MS ID'], dict())
                sources.setdefault(line['Source'], list()).append(
                    line['External ID'])

        return aliases_dict
#!/usr/bin/env python
import os, sys

temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds

# Run every compound formula through mergeFormula() and save the compounds
# file again if any formula changed or any note was produced.
CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()

Update_Compounds = 0
for cpd in sorted(Compounds_Dict):
    cpd_entry = Compounds_Dict[cpd]
    old_formula = cpd_entry["formula"]

    # mergeFormula() hands back the merged formula plus a notes string
    new_formula, notes = CompoundsHelper.mergeFormula(old_formula)
    has_notes = notes != ""
    formula_changed = new_formula != old_formula

    if has_notes:
        cpd_entry["notes"] = notes

    if formula_changed:
        print("Updating " + cpd + ": " + old_formula + " --> " + new_formula)
        cpd_entry["formula"] = new_formula

    # Either kind of change means the file has to be written back
    if has_notes or formula_changed:
        Update_Compounds = 1

if Update_Compounds == 1:
    print("Saving compounds")
    CompoundsHelper.saveCompounds(Compounds_Dict)
#!/usr/bin/env python import os import sys import json from BiochemPy import Compounds, Reactions #Load Compounds CompoundsHelper = Compounds() Compounds_Dict = CompoundsHelper.loadCompounds() MS_Aliases_Dict = CompoundsHelper.loadMSAliases(["MetaCyc", "PlantCyc"]) for cpd in MS_Aliases_Dict: if ('PlantCyc' not in MS_Aliases_Dict[cpd]): MS_Aliases_Dict[cpd]['PlantCyc'] = [] if ('MetaCyc' not in MS_Aliases_Dict[cpd]): MS_Aliases_Dict[cpd]['MetaCyc'] = [] print("\t".join([ cpd, "|".join(MS_Aliases_Dict[cpd]['PlantCyc']), "|".join(MS_Aliases_Dict[cpd]["MetaCyc"]) ])) #Load Reactions ReactionsHelper = Reactions() Reactions_Dict = ReactionsHelper.loadReactions() MS_Aliases_Dict = ReactionsHelper.loadMSAliases(["MetaCyc", "PlantCyc"]) for rxn in MS_Aliases_Dict: if ('PlantCyc' not in MS_Aliases_Dict[rxn]): MS_Aliases_Dict[rxn]['PlantCyc'] = [] if ('MetaCyc' not in MS_Aliases_Dict[rxn]):
#!/usr/bin/env python
import os, sys
from BiochemPy import Compounds, Reactions, InChIs

# Print the ontology parent/child relationships of every compound whose
# InChIKey contains the string in `inchi_exception`.
compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()

parents_children = dict()  # parent id -> {child id: 1}
children_parents = dict()  # child id  -> {parent id: 1}

inchi_exception = "WQZGKKKJIJFFOK"
cpd_exception = "cpd00027"  # NOTE(review): not used in the visible script

for cpd in compounds_dict:
    cpd_obj = compounds_dict[cpd]

    # Only compounds with a parsed (dict) ontology can be inspected
    if (not isinstance(cpd_obj['ontology'], dict)):
        continue

    if (inchi_exception not in cpd_obj['inchikey']):
        continue

    print(cpd_obj['id'], cpd_obj['inchikey'])

    if ('parent_class' in cpd_obj['ontology']):
        # "parent_class" is a ";"-separated list of parent ids.  A separate
        # loop variable is used so the outer `cpd` is not overwritten.
        for parent in cpd_obj['ontology']['parent_class'].split(";"):
            children_parents.setdefault(cpd_obj['id'], dict())[parent] = 1
            parents_children.setdefault(parent, dict())[cpd_obj['id']] = 1

    # print( cpd_obj['id'], cpd_obj['ontology'], isinstance(cpd_obj['ontology'],dict) )

print(parents_children)
#!/usr/bin/env python import os, sys temp = list() header = 1 sys.path.append('../../Libs/Python') from BiochemPy import Reactions, Compounds, InChIs CompoundsHelper = Compounds() Compounds_Dict = CompoundsHelper.loadCompounds() Structures_Dict = CompoundsHelper.loadStructures(["InChI"], ["ModelSEED"]) diff_file = open("Compound_Charge_Differences.txt", 'w') for cpd in sorted(Compounds_Dict.keys()): if (cpd not in Structures_Dict): #diff_file.write("Zero structures for "+cpd+"\n") continue if ('InChI' not in Structures_Dict[cpd]): #diff_file.write("No InChI structure for "+cpd+"\n") continue current_charge = float(Compounds_Dict[cpd]['charge']) #Parse out InChI formula and layers inchi = list(Structures_Dict[cpd]['InChI'].keys())[0] (inchi_formula, inchi_layers) = InChIs.parse(inchi) inchi_charge = InChIs.charge(inchi_layers['q'], inchi_layers['p']) if (inchi_charge != current_charge):
if (inchikey not in mnx_inchikey_dict): mnx_inchikey_dict[inchikey] = mnx inchikey = "-".join(inchikey.split('-')[0:2]) if (inchikey not in mnx_inchikey_dict): mnx_inchikey_dict[inchikey] = mnx inchikey = inchikey.split('-')[0] if (inchikey not in mnx_inchikey_dict): mnx_inchikey_dict[inchikey] = mnx file_handle.close() #Here we can cross-check the structures that are in ModelSEED to find ones where #there is a match in eQuilibrator compounds_helper = Compounds() structures_dict = compounds_helper.loadStructures(["InChIKey"], ["ModelSEED"]) seed_mnx_structural_map = dict() for cpd in structures_dict: structure_type = 'InChIKey' if (structure_type not in structures_dict[cpd]): #The load structures function will return all compounds, so have to #Check that the structure is there continue #As these are unique structures, i.e. 1-1 mapping with compound id, #there's only ever one in each list for each compound structure = list(structures_dict[cpd][structure_type].keys())[0] #Here we check on three levels, we check the full string #Then the deprotonated string, then the structure alone
#!/usr/bin/env python import os, sys temp=list(); header=1; sys.path.append('../../Libs/Python') from BiochemPy import Reactions, Compounds, InChIs CompoundsHelper = Compounds() Compounds_Dict = CompoundsHelper.loadCompounds() Structures_Dict = CompoundsHelper.loadStructures(["InChI"],["ModelSEED"]) for cpd in sorted(Compounds_Dict.keys()): if(Compounds_Dict[cpd]['inchikey'] == '' or cpd not in Structures_Dict): continue (inchi_formula,inchi_layers) = InChIs.parse(Structures_Dict[cpd]['InChI']) merged_inchi_formula = CompoundsHelper.mergeFormula(inchi_formula)[0] adjusted_inchi_formula = (InChIs.adjust_protons(merged_inchi_formula,inchi_layers['p']))[0] if(adjusted_inchi_formula != Compounds_Dict[cpd]['formula']): adjusted_inchi_atoms_dict = CompoundsHelper.parseFormula(adjusted_inchi_formula) if('H' in adjusted_inchi_atoms_dict): del(adjusted_inchi_atoms_dict['H']) adjusted_inchi_protonfree_formula = CompoundsHelper.buildFormula(adjusted_inchi_atoms_dict) original_formula_atoms_dict = CompoundsHelper.parseFormula(Compounds_Dict[cpd]['formula']) if('H' in original_formula_atoms_dict): del(original_formula_atoms_dict['H']) original_formula_protonfree_formula = CompoundsHelper.buildFormula(original_formula_atoms_dict)