#!/usr/bin/env python
import os, sys, re
from csv import DictReader

temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()
Aliases_Dict = ReactionsHelper.loadMSAliases()
Names_Dict = ReactionsHelper.loadNames()
ECs_Dict = ReactionsHelper.loadECs()

Source_Classes = dict()
reader = DictReader(open('../../Biochemistry/Aliases/Source_Classifiers.txt'),
                    dialect='excel-tab')
for line in reader:
    if (line['Source Type'] not in Source_Classes):
        Source_Classes[line['Source Type']] = dict()
    Source_Classes[line['Source Type']][line['Source ID']] = 1

for rxn in sorted(Reactions_Dict.keys()):
    if (rxn not in Aliases_Dict):
        continue

    Rxn_Source_Aliases = dict()
    Rxn_Aliases = dict()
    Alias_Count = 0
    for source_type in 'Primary Database', 'Secondary Database', 'Published Model':
#!/usr/bin/env python
from BiochemPy import Reactions

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

Update_Reactions = 0
status_file = open("Status_Changes_After_Proton_Adjustment.txt", 'w')
for rxn in sorted(Reactions_Dict.keys()):
    if (Reactions_Dict[rxn]["status"] == "EMPTY"):
        continue

    #Find statuses that only have proton imbalance
    if ("MI" not in Reactions_Dict[rxn]["status"]):
        continue

    Status_Blocks = Reactions_Dict[rxn]["status"].split("|")
    for block in Status_Blocks:
        if ("MI" not in block):
            continue

        block = block.replace("MI:", "")
        elements = block.split("/")

        #only making adjustments if mass imbalance is a single element,
        #and the element is hydrogen
        if (len(elements) > 1 or not elements[0].startswith("H:")):
            continue

        (element, number) = elements[0].split(":")
        if(neighbor not in neighbor_list):
            neighbor_list.add(neighbor)
            if(neighbor in cpd_dict):
                traverse_ontological_network(neighbor, cpd_dict, neighbor_list)

# We iterate through the compounds, and create lists of all their generic parents
# Doesn't have to be their immediate parent.
# We do this as we want to explore all possible links between generic and specific reactions
child_parent_sets = dict()
for child in compounds_dict:
    parents_list=set()
    traverse_ontological_network(child, compounds_dict, parents_list)
    child_parent_sets[child]=parents_list

# load reactions and their codes for matching
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
reactions_codes = reactions_helper.generateCodes(reactions_dict)

(file_stub,suffix) = ONTOLOGY_FILE.rsplit('.', 1)
result_file='.'.join([file_stub,'out'])
outfile = open(result_file,'w')
outfile.write('\t'.join(["Original Reaction","Matched Reaction","Original Description","Matched Description","Swapped Compounds",'\n']))

for rxn in sorted(reactions_dict.keys()):
    if(reactions_dict[rxn]["status"] == "EMPTY"):
        continue
    if(reactions_dict[rxn]["is_obsolete"] == 1):
        continue
#!/usr/bin/env python
import os, sys

temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

Update_Reactions = 0
for rxn in sorted(Reactions_Dict.keys()):
    if (Reactions_Dict[rxn]["status"] == "EMPTY"):
        continue

    Rxn_Cpds_Array = ReactionsHelper.parseStoich(
        Reactions_Dict[rxn]["stoichiometry"])
    Stoichiometry = ReactionsHelper.buildStoich(Rxn_Cpds_Array)

    if (Stoichiometry != Reactions_Dict[rxn]["stoichiometry"]):
        ReactionsHelper.rebuildReaction(Reactions_Dict[rxn], Stoichiometry)
        Update_Reactions += 1

print("Saving rebuilt equations for " + str(Update_Reactions) + " reactions")
ReactionsHelper.saveReactions(Reactions_Dict)
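# --- Hedged sketch, not part of the repository script above ---
# parseStoich()/buildStoich() are treated as a black-box round-trip in that script.
# Judging from the merge script later in this collection, which splits each reagent
# on ":" five ways, the stored stoichiometry is a semicolon-delimited list of
# "coeff:cpd:compartment:index:name" entries. The helper and the example string
# below are illustrative assumptions only, not the BiochemPy implementation.
def _parse_stoich_sketch(stoichiometry):
    reagents = list()
    for rgt in stoichiometry.split(";"):
        (coeff, cpd, cpt, index, name) = rgt.split(":", 4)
        reagents.append({"coefficient": coeff, "compound": cpd,
                         "compartment": cpt, "index": index, "name": name})
    return reagents

# Example with a hypothetical reagent string:
# _parse_stoich_sketch('-1:cpd00001:0:0:"H2O";1:cpd00009:0:0:"Phosphate"')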
#!/usr/bin/env python
import os, sys

temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
rxns_aliases_dict = reactions_helper.loadMSAliases()
rxns_names_dict = reactions_helper.loadNames()
rxns_ecs_dict = reactions_helper.loadECs()

# We don't actually want obsolete reactions and compounds in our database,
# so we strive to remove any 'new' ones that are obsolete; any information
# attached to them should be associated with their linked counterpart.
# We do need to retain older reactions that are now obsolete, as these may be
# present in previously published models. The number used here is the last
# reaction entered before we re-integrated updates from KEGG and MetaCyc in
# the fall of 2018, so beyond this point we take out obsolete reactions.
last_rxn_str = 'rxn39250'
last_rxn_int = int(last_rxn_str[3:])

delete_rxns = list()
for rxn in reactions_dict:
    rxn_int = int(rxn[3:])
    if (rxn_int > last_rxn_int and reactions_dict[rxn]['is_obsolete']):
        delete_rxns.append(rxn)

for rxn in delete_rxns:
#!/usr/bin/env python
import os, sys
from BiochemPy import Compounds, Reactions, InChIs

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()

print("\n================")
print(
    "For Section: \"Computation of thermodynamic properties of ModelSEED compounds and reaction\"\n"
)

MS_Complete_Structures = dict()
with open("../../../Biochemistry/Structures/Unique_ModelSEED_Structures.txt"
          ) as fh:
    for line in fh.readlines():
        line = line.strip()
        array = line.split('\t')
        if ("InChI" in array[5]):
            MS_Complete_Structures[array[5]] = 1

MNX_Complete_Structures = dict()
with open("../../../Biochemistry/Structures/MetaNetX/chem_prop.tsv") as fh:
    header = 1
    for line in fh.readlines():
        if (line[0] == "#"):
            continue
#!/usr/bin/env python
import os
import sys
import subprocess
import time
import copy
import re
import json
from collections import OrderedDict
from BiochemPy import Reactions, Compounds

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()

Update_Reactions = 0
for rxn in reactions_dict:
    if (reactions_dict[rxn]['status'] == 'EMPTY'):
        continue

    for cpd in reactions_dict[rxn]['compound_ids'].split(';'):
        if (compounds_dict[cpd]['is_obsolete'] == 1):
            if (compounds_dict[cpd]['linked_compound'] == 'null'):
                print(
                    "Warning: missing linked compound for obsolete compound: "
                    + cpd)
                continue

            lnkd_cpd = sorted(
#!/usr/bin/env python
import os, sys

temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()
Reactions_Codes = ReactionsHelper.generateCodes(Reactions_Dict)

Update_Reactions = 0
for code in sorted(Reactions_Codes.keys()):
    primary_rxn = sorted(Reactions_Codes[code].keys())[0]
    # print(primary_rxn, code, Reactions_Dict[primary_rxn]["linked_reaction"], Reactions_Dict[primary_rxn]["is_obsolete"])
    Reactions_Dict[primary_rxn]["is_obsolete"] = 0

    if (len(Reactions_Codes[code].keys()) == 1):
        Reactions_Dict[primary_rxn]["linked_reaction"] = "null"
    else:
        for rxn in Reactions_Codes[code].keys():
            rxn_list = ";".join(
                sorted(x for x in Reactions_Codes[code].keys() if x != rxn))
            Reactions_Dict[rxn]["linked_reaction"] = rxn_list

            if (rxn != primary_rxn):
                print(rxn, code, rxn_list,
                      Reactions_Dict[rxn]["linked_reaction"],
                      Reactions_Dict[rxn]["is_obsolete"])
                Reactions_Dict[rxn]["is_obsolete"] = 1
#!/usr/bin/env python
import os, sys, re, copy
from csv import DictReader
from collections import OrderedDict

temp = list()
header = True

Biochem = "MetaCyc"
Biochem_Root = "../../Biochemistry/Aliases/Provenance/Primary_Databases/"

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
reactions_codes = reactions_helper.generateCodes(reactions_dict)

Default_Rxn = {
    "id": "cpd00001",
    "name": "null",
    "abbreviation": "null",
    "aliases": "null",
    "code": "null",
    "stoichiometry": "null",
    "equation": "null",
    "definition": "null",
    "reversibility": "=",
    "direction": "=",
    "deltag": "10000000",
from . import Schemas as schemas
from BiochemPy import Reactions
reactions_helper = Reactions()
from .error_reporting import find_new_errors, report_errors
from jsonschema import Draft4Validator
from collections import defaultdict, Counter
import re
import argparse
import json


def parse_rxn(_rxn):
    # returns a pair of dicts (reactants, products) with compound ids as keys
    # and stoichiometric coefficients as values
    _rxn = _rxn.translate(str.maketrans("", "", "()"))
    return [
        dict([compound[:-3].split()[::-1] for compound in half.split(' + ')])
        if half else {} for half in re.split(' <?=>? ', _rxn)
    ]


def get_atom_count(compoundDict, complist):
    atom_counts = Counter()
    for id, stoich in complist.items():
        for comp in compoundDict:
            if (id != comp['id']):
                continue
            if comp['formula'] is None or comp['formula'] == 'null':
                continue
            for pair in re.findall(r'([A-Z][a-z]?)(\d*)', comp['formula']):
                if not pair[1]:
                    atom_counts[pair[0]] += float(stoich)
                else:
#!/usr/bin/env python
import os, sys

temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()
CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()

Compound_To_Merge_From = "cpd00013"
Compound_To_Merge_To = "cpd19013"

Cpds_Rxns_Dict = dict()
Rxns_Cpds_Dict = dict()
for rxn in Reactions_Dict.keys():
    if (Reactions_Dict[rxn]["status"] == "EMPTY"):
        continue

    for rgt in Reactions_Dict[rxn]["stoichiometry"].split(";"):
        (coeff, cpd, cpt, index, name) = rgt.split(":", 4)

        if (cpd not in Cpds_Rxns_Dict):
            Cpds_Rxns_Dict[cpd] = dict()
        Cpds_Rxns_Dict[cpd][rxn] = 1
##########################################################
#
# Load disambiguation details
#
##########################################################
disambiguated_reactions = list()
with open(File) as jf:
    Disambiguated_Compound = json.load(jf)
    disambiguated_reactions = Disambiguated_Compound['reactions']

#Load Reactions
#Using reaction provenance and disambiguation details
#We find the list of reactions that belong to either
#the original compound or the newly disambiguated compound
reactions_helper = Reactions()
reaction_names_dict = reactions_helper.loadNames()
reaction_ecs_dict = reactions_helper.loadECs()

for object in disambiguated_reactions:
    original_names = list()
    if (object['from']['id'] in reaction_names_dict):
        original_names = reaction_names_dict[object['from']['id']]
    disambig_names = list()
    if (object['to']['id'] in reaction_names_dict):
        disambig_names = reaction_names_dict[object['to']['id']]

    for name in object['names']:
        if (object['names'][name] == "true"):
            if (name not in original_names):
                original_names.append(name)
            if (name in disambig_names):
CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()
MS_Aliases_Dict = CompoundsHelper.loadMSAliases(["MetaCyc", "PlantCyc"])

for cpd in MS_Aliases_Dict:
    if ('PlantCyc' not in MS_Aliases_Dict[cpd]):
        MS_Aliases_Dict[cpd]['PlantCyc'] = []
    if ('MetaCyc' not in MS_Aliases_Dict[cpd]):
        MS_Aliases_Dict[cpd]['MetaCyc'] = []

    print("\t".join([
        cpd, "|".join(MS_Aliases_Dict[cpd]['PlantCyc']),
        "|".join(MS_Aliases_Dict[cpd]["MetaCyc"])
    ]))

#Load Reactions
ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()
MS_Aliases_Dict = ReactionsHelper.loadMSAliases(["MetaCyc", "PlantCyc"])

for rxn in MS_Aliases_Dict:
    if ('PlantCyc' not in MS_Aliases_Dict[rxn]):
        MS_Aliases_Dict[rxn]['PlantCyc'] = []
    if ('MetaCyc' not in MS_Aliases_Dict[rxn]):
        MS_Aliases_Dict[rxn]['MetaCyc'] = []

    print("\t".join([
        rxn, "|".join(MS_Aliases_Dict[rxn]['PlantCyc']),
        "|".join(MS_Aliases_Dict[rxn]["MetaCyc"])
    ]))
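# --- Hedged note, not part of the repository script above ---
# Each printed row is tab-separated: the ModelSEED id, then the PlantCyc aliases
# joined with "|", then the MetaCyc aliases joined with "|". A hypothetical row
# (the alias values here are illustrative only) would look like:
# cpd00001	WATER	WATER|OH-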
#!/usr/bin/env python
import os, sys, re

temp = list()

from BiochemPy import Reactions, Compounds

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
reactions_codes = reactions_helper.generateCodes(reactions_dict,
                                                 stoich=False,
                                                 transport=False)

names_dict = compounds_helper.loadNames()
searchnames_dict = dict()
for msid in sorted(names_dict):
    for name in names_dict[msid]:
        searchname = compounds_helper.searchname(name)
        #Avoid redundancy where possible
        if (searchname not in searchnames_dict):
            searchnames_dict[searchname] = msid

mhc = open('Mishit_Compound_Names.txt', 'w')
with open('Parsed_Enzyme_Equations.txt') as fh:
    for line in fh.readlines():
        line = line.strip()
        (id, old_equation) = line.split('\t')
        array = re.split(r' (<?=>?|\+) ', old_equation)
        new_array = list()
        mishit = False
        for i in range(len(array)):
    for source in compound_aliases_dict[cpd]:
        if ('Cyc' not in source and source != 'KEGG'
                and source != 'metanetx.chemical'):
            Model = True
            sources['Model'][source] = 1

    if (Model is True):
        if (KEGG_MetaCyc is True or BioCyc is True):
            compound_counts['Model (Integrated)'] += 1
        else:
            compound_counts['Model (Unintegrated)'] += 1

    if (KEGG_MetaCyc is False and BioCyc is False and Model is False):
        compound_counts['Orphans'] += 1

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
reaction_aliases_dict = reactions_helper.loadMSAliases()

reaction_counts = dict()
for key in column_keys:
    reaction_counts[key] = 0

for rxn in reactions_dict:
    if (reactions_dict[rxn]['status'] == 'EMPTY'):
        continue
    if (reactions_dict[rxn]['is_obsolete'] == 1):
        continue
#!/usr/bin/env python
import os, sys

temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

Update_Reactions = 0
status_file = open("Status_Changes.txt", 'w')
for rxn in sorted(Reactions_Dict.keys()):
    if (Reactions_Dict[rxn]["status"] == "EMPTY"):
        continue

    Rxn_Cpds_Array = ReactionsHelper.parseStoich(
        Reactions_Dict[rxn]["stoichiometry"])

    new_status = ReactionsHelper.balanceReaction(Rxn_Cpds_Array)
    old_status = Reactions_Dict[rxn]["status"]

    #Need to handle reactions with polymers
    if (new_status == "Duplicate reagents"):
        new_status = "NB"
        continue

    if (new_status != old_status and "CK" not in old_status):
        print("Changing Status for " + rxn + " from " + old_status + " to " +
              new_status)
        status_file.write(rxn + "\t" + old_status + "\t" + new_status + "\n")
#!/usr/bin/env python
from BiochemPy import Reactions

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

Update_Reactions=0
for rxn in sorted(Reactions_Dict.keys()):
    if(Reactions_Dict[rxn]["status"] == "EMPTY"):
        continue

    old_stoichiometry=Reactions_Dict[rxn]["stoichiometry"]
    Rxn_Cpds_Array=ReactionsHelper.parseStoich(old_stoichiometry)

    is_transport = ReactionsHelper.isTransport(Rxn_Cpds_Array)
    if(is_transport != Reactions_Dict[rxn]["is_transport"]):
        print("Updating: ",rxn,is_transport)
        Reactions_Dict[rxn]["is_transport"] = is_transport
        Update_Reactions+=1

if(Update_Reactions>0):
    print("Saving adjusted is_transport flag for "+str(Update_Reactions)+" reactions")
    ReactionsHelper.saveReactions(Reactions_Dict)
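# --- Hedged sketch, not part of the repository script above ---
# isTransport() is used above as a black box. The sketch below shows one plausible
# reading of the flag, assuming each reagent record parsed from the stoichiometry
# carries a 'compartment' field: a reaction is flagged as a transport reaction when
# its reagents span more than one compartment. Illustrative only; this is not the
# BiochemPy implementation.
def _is_transport_sketch(rxn_cpds_array):
    compartments = set(rgt['compartment'] for rgt in rxn_cpds_array)
    return 1 if len(compartments) > 1 else 0

# Example with hypothetical reagent records (a proton moved between compartments):
# _is_transport_sketch([{'compound': 'cpd00067', 'compartment': 0},
#                       {'compound': 'cpd00067', 'compartment': 1}])  # -> 1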
remove_index = 0
remove_string = 'ontology'

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
for header in range(len(compounds_helper.Headers)):
    if (compounds_helper.Headers[header] == remove_string):
        remove_index = header
del compounds_helper.Headers[remove_index]

for cpd in compounds_dict:
    del compounds_dict[cpd][remove_string]
compounds_helper.saveCompounds(compounds_dict)

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
for header in range(len(reactions_helper.Headers)):
    if (reactions_helper.Headers[header] == remove_string):
        remove_index = header
del reactions_helper.Headers[remove_index]

for rxn in reactions_dict:
    del reactions_dict[rxn][remove_string]
reactions_helper.saveReactions(reactions_dict)
#!/usr/bin/env python
from BiochemPy import Reactions, Compounds

##########################################
# Reprinting compounds, aliases and names
##########################################

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
compounds_helper.saveCompounds(compounds_dict)

aliases_dict = compounds_helper.loadMSAliases()
compounds_helper.saveAliases(aliases_dict)

names_dict = compounds_helper.loadNames()
compounds_helper.saveNames(names_dict)

###############################################
# Reprinting reactions, aliases, names and ecs
###############################################

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
for rxn in reactions_dict:
    reactions_helper.rebuildReaction(reactions_dict[rxn])
reactions_helper.saveReactions(reactions_dict)

aliases_dict = reactions_helper.loadMSAliases()
reactions_helper.saveAliases(aliases_dict)

names_dict = reactions_helper.loadNames()
reactions_helper.saveNames(names_dict)

ecs_dict = reactions_helper.loadECs()
reactions_helper.saveECs(ecs_dict)
    print(cpd_obj['id'], cpd_obj['inchikey'])
    if ('parent_class' in cpd_obj['ontology']):
        for cpd in cpd_obj['ontology']['parent_class'].split(";"):
            if (cpd_obj['id'] not in children_parents):
                children_parents[cpd_obj['id']] = dict()
            children_parents[cpd_obj['id']][cpd] = 1

            if (cpd not in parents_children):
                parents_children[cpd] = dict()
            parents_children[cpd][cpd_obj['id']] = 1

#    print( cpd_obj['id'], cpd_obj['ontology'], isinstance(cpd_obj['ontology'],dict) )
print(parents_children)

#in first pass, find all reactions that contain either children or parents
reactions_to_use = {'parent': [], 'child': [], 'mixed': []}
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
for rxn in reactions_dict:
    rxn_obj = reactions_dict[rxn]
    for cpd in rxn_obj['compound_ids'].split(';'):
        if (cpd != cpd_exception):
            continue
        if (cpd in parents_children):
            reactions_to_use['parent'].append(rxn)
        if (cpd in children_parents):
            reactions_to_use['child'].append(rxn)

#First take all child reactions, and generate a set where all relationships are parents
#!/usr/bin/env python
import os, sys, time
from csv import DictReader

temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

Template_Dir = '../../../Templates/'
Unbalanced_Reactions = dict()
for template in os.listdir(Template_Dir):
    if not os.path.isdir(os.path.join(Template_Dir, template)):
        continue

    Unbalanced_Reactions[template] = dict()
    with open(os.path.join(Template_Dir, template, 'Reactions.tsv')) as infile:
        for line in DictReader(infile, dialect='excel-tab'):
            if (line['id'] in Reactions_Dict
                    and "OK" not in Reactions_Dict[line['id']]['status']):
                Unbalanced_Reactions[template][line['id']] = 1

time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(time.time()))
file = open("Template_Update.txt", "a")
file.write("====================================\n")
file.write(time_str + "\n")
for template in sorted(Unbalanced_Reactions.keys()):
    file.write(template + ": " +
               str(len(Unbalanced_Reactions[template].keys())) + "\n")
    else:
        structure = structure.split('-')[0]
        if (structure in mnx_inchikey_dict):
            matched_mnx = mnx_inchikey_dict[structure]

    #As of 06/28/2019, there were:
    # 17,071 matches based on full inchikey
    # 17,863 matches using proton-neutral inchikey
    # 18,559 matches using stereo-neutral inchikey
    if (matched_mnx is None):
        continue

    seed_mnx_structural_map[cpd] = mnx_inchikey_dict[structure]

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()

complete_mol_rxns_dict = dict()
incomplete_mol_rxns_dict = dict()
for rxn in reactions_dict:
    if (reactions_dict[rxn]['status'] == 'EMPTY'):
        continue

    rxn_cpds_array = reactions_helper.parseStoich(
        reactions_dict[rxn]["stoichiometry"])

    All_Mol = True
    Some_Mol = False
    for rgt in rxn_cpds_array:
        if (rgt['compound'] not in seed_mnx_structural_map):
        if(Found_Cpd == ""):
            continue

        Is_Old=False
        if(Found_Cpd != "" and model in Old_Aliases and Found_Cpd in Old_Aliases[model]):
            Is_Old=True

        if(model not in Prov_Rxns):
            Prov_Rxns[model]=dict()
        Prov_Rxns[model][rxn]=Is_Old

#Load Reactions
#Using reaction provenance and disambiguation details
#We find the list of reactions that belong to either
#the original compound or the newly disambiguated compound
reactions_helper = Reactions()
reaction_aliases_dict = reactions_helper.loadMSAliases()
reaction_names_dict = reactions_helper.loadNames()
reaction_ecs_dict = reactions_helper.loadECs()
reactions_dict = reactions_helper.loadReactions()

#Three types of reactions:
# 1) Retain original/old reaction
keep_reactions=dict()
# 2) Retain but change compound in stoichiometry
update_reactions=dict()
# 3) Split into new reaction with updated stoichiometry
disambiguate_reactions=dict()

for rxn in reaction_aliases_dict:
    Is_Old_Dict=dict()
    Source_Alias=dict()