Beispiel #1
0
#!/usr/bin/env python
import os, sys, re
from csv import DictReader
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()
Aliases_Dict = ReactionsHelper.loadMSAliases()
Names_Dict = ReactionsHelper.loadNames()
ECs_Dict = ReactionsHelper.loadECs()

# Group the known alias sources by classification:
#   Source_Classes[<Source Type>][<Source ID>] = 1
Source_Classes = dict()
# Read the tab-delimited classifier table inside a `with` block so the file
# handle is closed once the table has been consumed (it was previously left
# open for the remainder of the script).
with open('../../Biochemistry/Aliases/Source_Classifiers.txt') as classifiers_fh:
    reader = DictReader(classifiers_fh, dialect='excel-tab')
    for line in reader:
        # setdefault creates the per-type dict the first time a type is seen
        Source_Classes.setdefault(line['Source Type'],
                                  dict())[line['Source ID']] = 1

for rxn in sorted(Reactions_Dict.keys()):
    if (rxn not in Aliases_Dict):
        continue

    Rxn_Source_Aliases = dict()
    Rxn_Aliases = dict()
    Alias_Count = 0
    for source_type in 'Primary Database', 'Secondary Database', 'Published Model':
#!/usr/bin/env python
from BiochemPy import Reactions

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

# Counter for adjusted reactions; nothing in the visible part of the loop
# increments it.
Update_Reactions = 0
# Opened for writing, which truncates any existing report.
# NOTE(review): no matching close() is visible in this section - confirm the
# handle is closed further down.
status_file = open("Status_Changes_After_Proton_Adjustment.txt", 'w')
for rxn in sorted(Reactions_Dict.keys()):

    # Reactions whose status is exactly "EMPTY" are skipped outright
    if (Reactions_Dict[rxn]["status"] == "EMPTY"):
        continue

    # Only statuses that mention a mass imbalance ("MI") are of interest
    if ("MI" not in Reactions_Dict[rxn]["status"]):
        continue

    # A status is a '|'-separated list of blocks; visit only the MI block(s)
    Status_Blocks = Reactions_Dict[rxn]["status"].split("|")
    for block in Status_Blocks:
        if ("MI" not in block):
            continue

        # Strip the "MI:" tag, leaving '/'-separated per-element entries
        block = block.replace("MI:", "")
        elements = block.split("/")

        # Only make adjustments if the mass imbalance is a single element,
        # and that element is hydrogen
        if (len(elements) > 1 or not elements[0].startswith("H:")):
            continue

        # Split the lone "H:<number>" entry into its two ':'-separated parts
        (element, number) = elements[0].split(":")
        if(neighbor not in neighbor_list):
            neighbor_list.add(neighbor)
        if(neighbor in cpd_dict):
            traverse_ontological_network(neighbor, cpd_dict, neighbor_list)

# Iterate through the compounds and collect, for each one, the set of all of
# its generic parents - not only the immediate parent.
# This lets us explore every possible link between generic and specific
# reactions.
child_parent_sets = dict()
for child in compounds_dict:
    # A fresh set per compound; it is handed to the traversal as its third
    # argument and stored afterwards.
    # NOTE(review): presumably traverse_ontological_network fills it in place - confirm.
    parents_list=set()
    traverse_ontological_network(child, compounds_dict, parents_list)
    child_parent_sets[child]=parents_list

# Load reactions and their codes for matching
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
reactions_codes = reactions_helper.generateCodes(reactions_dict)

# Derive the result file name by swapping the ontology file's extension for
# ".out". rsplit('.', 1) must yield two parts, so a name without any '.'
# raises ValueError on the unpacking.
(file_stub,suffix) = ONTOLOGY_FILE.rsplit('.', 1)
result_file='.'.join([file_stub,'out'])

# NOTE(review): no close() for outfile is visible in this section.
outfile = open(result_file,'w')
# Header row. Because '\n' is one of the joined items, the line ends with a
# tab immediately before the newline.
outfile.write('\t'.join(["Original Reaction","Matched Reaction","Original Description","Matched Description","Swapped Compounds",'\n']))
for rxn in sorted(reactions_dict.keys()):
    if(reactions_dict[rxn]["status"] == "EMPTY"):
        continue

    if(reactions_dict[rxn]["is_obsolete"] == 1):
        continue
#!/usr/bin/env python
import os, sys
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

# Round-trip every non-empty reaction's stoichiometry string through
# parseStoich()/buildStoich() and rebuild any reaction whose stored string
# differs from the regenerated one.
Update_Reactions = 0
for rxn in sorted(Reactions_Dict.keys()):
    if (Reactions_Dict[rxn]["status"] == "EMPTY"):
        continue

    Rxn_Cpds_Array = ReactionsHelper.parseStoich(
        Reactions_Dict[rxn]["stoichiometry"])
    Stoichiometry = ReactionsHelper.buildStoich(Rxn_Cpds_Array)
    if (Stoichiometry != Reactions_Dict[rxn]["stoichiometry"]):
        ReactionsHelper.rebuildReaction(Reactions_Dict[rxn], Stoichiometry)
        Update_Reactions += 1

# Call print() with one pre-built string: the old `print "..."` statement is a
# SyntaxError on Python 3, while this form prints the same text on 2 and 3.
print("Saving rebuilt equations for " + str(Update_Reactions) + " reactions")
# The table is saved unconditionally, even when no reaction changed.
ReactionsHelper.saveReactions(Reactions_Dict)
Beispiel #5
0
#!/usr/bin/env python
import os, sys
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
rxns_aliases_dict = reactions_helper.loadMSAliases()
rxns_names_dict = reactions_helper.loadNames()
rxns_ecs_dict = reactions_helper.loadECs()

# We do not want obsolete reactions and compounds in our database, so we
# strive to remove any 'new' ones that are obsolete.
# Any information attached to them should be associated with their linked
# counterpart.
# Older reactions that have since become obsolete must be retained, as they
# may be present in previously published models.

# The id below is the last reaction entered before updates from KEGG and
# MetaCyc were re-integrated in the fall of 2018; only obsolete reactions
# numbered after it are taken out.
last_rxn_str = 'rxn39250'
# Numeric part of the id: everything after the 3-character "rxn" prefix
last_rxn_int = int(last_rxn_str[3:])

# Collect the ids first rather than deleting while iterating the dict
delete_rxns = list()
for rxn in reactions_dict:
    rxn_int = int(rxn[3:])
    # NOTE(review): 'is_obsolete' is tested for truthiness here, whereas other
    # scripts compare it with == 1 - confirm the stored type is numeric.
    if (rxn_int > last_rxn_int and reactions_dict[rxn]['is_obsolete']):
        delete_rxns.append(rxn)

for rxn in delete_rxns:
#!/usr/bin/env python
import os, sys
from BiochemPy import Compounds, Reactions, InChIs

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()

print("\n================")
print(
    "For Section: \"Computation of thermodynamic properties of ModelSEED compounds and reaction\"\n"
)

# Collect, as dict keys, every value found in the sixth tab-separated column
# of the ModelSEED structures table that contains the text "InChI".
MS_Complete_Structures = dict()
structures_path = "../../../Biochemistry/Structures/Unique_ModelSEED_Structures.txt"
with open(structures_path) as fh:
    for line in fh:
        line = line.strip()
        array = line.split('\t')
        structure = array[5]
        if ("InChI" not in structure):
            continue
        MS_Complete_Structures[structure] = 1

MNX_Complete_Structures = dict()
with open("../../../Biochemistry/Structures/MetaNetX/chem_prop.tsv") as fh:
    header = 1
    for line in fh.readlines():
        if (line[0] == "#"):
            continue
#!/usr/bin/env python
import os
import sys
import subprocess
import time
import copy
import re
import json
from collections import OrderedDict
from BiochemPy import Reactions, Compounds

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()

Update_Reactions = 0
for rxn in reactions_dict:
    if (reactions_dict[rxn]['status'] == 'EMPTY'):
        continue

    for cpd in reactions_dict[rxn]['compound_ids'].split(';'):
        if (compounds_dict[cpd]['is_obsolete'] == 1):
            if (compounds_dict[cpd]['linked_compound'] == 'null'):
                print(
                    "Warning: missing linked compound for obsolete compound: "
                    + cpd)
                continue

            lnkd_cpd = sorted(
#!/usr/bin/env python
import os, sys
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()
Reactions_Codes = ReactionsHelper.generateCodes(Reactions_Dict)

# For every reaction code, treat the lowest-sorting reaction id as the primary
# reaction, link all reactions sharing the code to one another and flag the
# non-primary ones as obsolete.
Update_Reactions = 0
for code in sorted(Reactions_Codes.keys()):

    # Sort the ids sharing this code once instead of once per reaction
    code_rxns = sorted(Reactions_Codes[code].keys())
    primary_rxn = code_rxns[0]
    Reactions_Dict[primary_rxn]["is_obsolete"] = 0

    if (len(code_rxns) == 1):
        # A reaction with a unique code has nothing to link to
        Reactions_Dict[primary_rxn]["linked_reaction"] = "null"
    else:
        for rxn in Reactions_Codes[code].keys():
            # Every other reaction with the same code, ';'-joined in id order
            rxn_list = ";".join(x for x in code_rxns if x != rxn)
            Reactions_Dict[rxn]["linked_reaction"] = rxn_list
            if (rxn != primary_rxn):
                # Build the space-separated line by hand: the old multi-value
                # `print a, b, ...` statement is a SyntaxError on Python 3.
                # The is_obsolete value shown is the one held *before* it is
                # overwritten on the next line.
                print(" ".join(
                    str(field)
                    for field in (rxn, code, rxn_list,
                                  Reactions_Dict[rxn]["linked_reaction"],
                                  Reactions_Dict[rxn]["is_obsolete"])))
                Reactions_Dict[rxn]["is_obsolete"] = 1
Beispiel #9
0
#!/usr/bin/env python
import os, sys, re, copy
from csv import DictReader
from collections import OrderedDict
temp = list()
header = True

Biochem = "MetaCyc"
Biochem_Root = "../../Biochemistry/Aliases/Provenance/Primary_Databases/"

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
reactions_codes = reactions_helper.generateCodes(reactions_dict)

Default_Rxn = {
    "id": "cpd00001",
    "name": "null",
    "abbreviation": "null",
    "aliases": "null",
    "code": "null",
    "stoichiometry": "null",
    "equation": "null",
    "definition": "null",
    "reversibility": "=",
    "direction": "=",
    "deltag": "10000000",
from . import Schemas as schemas
from BiochemPy import Reactions
reactions_helper = Reactions()

from .error_reporting import find_new_errors, report_errors
from jsonschema import Draft4Validator
from collections import defaultdict, Counter
import re
import argparse
import json


def parse_rxn(_rxn):
    """Split a reaction equation string into per-side reagent dicts.

    All parentheses are removed, then the equation is split on an arrow
    surrounded by single spaces (``=``, ``<=``, ``=>`` or ``<=>``). Each side
    is split on ``' + '``; the last three characters of every term are
    dropped and the remainder is expected to be two whitespace-separated
    tokens, ``<stoich> <compound>``.

    Returns a list with one dict per side, mapping compound -> stoich (both
    strings); an empty side yields an empty dict. A term that does not split
    into exactly two tokens raises ValueError.
    """
    without_parens = _rxn.translate(str.maketrans("", "", "()"))
    sides = []
    for side in re.split(' <?=>? ', without_parens):
        reagents = {}
        if side:
            for term in side.split(' + '):
                # term[:-3] trims the trailing three characters (e.g. "[0]")
                stoich, compound = term[:-3].split()
                reagents[compound] = stoich
        sides.append(reagents)
    return sides


def get_atom_count(compoundDict, complist):
    atom_counts = Counter()
    for id, stoich in complist.items():
        for comp in compoundDict:
            if(id != comp['id']):
                continue
            if comp['formula'] is None or comp['formula'] == 'null':
                continue
            for pair in re.findall('([A-Z][a-z]?)(\d*)', comp['formula']):
                if not pair[1]:
                    atom_counts[pair[0]] += float(stoich)
                else:
Beispiel #11
0
#!/usr/bin/env python
import os, sys

temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()

Compound_To_Merge_From = "cpd00013"
Compound_To_Merge_To = "cpd19013"

# Index reactions by the compounds in their stoichiometry:
#   Cpds_Rxns_Dict[<compound id>][<reaction id>] = 1
# Rxns_Cpds_Dict is created here but not filled in by this loop.
Cpds_Rxns_Dict = {}
Rxns_Cpds_Dict = {}
for rxn in Reactions_Dict:
    if Reactions_Dict[rxn]["status"] != "EMPTY":
        for rgt in Reactions_Dict[rxn]["stoichiometry"].split(";"):
            # Each reagent splits into five ':'-separated fields; limiting the
            # split to four cuts keeps any further ':' inside the last field.
            coeff, cpd, cpt, index, name = rgt.split(":", 4)
            Cpds_Rxns_Dict.setdefault(cpd, {})[rxn] = 1
Beispiel #12
0
##########################################################
#
# Load disambiguation details
#
##########################################################

disambiguated_reactions = list()
with open(File) as jf:
    Disambiguated_Compound = json.load(jf)
disambiguated_reactions = Disambiguated_Compound['reactions']

#Load Reactions
#Using reaction provenance and disambiguation details
#We find the list of reactions that belong to either
#the original compound or the newly disambiguated compound
reactions_helper = Reactions()
reaction_names_dict = reactions_helper.loadNames()
reaction_ecs_dict = reactions_helper.loadECs()

for object in disambiguated_reactions:
    original_names = list()
    if (object['from']['id'] in reaction_names_dict):
        original_names = reaction_names_dict[object['from']['id']]
    disambig_names = list()
    if (object['to']['id'] in reaction_names_dict):
        disambig_names = reaction_names_dict[object['to']['id']]
    for name in object['names']:
        if (object['names'][name] == "true"):
            if (name not in original_names):
                original_names.append(name)
            if (name in disambig_names):
Beispiel #13
0
# Load compounds and print one tab-separated line per compound:
#   <id> <PlantCyc aliases joined by '|'> <MetaCyc aliases joined by '|'>
CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()
MS_Aliases_Dict = CompoundsHelper.loadMSAliases(["MetaCyc", "PlantCyc"])

for cpd, cpd_aliases in MS_Aliases_Dict.items():
    # A missing source is stored as an empty list so it prints as an empty column
    plantcyc_ids = cpd_aliases.setdefault('PlantCyc', [])
    metacyc_ids = cpd_aliases.setdefault('MetaCyc', [])
    print("\t".join([cpd, "|".join(plantcyc_ids), "|".join(metacyc_ids)]))

# Load reactions and print the same three columns for each reaction.
# MS_Aliases_Dict is rebound here and now holds the reaction aliases.
ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()
MS_Aliases_Dict = ReactionsHelper.loadMSAliases(["MetaCyc", "PlantCyc"])

for rxn, rxn_aliases in MS_Aliases_Dict.items():
    plantcyc_ids = rxn_aliases.setdefault('PlantCyc', [])
    metacyc_ids = rxn_aliases.setdefault('MetaCyc', [])
    print("\t".join([rxn, "|".join(plantcyc_ids), "|".join(metacyc_ids)]))
#!/usr/bin/env python
import os, sys, re
temp = list()
from BiochemPy import Reactions, Compounds
compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
reactions_codes = reactions_helper.generateCodes(reactions_dict,
                                                 stoich=False,
                                                 transport=False)

names_dict = compounds_helper.loadNames()
searchnames_dict = dict()
for msid in sorted(names_dict):
    for name in names_dict[msid]:
        searchname = compounds_helper.searchname(name)
        #Avoid redundancy where possible
        if (searchname not in searchnames_dict):
            searchnames_dict[searchname] = msid

mhc = open('Mishit_Compound_Names.txt', 'w')
with open('Parsed_Enzyme_Equations.txt') as fh:
    for line in fh.readlines():
        line = line.strip()
        (id, old_equation) = line.split('\t')
        array = re.split(' (<?=>?|\+) ', old_equation)

        new_array = list()
        mishit = False
        for i in range(len(array)):
    for source in compound_aliases_dict[cpd]:
        if ('Cyc' not in source and source != 'KEGG'
                and source != 'metanetx.chemical'):
            Model = True
            sources['Model'][source] = 1

    if (Model is True):
        if (KEGG_MetaCyc is True or BioCyc is True):
            compound_counts['Model (Integrated)'] += 1
        else:
            compound_counts['Model (Unintegrated)'] += 1

    if (KEGG_MetaCyc is False and BioCyc is False and Model is False):
        compound_counts['Orphans'] += 1

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
reaction_aliases_dict = reactions_helper.loadMSAliases()

reaction_counts = dict()
for key in column_keys:
    reaction_counts[key] = 0

for rxn in reactions_dict:

    if (reactions_dict[rxn]['status'] == 'EMPTY'):
        continue

    if (reactions_dict[rxn]['is_obsolete'] == 1):
        continue
Beispiel #16
0
#!/usr/bin/env python
import os, sys
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

# Counter for changed reactions; nothing in the visible part of the loop
# increments it.
Update_Reactions = 0
# Report of status changes, one tab-separated line per changed reaction.
# NOTE(review): no matching close() is visible in this section.
status_file = open("Status_Changes.txt", 'w')
for rxn in sorted(Reactions_Dict.keys()):
    # Reactions whose status is exactly "EMPTY" are skipped
    if (Reactions_Dict[rxn]["status"] == "EMPTY"):
        continue

    # Recompute the balance status from the parsed stoichiometry
    Rxn_Cpds_Array = ReactionsHelper.parseStoich(
        Reactions_Dict[rxn]["stoichiometry"])
    new_status = ReactionsHelper.balanceReaction(Rxn_Cpds_Array)
    old_status = Reactions_Dict[rxn]["status"]

    # Need to handle reactions with polymers
    # NOTE(review): the "NB" assignment has no effect because `continue`
    # follows immediately, so such reactions are simply skipped - confirm
    # that this is intended.
    if (new_status == "Duplicate reagents"):
        new_status = "NB"
        continue

    # Report a change only when the stored status does not contain "CK"
    if (new_status != old_status and "CK" not in old_status):
        print("Changing Status for " + rxn + " from " + old_status + " to " +
              new_status)
        status_file.write(rxn + "\t" + old_status + "\t" + new_status + "\n")
Beispiel #17
0
#!/usr/bin/env python
from BiochemPy import Reactions

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

# Recompute the is_transport flag of every non-empty reaction from its parsed
# stoichiometry, and save the table only if at least one flag changed.
Update_Reactions = 0
for rxn in sorted(Reactions_Dict):
    rxn_record = Reactions_Dict[rxn]
    if rxn_record["status"] == "EMPTY":
        continue

    old_stoichiometry = rxn_record["stoichiometry"]
    Rxn_Cpds_Array = ReactionsHelper.parseStoich(old_stoichiometry)
    is_transport = ReactionsHelper.isTransport(Rxn_Cpds_Array)

    if is_transport != rxn_record["is_transport"]:
        print("Updating: ", rxn, is_transport)
        rxn_record["is_transport"] = is_transport
        Update_Reactions += 1

if Update_Reactions > 0:
    print("Saving adjusted is_transport flag for " + str(Update_Reactions) +
          " reactions")
    ReactionsHelper.saveReactions(Reactions_Dict)
# Remove the column named by remove_string from both the compounds table and
# the reactions table, then save each table.
#
# The column is removed only where it is present. Previously a table without
# the column lost whichever header sat at the default (or left-over) index,
# and the per-record `del` raised KeyError.
remove_index = None
remove_string = 'ontology'

compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()

# Find the header position by name; if the name occurs more than once the
# last occurrence wins, as before.
for header, header_name in enumerate(compounds_helper.Headers):
    if (header_name == remove_string):
        remove_index = header

if (remove_index is not None):
    del compounds_helper.Headers[remove_index]

for cpd in compounds_dict:
    # pop() with a default tolerates records that lack the field
    compounds_dict[cpd].pop(remove_string, None)

compounds_helper.saveCompounds(compounds_dict)

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()

# Reset before the second search so an index found for the compounds table is
# never applied to the reactions headers.
remove_index = None
for header, header_name in enumerate(reactions_helper.Headers):
    if (header_name == remove_string):
        remove_index = header

if (remove_index is not None):
    del reactions_helper.Headers[remove_index]

for rxn in reactions_dict:
    reactions_dict[rxn].pop(remove_string, None)

reactions_helper.saveReactions(reactions_dict)
Beispiel #19
0
#!/usr/bin/env python
from BiochemPy import Reactions, Compounds

##########################################
# Reprinting compounds, aliases and names
##########################################
# Load the compounds table and write it straight back out; this script does
# not modify the data in between.
compounds_helper = Compounds()
compounds_dict = compounds_helper.loadCompounds()
compounds_helper.saveCompounds(compounds_dict)
# Same load-then-save round trip for the compound aliases ...
aliases_dict = compounds_helper.loadMSAliases()
compounds_helper.saveAliases(aliases_dict)
# ... and for the compound names
names_dict = compounds_helper.loadNames()
compounds_helper.saveNames(names_dict)

###############################################
# Reprinting reactions, aliases, names and ecs
###############################################
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()
# Unlike the compounds, every reaction is passed through rebuildReaction()
# before the table is saved.
# NOTE(review): called here with the reaction only (no stoichiometry
# argument); presumably it regenerates derived fields - confirm in BiochemPy.
for rxn in reactions_dict:
    reactions_helper.rebuildReaction(reactions_dict[rxn])
reactions_helper.saveReactions(reactions_dict)
# aliases_dict and names_dict are rebound below and hold reaction data from
# here on.
aliases_dict = reactions_helper.loadMSAliases()
reactions_helper.saveAliases(aliases_dict)
names_dict = reactions_helper.loadNames()
reactions_helper.saveNames(names_dict)
ecs_dict = reactions_helper.loadECs()
reactions_helper.saveECs(ecs_dict)
Beispiel #20
0
    print(cpd_obj['id'], cpd_obj['inchikey'])
    if ('parent_class' in cpd_obj['ontology']):
        for cpd in cpd_obj['ontology']['parent_class'].split(";"):
            if (cpd_obj['id'] not in children_parents):
                children_parents[cpd_obj['id']] = dict()
            children_parents[cpd_obj['id']][cpd] = 1
            if (cpd not in parents_children):
                parents_children[cpd] = dict()
            parents_children[cpd][cpd_obj['id']] = 1

#    print( cpd_obj['id'], cpd_obj['ontology'], isinstance(cpd_obj['ontology'],dict) )
print(parents_children)

# First pass: find all reactions that contain either children or parents.
# Reaction ids are collected in three lists; only 'parent' and 'child' are
# appended to in this loop, 'mixed' stays empty here.
reactions_to_use = {'parent': [], 'child': [], 'mixed': []}
reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()

for rxn in reactions_dict:
    rxn_obj = reactions_dict[rxn]
    # 'compound_ids' is a ';'-separated list of the reaction's compounds
    for cpd in rxn_obj['compound_ids'].split(';'):

        # Only the compound equal to cpd_exception is examined; every other
        # compound is skipped.
        # NOTE(review): the name suggests an exclusion, yet the test keeps
        # only this compound - confirm the comparison is not inverted.
        if (cpd != cpd_exception):
            continue

        # A compound can be both a parent and a child, in which case the
        # reaction is added to both lists.
        if (cpd in parents_children):
            reactions_to_use['parent'].append(rxn)
        if (cpd in children_parents):
            reactions_to_use['child'].append(rxn)

#First take all child reactions, and generate a set where all relationships are parents
#!/usr/bin/env python
import os, sys, time
from csv import DictReader
temp = list()
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions

ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()

# For every template directory, record the reactions listed in its
# Reactions.tsv that exist in Reactions_Dict and whose status does not
# contain "OK":
#   Unbalanced_Reactions[<template>][<reaction id>] = 1
Template_Dir = '../../../Templates/'
Unbalanced_Reactions = {}
for template in os.listdir(Template_Dir):
    template_path = os.path.join(Template_Dir, template)
    if os.path.isdir(template_path):
        # Every directory gets an entry, even if nothing in it is flagged
        flagged = Unbalanced_Reactions[template] = {}
        with open(os.path.join(template_path, 'Reactions.tsv')) as infile:
            for line in DictReader(infile, dialect='excel-tab'):
                rxn_id = line['id']
                if rxn_id not in Reactions_Dict:
                    continue
                if "OK" in Reactions_Dict[rxn_id]['status']:
                    continue
                flagged[rxn_id] = 1

time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(time.time()))
file = open("Template_Update.txt", "a")
file.write("====================================\n")
file.write(time_str + "\n")
for template in sorted(Unbalanced_Reactions.keys()):
    file.write(template + ": " +
               str(len(Unbalanced_Reactions[template].keys())) + "\n")
        else:
            structure = structure.split('-')[0]
            if (structure in mnx_inchikey_dict):
                matched_mnx = mnx_inchikey_dict[structure]

    #As of 06/28/2019, there was:
    # 17,071 matches based on full inchikey
    # 17,863 matches using proton-neutral inchikey
    # 18,559 matches using stereo-neutral inchikey

    if (matched_mnx is None):
        continue

    seed_mnx_structural_map[cpd] = mnx_inchikey_dict[structure]

reactions_helper = Reactions()
reactions_dict = reactions_helper.loadReactions()

complete_mol_rxns_dict = dict()
incomplete_mol_rxns_dict = dict()
for rxn in reactions_dict:
    if (reactions_dict[rxn]['status'] == 'EMPTY'):
        continue

    rxn_cpds_array = reactions_helper.parseStoich(
        reactions_dict[rxn]["stoichiometry"])

    All_Mol = True
    Some_Mol = False
    for rgt in rxn_cpds_array:
        if (rgt['compound'] not in seed_mnx_structural_map):
Beispiel #23
0
        if(Found_Cpd == ""):
            continue

        Is_Old=False
        if(Found_Cpd != "" and model in Old_Aliases and Found_Cpd in Old_Aliases[model]):
            Is_Old=True

        if(model not in Prov_Rxns):
            Prov_Rxns[model]=dict()
        Prov_Rxns[model][rxn]=Is_Old
        
#Load Reactions
#Using reaction provenance and disambiguation details
#We find the list of reactions that belong to either
#the original compound or the newly disambiguated compound
reactions_helper = Reactions()
reaction_aliases_dict = reactions_helper.loadMSAliases()
reaction_names_dict = reactions_helper.loadNames()
reaction_ecs_dict = reactions_helper.loadECs()
reactions_dict = reactions_helper.loadReactions()

#Three types of reactions:
# 1) Retain original/old reaction
keep_reactions=dict()
# 2) Retain but change compound in stoichiometry
update_reactions=dict()
# 3) Split into new reaction with updated stoichiometry
disambiguate_reactions=dict()
for rxn in reaction_aliases_dict:
    Is_Old_Dict=dict()
    Source_Alias=dict()