#!/usr/bin/env python
import os, sys
# Scratch list and header flag; nothing in the visible part of this script reads them.
temp = []
header = 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

# Load the compound records, their aliases and their names through the
# BiochemPy Compounds helper.
compounds_helper = Compounds()
# compounds_dict is iterated by compound ID further down, and each record is
# read for its 'is_obsolete' field.
compounds_dict = compounds_helper.loadCompounds()
# NOTE(review): presumably both of these are keyed by compound ID as well --
# confirm against BiochemPy.
cpds_aliases_dict = compounds_helper.loadMSAliases()
cpds_names_dict = compounds_helper.loadNames()

# We don't actually want obsolete reactions and compounds in our database,
# so we strive to remove any 'new' ones that are obsolete.
# Any information attached to them should be associated with their linked counterpart.
# We do need to retain older compounds that are now obsolete, as these may be present in previously published models.

# The number used here is the last compound entered before we re-integrated updates
# from KEGG and MetaCyc in the fall of 2018; after this point, we take out obsolete compounds.
# Cut-off compound: only IDs numerically above it are candidates for removal.
last_cpd_str = 'cpd31000'
last_cpd_int = int(last_cpd_str[3:])  # numeric part after the 'cpd' prefix

# Gather the IDs first; they are processed in a separate pass afterwards.
delete_cpds = []
for cpd in compounds_dict:
    cpd_int = int(cpd[3:])
    if cpd_int <= last_cpd_int:
        # At or below the cut-off: kept even when obsolete.
        continue
    if not compounds_dict[cpd]['is_obsolete']:
        continue
    delete_cpds.append(cpd)

for cpd in delete_cpds:
Ejemplo n.º 2
0
#!/usr/bin/env python
import os, sys
from csv import DictReader
# Scratch list and header flag; not read anywhere in the visible part of this script.
temp, header = [], 1

sys.path.append('../../Libs/Python')
from BiochemPy import Reactions, Compounds, InChIs

# Load compound records, aliases and names through the BiochemPy Compounds helper.
CompoundsHelper = Compounds()
# The sorted keys of Compounds_Dict drive the main loop further down.
Compounds_Dict = CompoundsHelper.loadCompounds()
# Aliases_Dict is indexed by compound ID; the keys of each entry are iterated
# as alias sources further down.
Aliases_Dict = CompoundsHelper.loadMSAliases()
Names_Dict = CompoundsHelper.loadNames()

# Build Source_Classes as {source type: {source ID: 1}} from the tab-delimited
# classifier table, using its 'Source Type' and 'Source ID' columns.
Source_Classes = dict()
# Read the table inside a context manager so the handle is closed as soon as
# the rows have been consumed; handing a bare open() to DictReader left the
# file open. newline='' is what the csv module expects of its input file.
with open('../../../Biochemistry/Aliases/Source_Classifiers.txt',
          newline='') as classifiers_file:
    reader = DictReader(classifiers_file, dialect='excel-tab')
    for line in reader:
        # Create the per-type dict on first sight, then flag the source ID.
        Source_Classes.setdefault(line['Source Type'],
                                  dict())[line['Source ID']] = 1

for cpd in sorted(Compounds_Dict.keys()):
    if (cpd not in Aliases_Dict):
        continue

    Cpd_Aliases = dict()
    Alias_Count = 0
    for source_type in 'Primary Database', 'Secondary Database', 'Published Model':
        for source in sorted(Aliases_Dict[cpd].keys()):
                            'formula': array[1],
                            'charge': array[2]
                        }

#Load Curated Structures
# Ignored_Structures maps the first tab-separated field of every non-blank
# line of the ignore list to 1.
Ignored_Structures = dict()
with open(Structures_Root + "Ignored_ModelSEED_Structures.txt") as ignore_file:
    # Iterate the file directly instead of materialising it with readlines().
    for line in ignore_file:
        # Strip the line terminator before splitting: on a line without a tab
        # the first field would otherwise keep its trailing newline as part
        # of the key.
        line = line.rstrip('\r\n')
        if not line:
            # Blank line: nothing to register.
            continue
        array = line.split('\t')
        Ignored_Structures[array[0]] = 1
# No explicit close() is needed: the with-statement has already closed the file.

#Load Structures and Aliases
# Structures are requested for three structure types (SMILE, InChIKey, InChI)
# and two sources (KEGG, MetaCyc); aliases are requested for the same two sources.
Structures_Dict = CompoundsHelper.loadStructures(
    ["SMILE", "InChIKey", "InChI"], ["KEGG", "MetaCyc"])
MS_Aliases_Dict = CompoundsHelper.loadMSAliases(["KEGG", "MetaCyc"])

# Output files: the two structure tables are written under Structures_Root,
# the two conflict reports go to the current working directory.
# NOTE(review): all four handles are opened without a with-block, so they have
# to be closed explicitly by code that is not visible here -- confirm.
master_structs_file = open(Structures_Root + "All_ModelSEED_Structures.txt",
                           'w')
unique_structs_file = open(Structures_Root + "Unique_ModelSEED_Structures.txt",
                           'w')
# Only the unique-structures table receives a header row at this point.
unique_structs_file.write("ID\tType\tAliases\tFormula\tCharge\tStructure\n")
structure_conflicts_file = open("Structure_Conflicts.txt", 'w')
formula_conflicts_file = open("Formula_Conflicts.txt", 'w')
for msid in sorted(MS_Aliases_Dict.keys()):

    #Build collection of all structures for the ModelSEED ID
    Structs = dict()
    Formulas = dict()
    for source in 'KEGG', 'MetaCyc':
        if (source not in MS_Aliases_Dict[msid].keys()):
Ejemplo n.º 4
0
# Fetch the record once; the obsolete check and the 'from' entry both read it.
from_cpd_record = compounds_dict[disambiguating_cpd]

# An obsolete compound only triggers a warning; processing continues.
if from_cpd_record['is_obsolete'] == 1:
    print("Warning: compound " + disambiguating_cpd +
          " is obsolete, consider using the non-obsolete version")

# Seed the 'from' side with the compound's formula, charge and mass and with
# empty containers for its structures, aliases and names.
Disambiguation_Object['from'] = {
    'id': disambiguating_cpd,
    'structures': {},
    'aliases': {},
    'names': {},
    'formula': from_cpd_record['formula'],
    'charge': from_cpd_record['charge'],
    'mass': from_cpd_record['mass'],
}

# Aliases and names for every compound, plus InChI/SMILE structures
# requested for the KEGG and MetaCyc sources.
Aliases_Dict = compounds_helper.loadMSAliases()
Names_Dict = compounds_helper.loadNames()
Structures_Dict = compounds_helper.loadStructures(
    ["InChI", "SMILE"], ["KEGG", "MetaCyc"])

# Reverse lookup: alias -> source -> {compound ID: 1}
reverse_aliases_dict = {}
for cpd, cpd_sources in Aliases_Dict.items():
    for source, source_aliases in cpd_sources.items():
        for alias in source_aliases:
            # setdefault creates each nesting level the first time it is seen.
            alias_entry = reverse_aliases_dict.setdefault(alias, {})
            alias_entry.setdefault(source, {})[cpd] = 1
Ejemplo n.º 5
0
compounds_dict = compounds_helper.loadCompounds()

names_dict = compounds_helper.loadNames()
# all_names_dict:   every name seen -> 1
# searchnames_dict: value of compounds_helper.searchname(name) -> the first
#                   ModelSEED ID (in sorted ID order) that produced it
# new_name_count:   created empty here
searchnames_dict, all_names_dict, new_name_count = {}, {}, {}
for msid in sorted(names_dict):
    for name in names_dict[msid]:
        all_names_dict[name] = 1

        searchname = compounds_helper.searchname(name)
        # Keep the first owner only; later IDs with the same search name are ignored.
        searchnames_dict.setdefault(searchname, msid)

# Index the existing ModelSEED aliases two ways:
#   all_aliases:       alias -> {ModelSEED ID: 1}
#   source_alias_dict: source -> alias -> list (created empty here)
# new_alias_count is created empty here.
# NOTE(review): the inner loop looks cut off right after the empty list is
# created; whatever fills that list is not visible in this fragment.
original_alias_dict = compounds_helper.loadMSAliases()
source_alias_dict = dict()
all_aliases = dict()
new_alias_count = dict()
for msid in original_alias_dict:
    for source in original_alias_dict[msid]:
        # First time this source is seen: give it its own alias table.
        if (source not in source_alias_dict):
            source_alias_dict[source] = dict()

        for alias in original_alias_dict[msid][source]:
            # Record which ModelSEED IDs carry this alias, across all sources.
            if (alias not in all_aliases):
                all_aliases[alias] = dict()
            all_aliases[alias][msid] = 1

            # Make sure the per-source entry for this alias exists.
            if (alias not in source_alias_dict[source]):
                source_alias_dict[source][alias] = list()
Ejemplo n.º 6
0
#!/usr/bin/env python
import os
import sys
import json
from BiochemPy import Compounds, Reactions

#Load Compounds
CompoundsHelper = Compounds()
Compounds_Dict = CompoundsHelper.loadCompounds()
MS_Aliases_Dict = CompoundsHelper.loadMSAliases(["MetaCyc", "PlantCyc"])

# Print one tab-separated row per compound: ID, PlantCyc aliases, MetaCyc
# aliases, each alias list joined with '|'.
for cpd in MS_Aliases_Dict:
    cpd_sources = MS_Aliases_Dict[cpd]
    # A missing source is stored as an empty list, so its column prints empty.
    plantcyc_aliases = cpd_sources.setdefault('PlantCyc', [])
    metacyc_aliases = cpd_sources.setdefault('MetaCyc', [])

    print("\t".join(
        [cpd, "|".join(plantcyc_aliases), "|".join(metacyc_aliases)]))

#Load Reactions
ReactionsHelper = Reactions()
Reactions_Dict = ReactionsHelper.loadReactions()
# NOTE: this rebinds MS_Aliases_Dict; the compound aliases loaded earlier are
# replaced by the reaction aliases for the same two sources.
MS_Aliases_Dict = ReactionsHelper.loadMSAliases(["MetaCyc", "PlantCyc"])

for rxn in MS_Aliases_Dict:
    if ('PlantCyc' not in MS_Aliases_Dict[rxn]):
        MS_Aliases_Dict[rxn]['PlantCyc'] = []
    if ('MetaCyc' not in MS_Aliases_Dict[rxn]):