def calculate_distances(smrn):
    """Compute network distances between SMRN metabolites and reactions.

    A graph is built from the E. coli stoichiometric data with
    ``convert_to_bipartite`` and all-pairs shortest path lengths are taken
    on it.  Every distance reported here is ``(path_length - 1) / 2``.

    Args:
        smrn: DataFrame with at least the columns 'bigg.metabolite' and
            'bigg.reaction'.  NOTE: the 'bigg.metabolite' column is
            upper-cased in place, so the caller's frame is modified.

    Returns:
        A tuple ``(smrn_dist, all_distances)``:
        * ``smrn_dist`` - the unique (metabolite, reaction) pairs of
          ``smrn`` with a 'distance' column; pairs that were skipped or
          have no path between them are dropped.
        * ``all_distances`` - NumPy array with the distance of every
          metabolite to every reaction reachable from it.

    Raises:
        KeyError: if a metabolite that is not in ``METS_TO_REMOVE`` has no
            entry in the shortest-path table.
    """
    smrn['bigg.metabolite'] = smrn['bigg.metabolite'].str.upper()

    # %% Read BIGG model
    _, S = settings.get_ecoli_json()
    B, mets, rxns = convert_to_bipartite(S)
    spl = dict(nx.shortest_path_length(B))

    # Background distribution: for each metabolite, the path length to every
    # reaction that can be reached from it.
    spl_values = []
    for met in mets:
        reachable = rxns.intersection(spl[met].keys())
        spl_values += list(map(spl[met].get, reachable))
    # presumably paths alternate metabolite/reaction nodes, hence the
    # (length - 1) / 2 conversion -- confirm against convert_to_bipartite
    all_distances = (np.array(spl_values) - 1.0) / 2

    smrn_dist = smrn[['bigg.metabolite', 'bigg.reaction']].drop_duplicates()
    # `pd.np` was deprecated in pandas 1.0 and removed in 2.0; use NumPy
    # directly (it is already used above).
    smrn_dist['distance'] = np.nan
    for i, row in smrn_dist.iterrows():
        source = row['bigg.metabolite']  # upper-cased at the top
        target = row['bigg.reaction']
        # METS_TO_REMOVE is matched against the lower-cased metabolite ID
        if source.lower() in METS_TO_REMOVE:
            continue
        if target in spl[source]:
            smrn_dist.at[i, 'distance'] = (spl[source][target] - 1.0) / 2.0

    # %% Save data
    # Rows left at NaN (skipped metabolites, unreachable reactions) are dropped
    smrn_dist = smrn_dist.dropna()
    return smrn_dist, all_distances
Beispiel #2
0
def calculate_distances(smrn):
    """Compute network distances between SMRN metabolites and reactions.

    A graph is built from the E. coli stoichiometric data with
    ``convert_to_bipartite`` and all-pairs shortest path lengths are taken
    on it.  Every distance reported here is ``(path_length - 1) / 2``.

    Args:
        smrn: DataFrame with at least the columns 'bigg.metabolite' and
            'bigg.reaction'.  NOTE: the 'bigg.metabolite' column is
            upper-cased in place, so the caller's frame is modified.

    Returns:
        A tuple ``(smrn_dist, all_distances)``:
        * ``smrn_dist`` - the unique (metabolite, reaction) pairs of
          ``smrn`` with a 'distance' column; pairs that were skipped or
          have no path between them are dropped.
        * ``all_distances`` - NumPy array with the distance of every
          metabolite to every reaction reachable from it.

    Raises:
        KeyError: if a metabolite that is not in ``METS_TO_REMOVE`` has no
            entry in the shortest-path table.
    """
    smrn['bigg.metabolite'] = smrn['bigg.metabolite'].str.upper()

    # %% Read BIGG model
    _, S = settings.get_ecoli_json()
    B, mets, rxns = convert_to_bipartite(S)
    spl = dict(nx.shortest_path_length(B))

    # Background distribution: for each metabolite, the path length to every
    # reaction that can be reached from it.
    spl_values = []
    for met in mets:
        reachable = rxns.intersection(spl[met].keys())
        spl_values += list(map(spl[met].get, reachable))
    # presumably paths alternate metabolite/reaction nodes, hence the
    # (length - 1) / 2 conversion -- confirm against convert_to_bipartite
    all_distances = (np.array(spl_values) - 1.0) / 2

    smrn_dist = smrn[['bigg.metabolite', 'bigg.reaction']].drop_duplicates()
    # `pd.np` was deprecated in pandas 1.0 and removed in 2.0; use NumPy
    # directly (it is already used above).
    smrn_dist['distance'] = np.nan
    for i, row in smrn_dist.iterrows():
        source = row['bigg.metabolite']  # upper-cased at the top
        target = row['bigg.reaction']
        # METS_TO_REMOVE is matched against the lower-cased metabolite ID
        if source.lower() in METS_TO_REMOVE:
            continue
        if target in spl[source]:
            smrn_dist.at[i, 'distance'] = (spl[source][target] - 1.0) / 2.0

    # %% Save data
    # Rows left at NaN (skipped metabolites, unreachable reactions) are dropped
    smrn_dist = smrn_dist.dropna()
    return smrn_dist, all_distances
Beispiel #3
0
    def get_native_EC_numbers(self):
        """Return the EC numbers of gene-associated reactions in the model.

        A reaction of ``self.ecoli_model`` counts as gene-associated when its
        'gene_reaction_rule' contains at least one b-number (``b`` followed
        by digits).  The lower-cased IDs of those reactions are looked up in
        ``self.reaction_df`` (columns 'EC_number' and 'bigg.reaction').

        Returns:
            set: EC numbers whose first listed BiGG reaction is
            gene-associated, plus '2.3.3.16' (citrate synthase), which is
            always included.
        """
        # Lower-cased IDs of every reaction mapped to at least one E. coli
        # gene.  The model already stored on the instance is used; there is
        # no need to load it again from disk.
        gene_pattern = re.compile(r'b[0-9]+')
        rids_with_genes = {
            rxn['id'].lower()
            for rxn in self.ecoli_model['reactions']
            if gene_pattern.search(rxn['gene_reaction_rule']) is not None
        }

        # Convert these BiGG IDs to EC numbers using self.reaction_df.
        # NOTE(review): .first() keeps only one reaction per EC number, so an
        # EC number is reported only if that first reaction has a gene --
        # confirm this is intended.
        bigg_reactions = self.reaction_df.groupby('EC_number').first()
        has_gene = bigg_reactions['bigg.reaction'].isin(rids_with_genes)
        native_ec = set(bigg_reactions[has_gene].index)

        # adding Citrate Synthase
        native_ec.add('2.3.3.16')
        return native_ec
# Import iJO1366 from .mat and format for mapping of regulatory interactions from BRENDA

import os
import pandas as pd
import pdb
import settings

# Read BIGG model
model, S = settings.get_ecoli_json()

# Lower-cased reaction IDs of the model (the columns of S).  Built as a list
# because it is indexed by position further down.
rnames = [rid.lower() for rid in S.columns]

# Read the reaction -> EC number mapping from the supplementary spreadsheet.
# (`sheet_name` replaces the `sheetname` keyword that pandas has removed.)
model_reactions = pd.read_excel('../data/inline-supplementary-material-2.xls',
                                sheet_name=2, header=0)
# rename() returns a new frame, which avoids renaming a slice in place
bigg2ec = model_reactions[['Reaction Abbreviation', 'EC Number']].rename(
    columns={'Reaction Abbreviation': 'bigg.reaction',
             'EC Number': 'EC_number'})
# Keep only reactions with an EC number.  The column is called 'EC_number'
# after the rename above.  .copy() makes the column assignment below write to
# an independent frame.
bigg2ec = bigg2ec[~pd.isnull(bigg2ec['EC_number'])].copy()

# Change all reaction IDs to lower-case (apparently the standards have changed
# since the model was published, and cases are different now).
bigg2ec['bigg.reaction'] = bigg2ec['bigg.reaction'].str.lower()

# Confirm that the name of the reaction is the same in the spreadsheet and in
# the model.  NOTE(review): this uses the frame's index label as a position
# in rnames, i.e. it assumes the spreadsheet rows are in the same order as the
# columns of S -- confirm.
error_rnames = [item for item in bigg2ec.index
                if bigg2ec.at[item, 'bigg.reaction'] != rnames[item]]

if error_rnames:
    print('These reactions may be mapped incorrectly, proceed with care. We will replace the names in bigg2ec for consistency:')
Beispiel #5
0
# Import iJO1366 from .mat and format for mapping of regulatory interactions from BRENDA

import os
import pandas as pd
import pdb
import settings

# Read BIGG model
model, S = settings.get_ecoli_json()

# Lower-cased reaction IDs of the model (the columns of S).  Built as a list
# so that it can be indexed by position.
rnames = [rid.lower() for rid in S.columns]

# Read the reaction -> EC number mapping from the supplementary spreadsheet.
# (`sheet_name` replaces the `sheetname` keyword that pandas has removed.)
model_reactions = pd.read_excel('../data/inline-supplementary-material-2.xls',
                                sheet_name=2,
                                header=0)
# rename() returns a new frame, which avoids renaming a slice in place
bigg2ec = model_reactions[['Reaction Abbreviation', 'EC Number']].rename(
    columns={
        'Reaction Abbreviation': 'bigg.reaction',
        'EC Number': 'EC_number',
    })
# Keep only reactions with an EC number.  The column is called 'EC_number'
# after the rename above.  .copy() makes the column assignment below write to
# an independent frame.
bigg2ec = bigg2ec[~pd.isnull(bigg2ec['EC_number'])].copy()

# Change all reaction IDs to lower-case (apparently the standards have changed
# since the model was published, and cases are different now).
bigg2ec['bigg.reaction'] = bigg2ec['bigg.reaction'].str.lower()

# Confirm that the name of the reaction is the same in the .xls and the .mat file
Beispiel #6
0
 def __init__(self):
     """Load the E. coli model and the BiGG metabolite/reaction tables.

     Sets ``ecoli_model`` and ``ecoli_S`` from the pair returned by
     ``settings.get_ecoli_json()``, and ``metabolite_df`` / ``reaction_df``
     from the corresponding ``BiGG`` helpers.
     """
     model, stoichiometry = settings.get_ecoli_json()
     self.ecoli_model = model
     self.ecoli_S = stoichiometry
     self.metabolite_df = BiGG._get_metabolite_df()
     self.reaction_df = BiGG._get_reaction_df()