def calculate_distances(smrn):
    """Compute metabolite-to-reaction distances in the E. coli network.

    The matrix returned by ``settings.get_ecoli_json()`` is turned into a
    graph by ``convert_to_bipartite`` and all-pairs shortest path lengths
    are computed on it.  A path of length ``p`` between a metabolite node
    and a reaction node is reported as the distance ``(p - 1) / 2``.

    Args:
        smrn: DataFrame with at least the columns ``'bigg.metabolite'`` and
            ``'bigg.reaction'``.  NOTE: the ``'bigg.metabolite'`` column is
            upper-cased in place, so the caller's frame is modified.

    Returns:
        A tuple ``(smrn_dist, all_distances)`` where ``smrn_dist`` holds the
        unique (metabolite, reaction) pairs of ``smrn`` for which a path
        exists, with an added ``'distance'`` column, and ``all_distances``
        is a NumPy array of the distances between every metabolite in the
        graph and every reaction reachable from it.

    Raises:
        KeyError: if a metabolite in ``smrn`` is neither listed in
            ``METS_TO_REMOVE`` nor present as a node of the graph.
    """
    smrn['bigg.metabolite'] = smrn['bigg.metabolite'].str.upper()

    # %% Read BIGG model
    _, S = settings.get_ecoli_json()
    B, mets, rxns = convert_to_bipartite(S)
    spl = dict(nx.shortest_path_length(B))

    # Collect the path length from each metabolite to every reaction that is
    # reachable from it.
    spl_values = []
    for met in mets:
        lengths = spl[met]
        reachable = rxns.intersection(lengths.keys())
        spl_values.extend(lengths[rxn] for rxn in reachable)
    all_distances = (np.array(spl_values) - 1.0) / 2

    smrn_dist = smrn[['bigg.metabolite', 'bigg.reaction']].drop_duplicates()
    # Use np.nan directly: the ``pd.np`` alias was deprecated in pandas 1.0
    # and is no longer available in current releases.
    smrn_dist['distance'] = np.nan
    for i, row in smrn_dist.iterrows():
        source = row['bigg.metabolite']
        target = row['bigg.reaction']
        # Metabolites in METS_TO_REMOVE are skipped; the original note says
        # they were "dropped before" (presumably from the graph -- verify in
        # convert_to_bipartite).
        if source.lower() in METS_TO_REMOVE:
            continue
        if target in spl[source]:
            smrn_dist.at[i, 'distance'] = (spl[source][target] - 1.0) / 2.0

    # %% Save data
    # Pairs that were skipped or have no connecting path still hold NaN.
    smrn_dist = smrn_dist.dropna()
    return smrn_dist, all_distances
def get_native_EC_numbers(self):
    """Return the EC numbers of reactions that have an E. coli gene.

    A reaction of ``self.ecoli_model`` counts as having a gene when its
    ``gene_reaction_rule`` contains a ``b<digits>`` locus tag.  The
    lower-cased IDs of those reactions are matched against the
    ``'bigg.reaction'`` column of ``self.reaction_df``.

    Returns:
        set of EC number strings.  ``'2.3.3.16'`` (citrate synthase) is
        always included.
    """
    # The model is already held in self.ecoli_model, so it is not reloaded
    # from settings here (the previous reload result was never used).
    rids_with_genes = {
        rxn['id'].lower()
        for rxn in self.ecoli_model['reactions']
        if re.search(r'b[0-9]+', rxn['gene_reaction_rule']) is not None
    }

    # Convert these BiGG IDs to EC numbers using self.reaction_df.
    # NOTE(review): groupby(...).first() keeps only the first reaction per
    # EC number, so an EC number is reported only if that first reaction
    # has a gene.
    bigg_reactions = self.reaction_df.groupby('EC_number').first()
    has_gene = bigg_reactions['bigg.reaction'].isin(rids_with_genes)
    native_ec = set(bigg_reactions[has_gene].index)

    # adding Citrate Synthase
    native_ec.add('2.3.3.16')
    return native_ec
# Load the E. coli model (via settings.get_ecoli_json) and build the
# BiGG-reaction -> EC-number table used for mapping regulatory interactions
# from BRENDA
import os
import pandas as pd
import pdb
import settings

# Read BIGG model
model, S = settings.get_ecoli_json()

# Lower-cased reaction names, kept as a list because they are indexed by
# position in the consistency check below
rnames = [name.lower() for name in S.columns]

# Read the reaction -> EC number mapping from the third sheet (index 2) of
# the supplementary spreadsheet
model_reactions = pd.read_excel('../data/inline-supplementary-material-2.xls',
                                sheet_name=2, header=0)
bigg2ec = model_reactions[['Reaction Abbreviation', 'EC Number']].rename(
    columns={'Reaction Abbreviation': 'bigg.reaction',
             'EC Number': 'EC_number'})
# Keep only reactions that have an EC number.  The column must be addressed
# by its renamed label 'EC_number' ('EC_Number' does not exist).
bigg2ec = bigg2ec[~pd.isnull(bigg2ec['EC_number'])].copy()

# Change all reaction IDs to lower-case (apparently the standards have changed
# since the model was published, and cases are different now).
bigg2ec['bigg.reaction'] = bigg2ec['bigg.reaction'].str.lower()

# Confirm that the name of the reaction is the same in the .xls and in the
# model.  The index labels of bigg2ec are the original spreadsheet row
# numbers, so rnames[item] assumes the spreadsheet rows are in the same order
# as the columns of S.
error_rnames = [item for item in bigg2ec.index
                if bigg2ec.at[item, 'bigg.reaction'] != rnames[item]]

if error_rnames:
    print('These reactions may be mapped incorrectly, proceed with care. We will replace the names in bigg2ec for consistency:')
# Load the E. coli model (via settings.get_ecoli_json) and build the
# BiGG-reaction -> EC-number table used for mapping regulatory interactions
# from BRENDA
import os
import pandas as pd
import pdb
import settings

# Read BIGG model
model, S = settings.get_ecoli_json()

# Set to lower case (materialised as a list so it can be indexed)
rnames = [name.lower() for name in S.columns]

# Read the reaction -> EC number mapping from the third sheet (index 2) of
# the supplementary spreadsheet
model_reactions = pd.read_excel('../data/inline-supplementary-material-2.xls',
                                sheet_name=2, header=0)
column_names = {
    'Reaction Abbreviation': 'bigg.reaction',
    'EC Number': 'EC_number',
}
bigg2ec = model_reactions[list(column_names)].rename(columns=column_names)
# Drop rows without an EC number; the renamed column label is 'EC_number'
# (the previous 'EC_Number' lookup raised KeyError).
bigg2ec = bigg2ec[~pd.isnull(bigg2ec['EC_number'])].copy()

# Change all reaction IDs to lower-case (apparently the standards have changed
# since the model was published, and cases are different now).
bigg2ec['bigg.reaction'] = bigg2ec['bigg.reaction'].str.lower()

# Confirm that the name of the reaction is the same in the .xls and the .mat file
def __init__(self):
    """Load the E. coli model and the BiGG lookup tables.

    Stores the two values returned by ``settings.get_ecoli_json()`` as
    ``ecoli_model`` and ``ecoli_S``, and the results of
    ``BiGG._get_metabolite_df()`` / ``BiGG._get_reaction_df()`` as
    ``metabolite_df`` and ``reaction_df``.
    """
    loaded_model, loaded_matrix = settings.get_ecoli_json()
    self.ecoli_model = loaded_model
    self.ecoli_S = loaded_matrix

    # Lookup tables built by the class-level helpers.
    self.metabolite_df = BiGG._get_metabolite_df()
    self.reaction_df = BiGG._get_reaction_df()