def main():
    """Score candidate map files in a directory against a COBRA model.

    Usage: script search_dir model_name

    For each JSON (optionally gzipped) map file in search_dir, count how many
    of the map's reaction IDs appear in the model. Keep maps where at least
    90% of reactions match, and write a ranked TSV of results.
    """
    try:
        search_dir = argv[1]
        model_name = argv[2]
    except IndexError:  # was a bare except:, which hid unrelated errors
        raise Exception('Not enough arguments.')
    model = load_model(model_name)
    ids = [id_for_new_id_style(x.id) for x in model.reactions]
    scores = []
    size = len(listdir(search_dir))
    for i, path in enumerate(listdir(search_dir)):
        # progress indicator: rewrite the same terminal line each iteration
        sys.stdout.write('\r')
        sys.stdout.write("%d / %d" % (i + 1, size))
        sys.stdout.flush()
        if path.endswith('.gz'):
            f = gzip.open(join(search_dir, path), 'r')
        else:
            f = open(join(search_dir, path), 'r')
        # BUG FIX: the file handle used to leak whenever a `continue` below
        # fired; the finally clause guarantees it is closed
        try:
            m = json.load(f)
            # (1) Compare the metabolite count. This also confirms both keys
            # exist, so no second KeyError check is needed below.
            try:
                met_count = len(m['MAPNODE'])
                reaction_count = len(m['MAPREACTION'])
            except KeyError:
                continue
            # (2) Compare the reaction ids to the cobra model
            num_matches = 0
            reactions = m['MAPREACTION']
            for reaction in reactions:
                try:
                    an_id = reaction['REACTIONABBREVATION']
                except KeyError:
                    continue
                if id_for_new_id_style(an_id) in ids:
                    num_matches = num_matches + 1
            # quit if not > 90% of reaction ids match the model
            frac = 0.9
            if num_matches < frac * len(reactions):
                continue
            scores.append((join(search_dir, path),
                           float(num_matches) / len(reactions),
                           met_count, reaction_count))
        finally:
            f.close()
    # sort by metabolite count (desc), then score (asc) as the primary key
    scores = sorted(scores, key=itemgetter(2), reverse=True)
    scores = sorted(scores, key=itemgetter(1))
    outfile = '%s_maps.tsv' % model_name
    print('')
    print('saving to %s' % outfile)
    (pd.DataFrame(scores,
                  columns=['path', 'score', 'n_metabolites', 'n_reactions'])
     .to_csv(outfile, sep='\t'))
def main():
    """Load an old Escher map, and generate a validated map. """
    # guard clause instead of try/except around the argv reads
    if len(sys.argv) < 3:
        raise Exception("Usage: python convert_map.py old_map.json path/to/model.sbml")
    in_file = sys.argv[1]
    model_path = sys.argv[2]
    # get the cobra model
    model = _load_model_any_source(model_path)
    # get the current map
    with open(in_file, 'r') as f:
        old_map = json.load(f)
    validated_map = convert(old_map, model)
    # don't replace the input file; write a sibling '_converted' file
    out_file = in_file.replace('.json', '_converted.json')
    print('Saving validated map to %s' % out_file)
    with open(out_file, 'w') as f:
        json.dump(validated_map, f, allow_nan=False)


def _load_model_any_source(model_path):
    """Try theseus first, then a JSON model, then SBML; raise if all fail."""
    try:
        return load_model(model_path)
    except Exception:
        pass
    try:
        return cobra.io.load_json_model(model_path)
    except (IOError, ValueError):
        pass
    try:
        return cobra.io.read_sbml_model(model_path)
    except IOError:
        raise Exception(
            'Could not find model in theseus or filesystem: %s' % model_path)
def download_or_load_model_me_placeholder(name):
    """Return the model for name.

    'ME' yields the string 'placeholder' (ME models are special-cased by
    callers); private models are loaded via theseus; everything else is
    downloaded from the BiGG API.
    """
    if name == 'ME':
        return 'placeholder'
    if name in private_models:
        return load_model(name)
    return download_model(name, host='http://bigg.ucsd.edu/api/v2/')
def setup_for_series(series, loaded_models, use_greedy_knockouts):
    """Get a SimulationSetup for the series.

    Parameters
    ----------
    series: A mapping (e.g. pandas Series) with keys 'model', 'substrate',
    'aerobicity', 'additions', 'deletions_b', and 'target'.

    loaded_models: Dict of pre-loaded models, keyed by model ID.

    use_greedy_knockouts: Passed through to SimulationSetup.

    Returns an error series (via error_series) if the target name cannot be
    resolved to an exchange reaction.
    """
    # copy the model
    model_id = series['model']
    if model_id == 'ME':
        # this is necessary because I can't copy ME models right now
        model = load_model(model_id)
    else:
        model = loaded_models[model_id].copy()
    # get the substrates and supplements
    substrate_exchanges, supplement_exchanges = exchange_for_metabolite_name(
        series['substrate'])
    # aerobicity
    aerobic = series['aerobicity'].strip().lower() == 'aerobic'
    # heterologous pathway: blank string means no additions
    additions = series['additions']
    heterologous_pathway = None if additions.strip() == '' else additions
    # knockouts
    gene_knockouts = series['deletions_b']
    # target_exchange
    try:
        target_exchange = exchange_for_metabolite_name(series['target'])[0][0]
    except NotFoundError:
        # BUG FIX: this formerly interpolated an undefined name `target`,
        # which raised NameError instead of reporting the bad value
        return error_series(series, 'Bad target name: %s' % series['target'])
    # other bounds: open FHL for hydrogen production if the model has it
    other_bounds = {}
    if target_exchange == 'EX_h2_e':
        if 'FHL' in model.reactions:
            other_bounds['FHL'] = (0, 1000)
    # environment is generic for any model
    environment = Environment(substrate_exchanges, supplement_exchanges,
                              aerobic, other_bounds)
    design = Design(heterologous_pathway, gene_knockouts, target_exchange)
    return SimulationSetup(model, environment, design, use_greedy_knockouts)
def main():
    """Load an old Escher map, and generate a validated map. """
    args = sys.argv
    try:
        map_path, model_path = args[1], args[2]
    except IndexError:
        raise Exception(
            "Usage: python convert_map.py old_map.json path/to/model.sbml")
    # look for the model in theseus first, then as a JSON model, then as SBML
    try:
        model = load_model(model_path)
    except Exception:
        try:
            model = cobra.io.load_json_model(model_path)
        except (IOError, ValueError):
            try:
                model = cobra.io.read_sbml_model(model_path)
            except IOError:
                raise Exception(
                    'Could not find model in theseus or filesystem: %s'
                    % model_path)
    # load and convert the old map
    with open(map_path, 'r') as infile:
        old_map = json.load(infile)
    new_map = convert(old_map, model)
    # write to a new file so the original is preserved
    converted_path = map_path.replace('.json', '_converted.json')
    print('Saving validated map to %s' % converted_path)
    with open(converted_path, 'w') as outfile:
        json.dump(new_map, outfile, allow_nan=False)
def me_model():
    """Return a freshly loaded ME model."""
    the_model = load_model('ME')
    return the_model
def save_map(filename, out_directory, model_name):
    """Convert an old-style map file to an Escher map and save it.

    Parameters
    ----------
    filename: Path to a '.json' or '.json.gz' old-style map file. Other
    extensions are skipped with a warning.

    out_directory: Directory where the converted '_map.json' file is written.

    model_name: Name of the COBRA model used to validate the map.
    """
    # load the raw map data, supporting gzipped input
    if filename.endswith('.json.gz'):
        with gzip.open(filename, "r") as f:
            data = json.load(f)
        out_file = join(out_directory,
                        basename(filename).replace('.json.gz', '_map.json'))
    elif filename.endswith('.json'):
        with open(filename, "r") as f:
            data = json.load(f)
        out_file = join(out_directory,
                        basename(filename).replace('.json', '_map.json'))
    else:
        # BUG FIX: previously fell through after the warning, then raised
        # NameError on the undefined `data`; skip the file instead
        logging.warn('Not loading file %s' % filename)
        return
    # get the cobra model
    model = load_model(model_name)
    # get the compartment dictionary
    df = pd.DataFrame.from_csv("compartment_id_key.csv")
    compartment_id_dictionary = {}
    for row in df.itertuples(index=True):
        compartment_id_dictionary[row[0]] = row[1:3]
    # major categories
    # BUG FIX: `segments` was only assigned inside the loop (the initializer
    # was the misnamed `line_segments`), so a map without MAPLINESEGMENT
    # raised NameError at parse_segments; default it here
    reactions = []
    segments = []
    text_labels = []
    nodes = []
    for k, v in data.iteritems():
        if k == "MAPREACTION":
            reactions = v
        elif k == "MAPLINESEGMENT":
            segments = v
        elif k == "MAPTEXT":
            text_labels = v
        elif k == "MAPNODE":
            nodes = v
        else:
            raise Exception('Unrecognized category: %s' % k)
    # do the nodes
    nodes = parse_nodes(nodes, compartment_id_dictionary)
    # do the segments (mutates reactions/nodes in place)
    parse_segments(segments, reactions, nodes)
    # do the reactions
    reactions = parse_reactions(reactions, model, nodes)
    # do the text labels
    text_labels = parse_labels(text_labels)
    # compile the data
    out = {}
    out['nodes'] = nodes
    out['reactions'] = reactions
    out['text_labels'] = text_labels
    # for export, only keep the necessary stuff
    node_keys_to_keep = [
        'node_type', 'x', 'y', 'name', 'bigg_id', 'label_x', 'label_y',
        'node_is_primary', 'connected_segments'
    ]
    segment_keys_to_keep = ['from_node_id', 'to_node_id', 'b1', 'b2']
    reaction_keys_to_keep = [
        'segments', 'name', 'reversibility', 'bigg_id', 'label_x', 'label_y',
        'metabolites', 'gene_reaction_rule'
    ]
    text_label_keys_to_keep = ['x', 'y', 'text']
    for k, node in out['nodes'].iteritems():
        only_keep_keys(node, node_keys_to_keep)
    for k, reaction in out['reactions'].iteritems():
        if 'segments' not in reaction:
            continue
        for k, segment in reaction['segments'].iteritems():
            only_keep_keys(segment, segment_keys_to_keep)
        only_keep_keys(reaction, reaction_keys_to_keep)
    for k, text_label in out['text_labels'].iteritems():
        only_keep_keys(text_label, text_label_keys_to_keep)
    # get max width and height of the drawing
    min_max = {'x': [inf, -inf], 'y': [inf, -inf]}
    for node in nodes.itervalues():
        if node['x'] < min_max['x'][0]:
            min_max['x'][0] = node['x']
        if node['x'] > min_max['x'][1]:
            min_max['x'][1] = node['x']
        if node['y'] < min_max['y'][0]:
            min_max['y'][0] = node['y']
        if node['y'] > min_max['y'][1]:
            min_max['y'][1] = node['y']
    width = min_max['x'][1] - min_max['x'][0]
    height = min_max['y'][1] - min_max['y'][0]
    # canvas extends 5% beyond the drawing on every side
    out['canvas'] = {
        'x': min_max['x'][0] - 0.05 * width,
        'y': min_max['y'][0] - 0.05 * height,
        'width': width + 0.10 * width,
        'height': height + 0.10 * height
    }
    header = {
        "schema": "https://zakandrewking.github.io/escher/escher/jsonschema/1-0-0#",
        "homepage": "https://zakandrewking.github.io/escher",
        "map_id": basename(filename).replace('.json', '').replace('.gz', ''),
        "map_name": "",
        "map_description": ""
    }
    the_map = [header, out]
    from escher.convert_map import convert
    the_map = convert(the_map, model)
    with open(out_file, 'w') as f:
        json.dump(the_map, f, allow_nan=False)
def secretions_for_knockouts(setup, knockouts=None, max_depth=1000, depth=0,
                             ignore_exchanges=None, return_if_found=None,
                             raise_if_found=None, growth_cutoff=min_biomass,
                             flux_cutoff=1.0):
    """Accepts a SimulationSetup and a set of knockouts. Returns a tree of
    secretions using nested dictionaries.

    Arguments
    ---------

    setup: SimulationSetup.

    knockouts: A list of reaction IDs to knock out (defaults to none).

    max_depth: The maximum depth to search.

    depth: The current depth.

    ignore_exchanges: Exchanges to not knock out (defaults to none).

    raise_if_found: A reaction ID that, if found, will raise FoundReaction
    exception.

    return_if_found: A reaction ID that, if found, will return True as the
    first return value.

    growth_cutoff: Below this growth rate, the simulation is considered
    lethal.

    flux_cutoff: The minimum flux required to raise for return_if_found.

    """
    # BUG FIX: the defaults were mutable lists ([]); use None sentinels
    if knockouts is None:
        knockouts = []
    if ignore_exchanges is None:
        ignore_exchanges = []
    # check depth
    if depth > max_depth:
        print('Max depth')
        return False, 'MAX_DEPTH'
    if depth >= 20 and depth % 10 == 0:
        print(depth)
    # always copy the model; ME models cannot be copied, so reload instead
    model = setup.model
    if model.id == 'ME':
        model = load_model(model.id)
    else:
        model = model.copy()
    # copy environment for changes, knock out the reactions by adding them to
    # other_bounds
    environment = Environment(
        setup.environment.substrate_exchanges,
        setup.environment.supplement_exchanges,
        setup.environment.aerobic,
        dict({ko: (0, 0) for ko in knockouts},
             **setup.environment.other_bounds))
    # set up model. Have to do this every time because the ME model cannot be
    # copied
    model = apply_design(model, setup.design, setup.use_greedy_knockouts)
    model = apply_environment(model, environment)
    # solve the problem
    sol = me_optimize_growth(model) if model.id == 'ME' else model.optimize()
    if sol.f is None or sol.f <= growth_cutoff:
        # this knockout set is lethal
        return False, None
    secretion = dict(get_secretion(model, sol.x_dict, sort=False))
    if (raise_if_found is not None and raise_if_found in secretion and
            secretion[raise_if_found] > flux_cutoff):
        raise FoundReaction(str(secretion))
    elif (return_if_found is not None and return_if_found in secretion and
            secretion[return_if_found] > flux_cutoff):
        can_secrete = True
        children = None
    else:
        # recurse: additionally knock out each secreted exchange that is
        # above the flux cutoff and not in ignore_exchanges
        children_raw = {
            new_knockout: secretions_for_knockouts(
                setup,
                knockouts=knockouts + [new_knockout],
                max_depth=max_depth,
                depth=depth + 1,
                ignore_exchanges=ignore_exchanges,
                return_if_found=return_if_found,
                raise_if_found=raise_if_found,
                growth_cutoff=growth_cutoff,
                flux_cutoff=flux_cutoff)
            for new_knockout, flux in secretion.iteritems()
            if new_knockout not in ignore_exchanges and flux > flux_cutoff
        }
        can_secrete = any(v[0] for v in children_raw.itervalues())
        children = {k: v[1] for k, v in children_raw.iteritems()}
    return can_secrete, {
        'knockouts': knockouts,
        'growth_rate': sol.f,
        'secretion': secretion,
        'children': children,
    }
# Generate example data files for the iJO1366 model.

from theseus import load_model
import math
import cobra.io
import random
import json
import pandas as pd

# model: load iJO1366, set the human-readable name for gene b1779 (gapA),
# and export the model as JSON
ijo = load_model('iJO1366')
ijo.genes.get_by_id('b1779').name = 'gapA'
cobra.io.save_json_model(ijo, 'iJO1366.json')

# gene data: a random value in [0, 20) per gene, saved as both JSON and CSV
gene_data = {gene.id: random.random() * 20 for gene in ijo.genes}
with open('gene_data_iJO1366.json', 'w') as f:
    json.dump(gene_data, f)
(pd.DataFrame.from_records(gene_data.items(), columns=['gene', 'value'])
 .to_csv('gene_data_iJO1366.csv', index=None))

# reaction text data: the reaction string for each reaction, saved as CSV
reaction_text_data = {reaction.id: reaction.build_reaction_string()
                      for reaction in ijo.reactions}
(pd.DataFrame.from_records(reaction_text_data.items(),
                           columns=['reaction', 'value'])
 .to_csv('reaction_text_data_iJO1366.csv', index=None))

# convert RNA-seq data to normalize data that looks like array data
with open('aerobic_anaerobic_E_coli_RNA-seq.json', 'r') as f:
    gene_comparison = json.load(f)
# NOTE(review): relies on Python 2 dict.values() returning lists so `+`
# concatenates; presumably gene_comparison is a 2-element list of
# {gene: value} dicts — TODO confirm against the data file
all_vals = gene_comparison[0].values() + gene_comparison[1].values()

# log values
def main():
    """Score candidate map files in a directory against a COBRA model.

    Usage: script search_dir model_name

    For each JSON (optionally gzipped) map file in search_dir, count how many
    of the map's reaction IDs appear in the model. Keep maps where at least
    90% of reactions match, and write a ranked TSV of results.
    """
    try:
        search_dir = argv[1]
        model_name = argv[2]
    except IndexError:  # was a bare except:, which hid unrelated errors
        raise Exception('Not enough arguments.')
    model = load_model(model_name)
    ids = [id_for_new_id_style(x.id) for x in model.reactions]
    scores = []
    size = len(listdir(search_dir))
    for i, path in enumerate(listdir(search_dir)):
        # progress indicator: rewrite the same terminal line each iteration
        sys.stdout.write('\r')
        sys.stdout.write("%d / %d" % (i + 1, size))
        sys.stdout.flush()
        if path.endswith('.gz'):
            f = gzip.open(join(search_dir, path), 'r')
        else:
            f = open(join(search_dir, path), 'r')
        # BUG FIX: the file handle used to leak whenever a `continue` below
        # fired; the finally clause guarantees it is closed
        try:
            m = json.load(f)
            # (1) Compare the metabolite count. This also confirms both keys
            # exist, so no second KeyError check is needed below.
            try:
                met_count = len(m['MAPNODE'])
                reaction_count = len(m['MAPREACTION'])
            except KeyError:
                continue
            # (2) Compare the reaction ids to the cobra model
            num_matches = 0
            reactions = m['MAPREACTION']
            for reaction in reactions:
                try:
                    an_id = reaction['REACTIONABBREVATION']
                except KeyError:
                    continue
                if id_for_new_id_style(an_id) in ids:
                    num_matches = num_matches + 1
            # quit if not > 90% of reaction ids match the model
            frac = 0.9
            if num_matches < frac * len(reactions):
                continue
            scores.append((join(search_dir, path),
                           float(num_matches) / len(reactions),
                           met_count, reaction_count))
        finally:
            f.close()
    # sort by metabolite count (desc), then score (asc) as the primary key
    scores = sorted(scores, key=itemgetter(2), reverse=True)
    scores = sorted(scores, key=itemgetter(1))
    outfile = '%s_maps.tsv' % model_name
    print('')
    print('saving to %s' % outfile)
    (pd.DataFrame(scores,
                  columns=['path', 'score', 'n_metabolites', 'n_reactions'])
     .to_csv(outfile, sep='\t'))
names=['Paper', 'Model'])) # propogate info cols_to_propagate = ['year', 'target_exchange', 'Deletions', 'Aerobicity', 'Additions', 'Target', 'c_byproduct_order', 'Evolved', 'citation_key', 'substrate_exchange', 'PMID', 'authors', 'title', 'strategies', 'In silico prediction', 'Native?', 'Parent strain', 'Substrate'] all_sims3[cols_to_propagate] = all_sims3[cols_to_propagate].groupby(level='Paper').fillna(method='backfill') all_sims3[cols_to_propagate] = all_sims3[cols_to_propagate].groupby(level='Paper').fillna(method='pad') # add year to index all_sims3 = all_sims3.set_index('year', append=True) all_sims3 = all_sims3.sort_index() ### Temporary solution to Single_Exchange_FVA bug hete_model = add_all_heterologous_pathways(load_model('iJO1366')) # year being NaN causes trouble for idx'ing all_sims = all_sims3.reset_index(level='year') all_sims.loc[idx[:, 'ME'], 'min'] = all_sims.loc[idx[:, 'ME'], :].apply(get_batch_target_secretion, axis=1) all_sims.loc[idx[:, 'ME'], 'max'] = all_sims.loc[idx[:, 'ME'], :].apply(get_batch_target_secretion, axis=1) all_sims.loc[idx[:, 'ME'], 'yield_min'] = all_sims.loc[idx[:, 'ME'], :].apply(get_batch_target_yield, axis=1, t='min') all_sims.loc[idx[:, 'ME'], 'yield_max'] = all_sims.loc[idx[:, 'ME'], :].apply(get_batch_target_yield, axis=1, t='max') all_sims = all_sims.set_index('year', append=True) all_sims = all_sims.sort_index() ## Check for sims where iJO grows and ME dies (and vice versa) ijo_grows_vs_me = (all_sims .sort_index() .loc[idx[:, ['iJO1366', 'ME'], :], :]
def save_map(filename, out_directory, model_name):
    """Convert an old-style map file to an Escher map and save it.

    Parameters
    ----------
    filename: Path to a '.json' or '.json.gz' old-style map file. Other
    extensions are skipped with a warning.

    out_directory: Directory where the converted '_map.json' file is written.

    model_name: Name of the COBRA model used to validate the map.
    """
    # load the raw map data, supporting gzipped input
    if filename.endswith('.json.gz'):
        with gzip.open(filename, "r") as f:
            data = json.load(f)
        out_file = join(out_directory,
                        basename(filename).replace('.json.gz', '_map.json'))
    elif filename.endswith('.json'):
        with open(filename, "r") as f:
            data = json.load(f)
        out_file = join(out_directory,
                        basename(filename).replace('.json', '_map.json'))
    else:
        # BUG FIX: previously fell through after the warning, then raised
        # NameError on the undefined `data`; skip the file instead
        logging.warn('Not loading file %s' % filename)
        return
    # get the cobra model
    model = load_model(model_name)
    # get the compartment dictionary
    df = pd.DataFrame.from_csv("compartment_id_key.csv")
    compartment_id_dictionary = {}
    for row in df.itertuples(index=True):
        compartment_id_dictionary[row[0]] = row[1:3]
    # major categories
    # BUG FIX: `segments` was only assigned inside the loop (the initializer
    # was the misnamed `line_segments`), so a map without MAPLINESEGMENT
    # raised NameError at parse_segments; default it here
    reactions = []
    segments = []
    text_labels = []
    nodes = []
    for k, v in data.iteritems():
        if k == "MAPREACTION":
            reactions = v
        elif k == "MAPLINESEGMENT":
            segments = v
        elif k == "MAPTEXT":
            text_labels = v
        elif k == "MAPNODE":
            nodes = v
        else:
            raise Exception('Unrecognized category: %s' % k)
    # do the nodes
    nodes = parse_nodes(nodes, compartment_id_dictionary)
    # do the segments (mutates reactions/nodes in place)
    parse_segments(segments, reactions, nodes)
    # do the reactions
    reactions = parse_reactions(reactions, model, nodes)
    # do the text labels
    text_labels = parse_labels(text_labels)
    # compile the data
    out = {}
    out['nodes'] = nodes
    out['reactions'] = reactions
    out['text_labels'] = text_labels
    # for export, only keep the necessary stuff
    node_keys_to_keep = ['node_type', 'x', 'y', 'name', 'bigg_id', 'label_x',
                         'label_y', 'node_is_primary', 'connected_segments']
    segment_keys_to_keep = ['from_node_id', 'to_node_id', 'b1', 'b2']
    reaction_keys_to_keep = ['segments', 'name', 'reversibility', 'bigg_id',
                            'label_x', 'label_y', 'metabolites',
                            'gene_reaction_rule']
    text_label_keys_to_keep = ['x', 'y', 'text']
    for k, node in out['nodes'].iteritems():
        only_keep_keys(node, node_keys_to_keep)
    for k, reaction in out['reactions'].iteritems():
        if 'segments' not in reaction:
            continue
        for k, segment in reaction['segments'].iteritems():
            only_keep_keys(segment, segment_keys_to_keep)
        only_keep_keys(reaction, reaction_keys_to_keep)
    for k, text_label in out['text_labels'].iteritems():
        only_keep_keys(text_label, text_label_keys_to_keep)
    # get max width and height of the drawing
    min_max = {'x': [inf, -inf], 'y': [inf, -inf]}
    for node in nodes.itervalues():
        if node['x'] < min_max['x'][0]:
            min_max['x'][0] = node['x']
        if node['x'] > min_max['x'][1]:
            min_max['x'][1] = node['x']
        if node['y'] < min_max['y'][0]:
            min_max['y'][0] = node['y']
        if node['y'] > min_max['y'][1]:
            min_max['y'][1] = node['y']
    width = min_max['x'][1] - min_max['x'][0]
    height = min_max['y'][1] - min_max['y'][0]
    # canvas extends 5% beyond the drawing on every side
    out['canvas'] = {
        'x': min_max['x'][0] - 0.05 * width,
        'y': min_max['y'][0] - 0.05 * height,
        'width': width + 0.10 * width,
        'height': height + 0.10 * height}
    header = {
        "schema": "https://zakandrewking.github.io/escher/escher/jsonschema/1-0-0#",
        "homepage": "https://zakandrewking.github.io/escher",
        "map_id": basename(filename).replace('.json', '').replace('.gz', ''),
        "map_name": "",
        "map_description": ""
    }
    the_map = [header, out]
    from escher.convert_map import convert
    the_map = convert(the_map, model)
    with open(out_file, 'w') as f:
        json.dump(the_map, f, allow_nan=False)