def do_many_rand_prunes(full_model, bm_rxn, reps):
    """Run ``scn.random_prune`` ``reps`` times and tally the outcomes.

    Pruned models are never compared directly; two runs are treated as the
    same network when ``scn.make_rxn_incl(full_model, pruned_net)`` returns
    equal values. That value is used as a dictionary key, so it has to be
    hashable.

    Args:
        full_model: the un-pruned model handed to ``scn.random_prune``.
        bm_rxn: the biomass reaction handed to ``scn.random_prune``.
        reps: how many random prunes to perform.

    Returns:
        A 3-tuple ``(sizes, tally, distinct_nets)``:
          * ``sizes`` - ``len(pruned_net.reactions)`` for every run, in run
            order, whether or not the network had been seen before;
          * ``tally`` - dict mapping each reaction-inclusion vector to the
            number of runs that produced it;
          * ``distinct_nets`` - the first pruned network seen for each
            distinct reaction-inclusion vector, in order of discovery.
    """
    sizes = []
    tally = {}
    distinct_nets = []
    for run in range(1, reps + 1):
        # progress report every hundredth run
        if not run % 100:
            print(f'On random prune {run}.')
        candidate = scn.random_prune(full_model, bm_rxn)
        signature = scn.make_rxn_incl(full_model, candidate)
        seen_before = signature in tally
        tally[signature] = tally.get(signature, 0) + 1
        if not seen_before:
            # only keep one model object per distinct network
            distinct_nets.append(candidate)
        # the size distribution counts every run, duplicates included
        sizes.append(len(candidate.reactions))
    return (sizes, tally, distinct_nets)
def prune_many_times(arglist):
    '''
    Given (packed into the single tuple `arglist`, in this order):
        - full_model: COBRApy model representing a full/complete/un-pruned
          string chemistry
        - ins: a number of nutrient sources (passed to scn.choose_inputs)
        - outs: a number of biomass precursors (passed to
          scn.choose_bm_mets)
        - envs: a number of different sets of nutrient sources
        - combos: a number of times to change stoichiometric coefficients
          in the biomass reaction
    Do:
        - Create a biomass reaction with the designated number of reactants
        - Create the designated number of variants on that reaction with
          randomized stoichiometric coefficients (each reactant's
          coefficient is a random integer between 1 and 10, negated so the
          metabolite is consumed)
        - For every variant, keep choosing sets of nutrient sources until
          `envs` of them support biomass flux of at least 1e-10 on the full
          network; sets that do not are discarded and redrawn, with no
          limit on the number of redraws
        - Prune the network once for each combination of biomass reaction
          variant and accepted set of nutrients
    Return a DataFrame with one row per pruned network (rows from every
    coefficient variant are included) and the columns:
        - 'biomass': '-'-joined IDs of the biomass precursors
        - 'env': '-'-joined IDs of the nutrient metabolites
        - 'rxn_incl': reaction-inclusion vector of the pruned network
        - 'growth': biomass flux of the pruned network
    The frame is empty (but still has these columns) when `combos` or
    `envs` is 0.

    Note: full_model is modified in place only to the extent that
    scn.choose_bm_mets adds a biomass reaction to it, which is removed
    again immediately.
    '''
    # everything arrives in one tuple so the function can be handed to a
    # map-style dispatcher
    (full_model, ins, outs, envs, combos) = arglist
    # add a biomass reaction but remove it from the model immediately; only
    # its list of metabolites is reused below
    bm_rxn = scn.choose_bm_mets(outs, full_model)
    full_model.remove_reactions([bm_rxn])

    # accumulate across ALL coefficient variants; creating these inside the
    # loop would throw away every variant's results but the last
    food_mets = []
    rxn_incl_vecs = []
    pruned_growths = []

    # loop over variants of the biomass reaction with different coefficients
    # but identical reactants
    for combo in range(combos):
        if (combo + 1) % 10 == 0:
            print(f'On coefficient set {combo+1} of {combos}')
        # make a new biomass reaction
        new_bm = cobra.Reaction('varied_bm_rxn')
        new_bm.add_metabolites(
            {m: -random.randint(1, 10) for m in bm_rxn.metabolites})
        # work on a copy so full_model never carries a biomass reaction
        model = full_model.copy()
        model.add_reaction(new_bm)
        model.objective = new_bm

        # number of usable environments found for this coefficient set
        found = 0
        while found < envs:
            # remove existing input reactions
            old_inputs = [
                rxn for rxn in model.boundary if rxn.id.startswith('->')
            ]
            model.remove_reactions(old_inputs)
            # choose new input reactions
            scn.choose_inputs(ins, model, new_bm)
            in_rxns = [
                rxn for rxn in model.boundary if rxn.id.startswith('->')
            ]
            # see if this choice of metabolites can produce the biomass on
            # the full network before bothering to prune
            solution = model.optimize()
            bm_rxn_flux = solution.fluxes.get(key=new_bm.id)
            if solution.status == 'infeasible' or bm_rxn_flux < 1e-10:
                # unusable environment: draw another one
                continue
            found += 1
            if found % 100 == 0:
                print(f'On environment {found}')
            # record the metabolites that worked
            food_mets.append('-'.join(
                [met.id for rxn in in_rxns for met in rxn.metabolites]))
            # prune the network
            pruned_net = scn.min_flux_prune(model, new_bm)
            rxn_incl_vecs.append(scn.make_rxn_incl(model, pruned_net))
            # get the growth rate on the pruned network
            pruned_solution = pruned_net.optimize()
            pruned_growths.append(
                pruned_solution.fluxes.get(key=new_bm.id))

    # every variant shares the same reactants, so one label fits all rows
    biomass_label = '-'.join([met.id for met in bm_rxn.metabolites])
    # naming the columns up front keeps the frame well-formed even when no
    # rows were collected
    data = pd.DataFrame(
        {
            'biomass': [biomass_label] * len(food_mets),
            'env': food_mets,
            'rxn_incl': rxn_incl_vecs,
            'growth': pruned_growths,
        },
        columns=['biomass', 'env', 'rxn_incl', 'growth'],
    )
    return data
bm_rxn_flux = solution.fluxes.get(key = bm_rxn.id) # now that we know there's at least one environment that supports growth # with this biomass reaction, we can prune the universal network pruned_model = scn.min_flux_prune(universal_model, bm_rxn) # find growth in every environment for env in envs: # start by removing existing input reactions in_rxns = [ # don't want to remove all boundary reactions because that would # also remove all of the export reactions rxn for rxn in pruned_model.boundary if rxn.id.startswith('->') ] pruned_model.remove_reactions(in_rxns) # while we have the pruned network with no input reactions, make the # reaction-inclusion vector bitstring = scn.make_rxn_incl(universal_model, pruned_model) # create new input reactions for met in env: in_rxn = cobra.Reaction( '->' + met.id, upper_bound = 1.0, # only allow importing of this metabolite lower_bound = 0.0 ) in_rxn.add_metabolites({met: 1.0}) pruned_model.add_reaction(in_rxn) # do FBA to find growth in this environment solution = pruned_model.optimize() # prepare output growth = str(solution.fluxes.get(key = bm_rxn.id)) env_string = ','.join([met.id for met in env]) growth_lists.append([bm_rxn.id, env_string, growth, bitstring])
def prune_many_times(arglist):
    '''
    Prune one network on many randomly chosen sets of input reactions.

    Given (packed into the single tuple `arglist`, in this order):
        - full_model: the un-pruned model; it is copied, so the caller's
          model is not modified by this function
        - ins: passed to scn.choose_inputs (presumably the number of
          nutrient sources per environment -- confirm against scn)
        - outs: passed to scn.choose_bm_mets (presumably the number of
          biomass precursors -- confirm against scn)
        - envs: how many usable environments to collect
    Do:
        - Add a biomass reaction to the copy and make it the objective
        - Keep choosing sets of input reactions until `envs` of them give
          biomass flux of at least 1e-10 on the full network; sets that do
          not are discarded and redrawn, with no limit on the number of
          redraws
        - Prune the network with scn.bm_impact_prune once per accepted set
    Return a DataFrame with one row per pruned network and the columns:
        - 'biomass': '-'-joined IDs of the biomass reaction's metabolites
        - 'env': '-'-joined IDs of the input metabolites
        - 'rxn_incl': reaction-inclusion vector of the pruned network
        - 'growth': biomass flux of the pruned network
    The frame is empty (but still has these columns) when `envs` is 0.
    '''
    # everything arrives in one tuple so the function can be handed to a
    # map-style dispatcher
    full_model, ins, outs, envs = arglist
    # work on a copy of the original model so we don't have to remove the
    # biomass reaction afterwards
    model = full_model.copy()
    # add a biomass reaction and set it as the objective
    bm_rxn = scn.choose_bm_mets(outs, model)
    model.objective = bm_rxn

    # environments used, reaction-inclusion vectors of the pruned networks
    # and growth rates on the pruned networks, one entry per accepted draw
    food_mets = []
    rxn_incl_vecs = []
    pruned_growths = []

    # a while loop rather than a for loop because rejected environments
    # must not count towards the total
    while len(food_mets) < envs:
        # remove existing input reactions
        old_inputs = [
            rxn for rxn in model.boundary if rxn.id.startswith('->')
        ]
        model.remove_reactions(old_inputs)
        # choose new input reactions
        scn.choose_inputs(ins, model, bm_rxn)
        in_rxns = [rxn for rxn in model.boundary if rxn.id.startswith('->')]
        # see if this choice of metabolites can produce the biomass on the
        # full network before bothering to prune
        solution = model.optimize()
        bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        if solution.status == 'infeasible' or bm_rxn_flux < 1e-10:
            # unusable environment: draw another one
            continue
        accepted = len(food_mets) + 1
        if accepted % 100 == 0:
            print(f'On environment {accepted}')
        # record the metabolites that worked
        food_mets.append('-'.join(
            [met.id for rxn in in_rxns for met in rxn.metabolites]))
        # prune the network
        pruned_net = scn.bm_impact_prune(model, bm_rxn)
        rxn_incl_vecs.append(scn.make_rxn_incl(model, pruned_net))
        # get the growth rate on the pruned network
        pruned_solution = pruned_net.optimize()
        pruned_growths.append(pruned_solution.fluxes.get(key=bm_rxn.id))

    biomass_label = '-'.join([met.id for met in bm_rxn.metabolites])
    # naming the columns up front keeps the frame well-formed even when no
    # rows were collected
    data = pd.DataFrame(
        {
            'biomass': [biomass_label] * len(food_mets),
            'env': food_mets,
            'rxn_incl': rxn_incl_vecs,
            'growth': pruned_growths,
        },
        columns=['biomass', 'env', 'rxn_incl', 'growth'],
    )
    return data
j += 1 continue # record these metabolites and the reactions that had flux else: if i % 100 == 0: print(f'On environment {i}') # reset the reselection counter j = 0 # get the list of food source metabolites food_mets.append('-'.join( [met.id for rxn in in_rxns for met in rxn.metabolites])) # remove all reactions without flux no_flux_rxns = solution.fluxes[solution.fluxes == 0].index flux_only = model.copy() flux_only.remove_reactions(no_flux_rxns) rxn_incl = scn.make_rxn_incl(model, flux_only) rxn_incl_vecs.append(rxn_incl) # make a dataframe out of the two lists and add it to the larger dataframe more_data = pd.DataFrame(list(zip(food_mets, rxn_incl_vecs))) more_data.columns = ['env', 'rxn_incl'] # add a column with the biomass components more_data['biomass'] = list( it.repeat('-'.join([met.id for met in bm_rxn.metabolites]), len(food_mets))) all_data = all_data.append(more_data) all_data.to_csv( f'data/multiple_env_fba_{monos}_{max_pol}_{ins}ins_{envs}envs_' + f'{outs}outs_{orgs}orgs_{export}exp.csv')
# see if these biomass precursors can be produced on this environment # before we bother pruning solution = cobra_model.optimize() bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id) if solution.status != 'infeasible' and bm_rxn_flux > 10e-10: # only increment the counter if we've chosen a usable biomass reaction i += 1 print(f'On biomass reaction {i}') # run the minimum flux pruner pruned_net = scn.min_flux_prune(cobra_model, bm_rxn) # remove the biomass reaction before making the reaction bitstring # can't just remove the reaction because somehow in the pruning process # the biomass reaction became different in some subtle way pruned_bm_rxn = pruned_net.reactions.get_by_id(bm_rxn.id) pruned_net.remove_reactions([pruned_bm_rxn]) bitstring = scn.make_rxn_incl(cobra_model, pruned_net) pruned_nets[bitstring] = [met.id for met in bm_rxn.metabolites] # remove this biomass reaction from the full network regardless of whether # it worked or not cobra_model.remove_reactions([bm_rxn]) # print a bunch of info but also write it out to a tsv with open( f'data/multiple_bm_min_prune_{monos}_{max_pol}_{ins}ins_' + f'{outs}outs_{bms}_bms.tsv', 'w') as out: out.write('bitstring\trxn_count\tbiomass\n') for network in pruned_nets.keys(): rxn_count = count_bitstring(network) out_row = '\t'.join( [network, str(rxn_count), ','.join(pruned_nets[network])]) out.write(out_row + '\n')
export_model.objective = exp_bm_rxn no_export_model.objective = no_exp_bm_rxn scn.choose_inputs(int(ins), no_export_model, no_exp_bm_rxn) export_model.add_reactions( [rxn.copy() for rxn in no_export_model.boundary]) # see if this combination works for both networks export_solution = export_model.optimize() no_export_solution = no_export_model.optimize() # do min-flux pruning and get reaction-inclusion vectors for both networks print('Using minimum-flux pruner.') min_pruned_export = scn.min_flux_prune(export_model, exp_bm_rxn) min_pruned_no_export = scn.min_flux_prune(no_export_model, no_exp_bm_rxn) min_export_count = len(min_pruned_export.reactions) min_no_export_count = len(min_pruned_no_export.reactions) min_export_rxn_incl = scn.make_rxn_incl(export_model, min_pruned_export) min_no_export_rxn_incl = scn.make_rxn_incl(no_export_model, min_pruned_no_export) # randomly prune each network as many times as specified print('Randomly pruning with export reactions.') (rand_export_pruned_rxn_counts, rand_export_prune_counts, rand_export_pruned_nets) = do_many_rand_prunes(export_model, exp_bm_rxn, int(reps)) print('Randomly pruning without export reactions.') (rand_no_export_pruned_rxn_counts, rand_no_export_prune_counts, rand_no_export_pruned_nets) = do_many_rand_prunes(no_export_model, no_exp_bm_rxn, int(reps)) print('Preparing text output.')