Ejemplo n.º 1
0
def do_many_prunes(network, export, reps, ins, outs):
    '''
    Given a string_chem_net object and whether or not to allow export,
    prune the network many times and record the sizes of the pruned networks

    network: object exposing met_list and rxn_list, which are handed to
        scn.make_cobra_model
    export: passed to scn.make_cobra_model as allow_export
    reps: number of times to prune
    ins: number passed to scn.choose_inputs when picking input metabolites
    outs: number passed to scn.choose_bm_mets when picking biomass metabolites

    Returns a DataFrame with one row per prune and the columns rxn_count
    (reactions in the pruned network) and met_count (metabolites that still
    participate in at least one reaction); it is empty if reps is 0
    '''
    # create the COBRApy model
    model = scn.make_cobra_model(
        network.met_list,
        network.rxn_list,
        allow_export = export
    )
    # collect one (rxn_count, met_count) pair per prune and build the
    # DataFrame once at the end; DataFrame.append was removed in pandas 2.0
    # and copied the whole frame on every call
    counts = []
    # prune this network reps times
    for rep in range(reps):
        if rep % 10 == 0:
            print(f'On prune {rep} of {reps}')
        # work with a copy of the model so it remains untouched for the next
        # iteration of the loop
        full_model = model.copy()
        # randomly choose the appropriate number of input and output mets
        bm_rxn = scn.choose_bm_mets(outs, full_model)
        scn.choose_inputs(ins, full_model, bm_rxn)
        full_model.objective = bm_rxn
        # see if there's a feasible solution on the full model
        solution = full_model.optimize()
        # can't just check solution.status because sometimes it's feasible but
        # the flux through the biomass reaction is vanishingly small
        bm_rxn_flux = solution.fluxes.get(key = bm_rxn.id)
        while solution.status == 'infeasible' or bm_rxn_flux < 10e-10:
            # if the solution isn't feasible, pick a different environment
            in_rxns = [
                # don't want to remove all boundary reactions because that
                # would also remove all of the export reactions
                rxn for rxn in full_model.boundary if rxn.id.startswith('->')
            ]
            full_model.remove_reactions(in_rxns)
            # the replacement environment must have the same number of inputs
            # as the first attempt, i.e. ins and not outs
            scn.choose_inputs(ins, full_model, bm_rxn)
            solution = full_model.optimize()
            bm_rxn_flux = solution.fluxes.get(key = bm_rxn.id)
        # now that we know there's at least one environment that supports
        # growth with this biomass reaction, we can prune the universal network
        pruned_model = scn.min_flux_prune(full_model, bm_rxn)
        # count reactions and metabolites
        rxn_count = len(pruned_model.reactions)
        # metabolites aren't automatically removed when all of their reactions
        # are removed, so find out how many metabolites are left
        met_count = len([
            m for m in pruned_model.metabolites if len(m.reactions) > 0
        ])
        counts.append((rxn_count, met_count))
    return pd.DataFrame(counts, columns = ['rxn_count', 'met_count'])
Ejemplo n.º 2
0
def prune_once(universal_model, ins, outs, flux_bins, rep):
    '''
    Prune one randomly configured copy of a universal string chemistry
    network and report the degree and flux distributions of the result

    A random biomass reaction (outs is passed to scn.choose_bm_mets) and
    random input reactions (ins is passed to scn.choose_inputs) are added to
    a copy of universal_model; inputs are re-drawn until the biomass reaction
    carries flux, and only then is the network pruned

    flux_bins is accepted but not used here; rep is written into a 'trial'
    column of both returned objects

    Returns a tuple (degree distribution, flux distribution)
    '''
    # never touch the caller's model; everything below happens on a copy
    candidate = universal_model.copy()
    biomass = scn.choose_bm_mets(outs, candidate)
    scn.choose_inputs(ins, candidate, biomass)
    candidate.objective = biomass
    while True:
        attempt = candidate.optimize()
        biomass_flux = attempt.fluxes.get(key=biomass.id)
        # the status alone isn't enough: a feasible solution can still have a
        # vanishingly small biomass flux
        if not (attempt.status == 'infeasible' or biomass_flux < 10e-10):
            break
        # this environment doesn't support growth, so swap it for a new one;
        # only the '->' boundary reactions go, since dropping every boundary
        # reaction would also drop the export reactions
        candidate.remove_reactions([
            rxn for rxn in candidate.boundary if rxn.id.startswith('->')
        ])
        scn.choose_inputs(ins, candidate, biomass)
    # growth is possible in this environment, so pruning makes sense now
    pruned = scn.min_flux_prune(candidate, biomass)
    degrees = make_deg_dist(pruned)
    magnitudes = abs(pruned.optimize().fluxes)
    # drop fluxes that are approximately zero
    nonzero = pd.DataFrame(magnitudes[magnitudes > 10e-10])
    nonzero.columns = ['flux']
    # tag both tables with the round of pruning they came from
    degrees['trial'] = rep
    nonzero['trial'] = rep
    return (degrees, nonzero)
Ejemplo n.º 3
0
def prune_model(universal_model, ins, outs):
    '''
    Prune a copy of the given universal model once, using random input and
    biomass reactions, and summarize the result

    ins is passed to scn.choose_inputs and outs to scn.choose_bm_mets; inputs
    are re-drawn until the biomass reaction carries flux

    Returns a list of four strings: ins, outs, the ratio of reactions to
    still-connected metabolites in the pruned network, and the fraction of
    the universal model's reactions that were pruned away
    '''
    # never touch the caller's model; everything below happens on a copy
    candidate = universal_model.copy()
    biomass = scn.choose_bm_mets(outs, candidate)
    scn.choose_inputs(ins, candidate, biomass)
    candidate.objective = biomass
    while True:
        attempt = candidate.optimize()
        biomass_flux = attempt.fluxes.get(key=biomass.id)
        # the status alone isn't enough: a feasible solution can still have a
        # vanishingly small biomass flux
        if not (attempt.status == 'infeasible' or biomass_flux < 10e-10):
            break
        # this environment doesn't support growth, so swap it for a new one;
        # only the '->' boundary reactions go, since dropping every boundary
        # reaction would also drop the export reactions
        candidate.remove_reactions([
            rxn for rxn in candidate.boundary if rxn.id.startswith('->')
        ])
        scn.choose_inputs(ins, candidate, biomass)
    # growth is possible in this environment, so pruning makes sense now
    pruned = scn.min_flux_prune(candidate, biomass)
    # pruning leaves orphaned metabolites in the model, so only count the
    # ones that still take part in a reaction
    connected_mets = 0
    for met in pruned.metabolites:
        if len(met.reactions) > 0:
            connected_mets += 1
    kept_rxns = len(pruned.reactions)
    ratio = kept_rxns / connected_mets
    total_rxns = len(universal_model.reactions)
    pruned_pct = (total_rxns - kept_rxns) / total_rxns
    # everything is returned as a string so the caller can join it later
    return list(map(str, (ins, outs, ratio, pruned_pct)))
        # flux through the biomass reaction is vanishingly small
        bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        while solution.status == 'infeasible' or bm_rxn_flux < 10e-10:
            # if the solution isn't feasible, pick a different environment
            in_rxns = [
                # don't want to remove all boundary reactions because that would
                # also remove all of the export reactions
                rxn for rxn in full_model.boundary if rxn.id.startswith('->')
            ]
            full_model.remove_reactions(in_rxns)
            scn.choose_inputs(condition[0], full_model, bm_rxn)
            solution = full_model.optimize()
            bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        # now that we know there's at least one environment that supports growth
        # with this biomass reaction, we can prune the universal network
        pruned_model = scn.min_flux_prune(full_model, bm_rxn)
        # metabolites aren't automatically removed when all of their reactions
        # are removed, so find out how many metabolites are left
        met_count = len(
            [m for m in pruned_model.metabolites if len(m.reactions) > 0])
        # compute the reaction-to-metabolite ratio and % of pruned reactions
        ratio = len(pruned_model.reactions) / met_count
        pruned_count = len(universal_model.reactions) - len(
            pruned_model.reactions)
        pruned_pct = pruned_count / len(universal_model.reactions)
        output = [condition[0], condition[1], ratio, pruned_pct]
        # make everything a string so we can join it later
        output_data.append([str(x) for x in output])

with open(
        f'data/varied_{monos}_{max_pol}_{min_ins}to{max_ins}ins_' +
Ejemplo n.º 5
0
def prune_many_times(arglist):
    '''
    Given (packed into the single tuple arglist so this can be used with a
    map-style call):
    - COBRApy model representing a full/complete/un-pruned string chemistry
    - A number of nutrient sources
    - A number of biomass precursors
    - A number of different sets of nutrient sources
    - A number of times to change stoichiometric coefficients in the biomass
      reaction
    Do:
    - Create a biomass reaction with the designated number of reactants
    - Create the designated number of variants on that reaction with randomized
      stoichiometric coefficients (each reactant's coefficient is assigned to
      a random integer between 1 and 10)
    - Choose the designated number of sets of the designated number of nutrient
      sources
    - Prune the network once for each combination of biomass reaction and
      set of nutrients
    Return a DataFrame with one row per successful prune, across all
    coefficient sets, and the columns:
    - biomass: '-'-joined IDs of the biomass precursors used
    - env: '-'-joined IDs of the nutrient sources used
    - rxn_incl: whatever scn.make_rxn_incl returns for the pruned network
    - growth: flux through the biomass reaction on the pruned network
    The DataFrame is empty (but has these columns) if either count is 0

    Note that the biomass reaction is added to and removed from the model
    that was passed in; all other changes happen on copies
    '''
    # probably a more elegant way to do this but I'm currently new to mp.map()
    (full_model, ins, outs, envs, combos) = arglist
    # add a biomass reaction but remove it from the model immediately
    bm_rxn = scn.choose_bm_mets(outs, full_model)
    full_model.remove_reactions([bm_rxn])
    # every variant shares the same reactants, so the label is the same for
    # every row
    biomass_label = '-'.join([met.id for met in bm_rxn.metabolites])
    # one (biomass, env, rxn_incl, growth) tuple per successful prune; this
    # lives outside the loop over coefficient sets so that the results of
    # every set are returned and not just those of the last one
    rows = []
    # loop over variants of the biomass reaction with different coefficients
    # but identical reactants
    for combo in range(combos):
        if (combo + 1) % 10 == 0:
            print(f'On coefficient set {combo+1} of {combos}')
        # make a new biomass reaction
        new_bm = cobra.Reaction('varied_bm_rxn')
        new_bm.add_metabolites(
            {m: -random.randint(1, 10)
             for m in bm_rxn.metabolites})
        # make a copy of the model before adding the new biomass reaction
        model = full_model.copy()
        model.add_reactions([new_bm])
        model.objective = new_bm
        # number of environments that supported growth for this coefficient
        # set; environments that don't are simply re-drawn
        found = 0
        while found < envs:
            # remove existing input reactions
            in_rxns = [
                rxn for rxn in model.boundary if rxn.id.startswith('->')
            ]
            model.remove_reactions(in_rxns)
            # choose new input reactions
            scn.choose_inputs(ins, model, new_bm)
            in_rxns = [
                rxn for rxn in model.boundary if rxn.id.startswith('->')
            ]
            # see if this choice of metabolites can produce the biomass on
            # this network
            solution = model.optimize()
            bm_rxn_flux = solution.fluxes.get(key=new_bm.id)
            if solution.status == 'infeasible' or bm_rxn_flux < 1e-10:
                # this environment doesn't count; draw another one
                continue
            found += 1
            if found % 100 == 0:
                print(f'On environment {found}')
            # get the list of food source metabolites
            food = '-'.join(
                [met.id for rxn in in_rxns for met in rxn.metabolites])
            # prune the network
            pruned_net = scn.min_flux_prune(model, new_bm)
            rxn_incl = scn.make_rxn_incl(model, pruned_net)
            # get the growth rate on the pruned network
            solution = pruned_net.optimize()
            pruned_growth = solution.fluxes.get(key=new_bm.id)
            rows.append((biomass_label, food, rxn_incl, pruned_growth))

    # passing the column names to the constructor keeps this working when
    # there are no rows at all
    return pd.DataFrame(
        rows, columns=['biomass', 'env', 'rxn_incl', 'growth'])
Ejemplo n.º 6
0
 # flux through the biomass reaction is vanishingly small
 bm_rxn_flux = solution.fluxes.get(key = bm_rxn.id)
 while solution.status == 'infeasible' or bm_rxn_flux < 10e-10:
     # if the solution isn't feasible, pick a different environment
     in_rxns = [
         # don't want to remove all boundary reactions because that would
         # also remove all of the export reactions
         rxn for rxn in universal_model.boundary if rxn.id.startswith('->')
     ]
     universal_model.remove_reactions(in_rxns)
     scn.choose_inputs(int(ins), universal_model, bm_rxn)
     solution = universal_model.optimize()
     bm_rxn_flux = solution.fluxes.get(key = bm_rxn.id)
 # now that we know there's at least one environment that supports growth
 # with this biomass reaction, we can prune the universal network
 pruned_model = scn.min_flux_prune(universal_model, bm_rxn)
 # find growth in every environment
 for env in envs:
     # start by removing existing input reactions
     in_rxns = [
         # don't want to remove all boundary reactions because that would
         # also remove all of the export reactions
         rxn for rxn in pruned_model.boundary if rxn.id.startswith('->')
     ]
     pruned_model.remove_reactions(in_rxns)
     # while we have the pruned network with no input reactions, make the
     # reaction-inclusion vector
     bitstring = scn.make_rxn_incl(universal_model, pruned_model)
     # create new input reactions
     for met in env:
         in_rxn = cobra.Reaction(
# collect pruned networks for int(bms) usable biomass reactions on
# cobra_model; keys are the reaction-inclusion bitstrings of the pruned
# networks and values are the IDs of the metabolites in the biomass reaction
# that produced them, so two biomass reactions that prune to the same
# bitstring share one entry (the later one wins)
pruned_nets = dict()
# number of usable biomass reactions found so far
i = 0
while i < int(bms):
    # pick a new biomass reaction and set it as the objective
    bm_rxn = scn.choose_bm_mets(int(outs), cobra_model)
    cobra_model.objective = bm_rxn
    # see if these biomass precursors can be produced on this environment
    # before we bother pruning
    solution = cobra_model.optimize()
    bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
    if solution.status != 'infeasible' and bm_rxn_flux > 10e-10:
        # only increment the counter if we've chosen a usable biomass reaction
        i += 1
        print(f'On biomass reaction {i}')
        # run the minimum flux pruner
        pruned_net = scn.min_flux_prune(cobra_model, bm_rxn)
        # remove the biomass reaction before making the reaction bitstring
        # can't just remove the reaction because somehow in the pruning process
        # the biomass reaction became different in some subtle way
        # (hence the lookup by ID on the pruned network instead of reusing
        # the bm_rxn object directly)
        pruned_bm_rxn = pruned_net.reactions.get_by_id(bm_rxn.id)
        pruned_net.remove_reactions([pruned_bm_rxn])
        bitstring = scn.make_rxn_incl(cobra_model, pruned_net)
        pruned_nets[bitstring] = [met.id for met in bm_rxn.metabolites]
    # remove this biomass reaction from the full network regardless of whether
    # it worked or not
    cobra_model.remove_reactions([bm_rxn])

# print a bunch of info but also write it out to a tsv
with open(
        f'data/multiple_bm_min_prune_{monos}_{max_pol}_{ins}ins_' +
        f'{outs}outs_{bms}_bms.tsv', 'w') as out:
Ejemplo n.º 8
0
    # choose new input and biomass reactions
    exp_bm_rxn = scn.choose_bm_mets(int(outs), export_model)
    no_exp_bm_rxn = exp_bm_rxn.copy()
    no_export_model.add_reaction(no_exp_bm_rxn)
    export_model.objective = exp_bm_rxn
    no_export_model.objective = no_exp_bm_rxn
    scn.choose_inputs(int(ins), no_export_model, no_exp_bm_rxn)
    export_model.add_reactions(
        [rxn.copy() for rxn in no_export_model.boundary])
    # see if this combination works for both networks
    export_solution = export_model.optimize()
    no_export_solution = no_export_model.optimize()

# do min-flux pruning and get reaction-inclusion vectors for both networks
# (the same steps are applied to the export and no-export models, each with
# its own biomass reaction)
print('Using minimum-flux pruner.')
min_pruned_export = scn.min_flux_prune(export_model, exp_bm_rxn)
min_pruned_no_export = scn.min_flux_prune(no_export_model, no_exp_bm_rxn)
# number of reactions left in each min-flux-pruned network
min_export_count = len(min_pruned_export.reactions)
min_no_export_count = len(min_pruned_no_export.reactions)
# compare each pruned network against the full model it was pruned from
min_export_rxn_incl = scn.make_rxn_incl(export_model, min_pruned_export)
min_no_export_rxn_incl = scn.make_rxn_incl(no_export_model,
                                           min_pruned_no_export)

# randomly prune each network as many times as specified
# do_many_rand_prunes is unpacked into three values here; judging by the
# names they are per-prune reaction counts, prune counts and the pruned
# networks themselves -- TODO confirm against do_many_rand_prunes
print('Randomly pruning with export reactions.')
(rand_export_pruned_rxn_counts, rand_export_prune_counts,
 rand_export_pruned_nets) = do_many_rand_prunes(export_model, exp_bm_rxn,
                                                int(reps))

print('Randomly pruning without export reactions.')
(rand_no_export_pruned_rxn_counts, rand_no_export_prune_counts,
Ejemplo n.º 9
0
def prune_many_times(arglist):
    '''
    Add one random biomass reaction to a copy of a model and prune it once
    for each of several randomly chosen sets of input reactions

    arglist is a single tuple (full_model, ins, outs, envs) so this can be
    used with a map-style call:
    - full_model: COBRApy model to prune; it is copied and not modified
    - ins: number passed to scn.choose_inputs when drawing input reactions
    - outs: number passed to scn.choose_bm_mets when creating the biomass
      reaction
    - envs: number of environments that must support growth; environments
      that don't are re-drawn and not counted

    Returns a DataFrame with one row per pruned network and the columns:
    - biomass: '-'-joined IDs of the metabolites in the biomass reaction
    - env: '-'-joined IDs of the metabolites in the input reactions
    - rxn_incl: whatever scn.make_rxn_incl returns for the pruned network
    - growth: flux through the biomass reaction on the pruned network
    The DataFrame is empty (but has these columns) if envs is 0
    '''
    # probably a more elegant way to do this but I'm currently new to mp.map()
    full_model, ins, outs, envs = arglist
    # start by making a copy of the original model so we don't have to remove
    # the biomass reaction each time
    model = full_model.copy()
    # add a biomass reaction and set it as the objective
    bm_rxn = scn.choose_bm_mets(outs, model)
    model.objective = bm_rxn
    # the biomass reaction is the same for every environment, so the label
    # is the same for every row
    biomass_label = '-'.join([met.id for met in bm_rxn.metabolites])
    # one (biomass, env, rxn_incl, growth) tuple per successful prune
    rows = []
    while len(rows) < envs:
        # remove existing input reactions
        in_rxns = [rxn for rxn in model.boundary if rxn.id.startswith('->')]
        model.remove_reactions(in_rxns)
        # choose new input reactions
        scn.choose_inputs(ins, model, bm_rxn)
        in_rxns = [rxn for rxn in model.boundary if rxn.id.startswith('->')]
        # see if this choice of metabolites can produce the biomass on this
        # network
        solution = model.optimize()
        bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        if solution.status == 'infeasible' or bm_rxn_flux < 1e-10:
            # this environment doesn't count; draw another one
            continue
        # this will be environment number len(rows) + 1
        if (len(rows) + 1) % 100 == 0:
            print(f'On environment {len(rows) + 1}')
        # get the list of food source metabolites
        food = '-'.join(
            [met.id for rxn in in_rxns for met in rxn.metabolites])
        # prune the network
        pruned_net = scn.min_flux_prune(model, bm_rxn)
        rxn_incl = scn.make_rxn_incl(model, pruned_net)
        # get the growth rate on the pruned network
        solution = pruned_net.optimize()
        pruned_growth = solution.fluxes.get(key=bm_rxn.id)
        rows.append((biomass_label, food, rxn_incl, pruned_growth))

    # passing the column names to the constructor keeps this working when
    # there are no rows at all
    return pd.DataFrame(
        rows, columns=['biomass', 'env', 'rxn_incl', 'growth'])