def do_many_prunes(network, export, reps, ins, outs):
    '''
    Given a string_chem_net object and whether or not to allow export,
    prune the network many times and record the sizes of the pruned networks

    Arguments:
    network -- object whose met_list and rxn_list are handed to
               scn.make_cobra_model
    export  -- passed to scn.make_cobra_model as allow_export
    reps    -- number of times to prune the network
    ins     -- number of input metabolites requested from scn.choose_inputs
    outs    -- number of biomass metabolites requested from
               scn.choose_bm_mets

    Returns a pandas DataFrame with one row per prune and the columns
    'rxn_count' and 'met_count'
    '''
    # create the COBRApy model
    model = scn.make_cobra_model(
        network.met_list,
        network.rxn_list,
        allow_export=export
    )
    # collect one [rxn_count, met_count] row per prune and build the DataFrame
    # once at the end; DataFrame.append no longer exists in pandas >= 2.0 and
    # copied the whole frame on every call
    rows = []
    # prune this network reps times
    for rep in range(reps):
        if rep % 10 == 0:
            print(f'On prune {rep} of {reps}')
        # work with a copy of the model so it remains untouched for the next
        # iteration of the loop
        full_model = model.copy()
        # randomly choose the appropriate number of input and output mets
        bm_rxn = scn.choose_bm_mets(outs, full_model)
        scn.choose_inputs(ins, full_model, bm_rxn)
        full_model.objective = bm_rxn
        # see if there's a feasible solution on the full model
        solution = full_model.optimize()
        # can't just check solution.status because sometimes it's feasible but
        # the flux through the biomass reaction is vanishingly small
        bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        while solution.status == 'infeasible' or bm_rxn_flux < 10e-10:
            # if the solution isn't feasible, pick a different environment
            in_rxns = [
                # don't want to remove all boundary reactions because that
                # would also remove all of the export reactions
                rxn for rxn in full_model.boundary if rxn.id.startswith('->')
            ]
            full_model.remove_reactions(in_rxns)
            # use ins (not outs) so every retry keeps the requested number of
            # input metabolites
            scn.choose_inputs(ins, full_model, bm_rxn)
            solution = full_model.optimize()
            bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        # now that we know there's at least one environment that supports
        # growth with this biomass reaction, we can prune the universal network
        pruned_model = scn.min_flux_prune(full_model, bm_rxn)
        # count reactions and metabolites
        rxn_count = len(pruned_model.reactions)
        # metabolites aren't automatically removed when all of their reactions
        # are removed, so find out how many metabolites are left
        met_count = sum(
            1 for m in pruned_model.metabolites if len(m.reactions) > 0
        )
        rows.append([rxn_count, met_count])
    # passing columns explicitly keeps the expected header even if reps == 0
    return pd.DataFrame(rows, columns=['rxn_count', 'met_count'])
import string_chem_net as scn
import itertools as it

# read the seven positional command-line arguments; if the count is wrong,
# print the expected arguments and quit
try:
    monos, max_pol, min_ins, max_ins, min_outs, max_outs, reps = sys.argv[1:]
except ValueError:
    sys.exit(
        'Arguments:\nmonomers\nmax polymer length\nminimum '
        'number of food sources\nmaximum number of food sources\nminimum '
        'number of biomass precursors\nmaximum number of biomass precursors\n'
        'number of times to prune with each setup'
    )

# build the universal network and its COBRApy model with export allowed
SCN = scn.CreateNetwork(monos, int(max_pol))
universal_model = scn.make_cobra_model(
    SCN.met_list, SCN.rxn_list, allow_export=True
)

# every (number of food sources, number of biomass precursors) pair; each
# upper bound gets + 1 because range() excludes its stop value
conditions = list(it.product(
    range(int(min_ins), int(max_ins) + 1),
    range(int(min_outs), int(max_outs) + 1),
))

# will hold the reaction-to-metabolite ratio and the % of pruned reactions
# for each pruned network
output_data = []

# outermost loop over those conditions
    data.columns = ['env', 'rxn_incl', 'growth']
    # add a column with the biomass components
    data['biomass'] = list(
        it.repeat('-'.join([met.id for met in bm_rxn.metabolites]),
                  len(food_mets)))
    # reorder columns
    data = data[['biomass', 'env', 'rxn_incl', 'growth']]
    return (data)


monos = 'ab'  # characters to use as monomers
max_len = 5  # maximum length of each string chemical
ins = 2  # number of food sources / environmental nutrients
envs = 50  # number of different sets of food sources per biomass reaction
outs = 5  # number of biomass precursors
orgs = 10  # number of different biomass reactions
combos = 50  # number of times to perturb coefficients per biomass reaction
threads = 4  # threads to use when pruning in parallel

SCN = scn.CreateNetwork(monos, max_len)
full_model = scn.make_cobra_model(SCN.met_list, SCN.rxn_list)

# prune network using many biomass reactions and environments
# the with-block makes sure the worker process is shut down once map() has
# returned all of its results
# NOTE(review): `threads` is defined above but the pool is created with a
# single worker; confirm whether that is intentional before switching to
# mp.Pool(threads), since it may change how random choices are drawn
with mp.Pool(1) as pool:
    data_bits = pool.map(
        prune_many_times,
        # same arguments every time for orgs times
        [[full_model, ins, outs, envs, combos] for _ in range(orgs)])
# stack the per-call DataFrames into one table and write it out
data = pd.concat(data_bits)
data.to_csv('data/figure_S6_data.csv', index=False)
# Sweep over network sizes: for every combination of monomer count and
# maximum string length, build a network and run FBA on it reps times.
# count how many total reps there will be
# (this is the number of network sizes, i.e. monomer counts times max
# lengths, not the number of FBA repetitions)
total_reps = ((max_monos - min_monos) + 1) * ((max_max_len - min_max_len) + 1)
# running index of the current network size, used only in the progress message
i = 0
# loop over number of unique monomers first
for monos_count in range(min_monos, max_monos + 1):
    # get this number of unique characters
    monos = string.ascii_lowercase[:monos_count]
    # then loop over max string lengths
    for max_len in range(min_max_len, max_max_len + 1):
        i += 1
        print(f'On network size {i} of {total_reps}: {monos}, {max_len}')
        # make a string chemistry network of this size
        # (built once per size, without export reactions; each rep below
        # works on a copy)
        SCN = scn.CreateNetwork(monos, max_len)
        cobra_model = scn.make_cobra_model(SCN.met_list,
                                           SCN.rxn_list,
                                           allow_export=False)
        # now do FBA reps times, with a different set of input and output
        # metabolites each time
        # rep is incremented at the top of the loop body, so the progress
        # message counts from 1
        rep = 0
        while (rep < reps):
            rep += 1
            if rep % 10 == 0:
                print(f'On rep {rep} of {reps}')
            # make a copy to add the new reactions to
            new_model = cobra_model.copy()
            # add a biomass reaction with outs metabolites and ins input
            # reactions, then make the biomass reaction the objective
            bm_rxn = scn.choose_bm_mets(outs, new_model)
            scn.choose_inputs(ins, new_model, bm_rxn)
            new_model.objective = bm_rxn
            # optimize and count number of reactions with flux
            # NOTE(review): only the optimize call is visible in this excerpt;
            # the counting step is presumably further down -- confirm
            solution = new_model.optimize()
        'Arguments:\nmonomers\nmax polymer length\n' +
        'number of food sources\nnumber of times to reselect food sources\n' +
        'number of biomass precursors\nnumber of times to reselect biomass\n' +
        'should there be an export reaction for every metabolite? (yes/no)')

# turn the yes/no argument into a boolean, quitting on anything else
if export not in ('yes', 'no'):
    sys.exit('The last argument must be either "yes" or "no"')
allow_export = (export == 'yes')

# build the universal network and a COBRApy model of it; this model is only
# ever copied, never modified directly
SCN = scn.CreateNetwork(monos, int(max_pol))
untouched_model = scn.make_cobra_model(
    SCN.met_list, SCN.rxn_list, allow_export=allow_export
)
# empty table that will collect information about the pruned networks
all_data = pd.DataFrame(columns=['env', 'rxn_incl', 'biomass'])
# loop over the different biomass reactions
# orgs and outs are converted with int() here -- presumably they arrive as
# command-line strings; their assignment is outside this excerpt
for bm in range(int(orgs)):
    print(f'On biomass reaction {bm}')
    # start by making a copy of the original model so we don't have to remove
    # the biomass reaction each time
    model = untouched_model.copy()
    # add a biomass reaction and set it as the objective
    bm_rxn = scn.choose_bm_mets(int(outs), model)
    model.objective = bm_rxn
    # keep lists of the environments used and the reaction-inclusion vectors of
    # the pruned networks
    # NOTE(review): only the first of these lists is visible in this excerpt
    food_mets = list()