def sarsa_n(nn_model, loss_fn, optimizer, scheduler, state_sample, n_back_step,
            epsilon_greedy):
    total_time_cpu = 0
    total_time_nn = 0
    #reset for each episode; incremented as the policy chooses random vs. network steps
    random_steps_taken = 0
    nn_steps_taken = 0

    final_state = []
    final_KQ_f = []
    final_KQ_r = []
    reached_terminal_state = False
    average_loss = []

    final_reward = 0
    sum_reward_episode = 0
    end_of_path = 5000  #this is the maximum length a path can take
    KQ_f_matrix = np.zeros(shape=(num_rxns, end_of_path + 1))
    KQ_r_matrix = np.zeros(shape=(num_rxns, end_of_path + 1))
    states_matrix = np.zeros(shape=(num_rxns, end_of_path + 1))
    delta_S_metab_matrix = np.zeros(shape=(nvar, end_of_path + 1))
    v_log_counts_matrix = np.zeros(shape=(nvar, end_of_path + 1))

    states_matrix[:, 0] = state_sample

    res_lsq = least_squares(max_entropy_functions.derivatives,
                            v_log_counts_static,
                            method='lm',
                            xtol=1e-15,
                            args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                                  delta_increment_for_small_concs,
                                  Keq_constant, states_matrix[:, 0]))

    v_log_counts_matrix[:, 0] = res_lsq.x.copy()
    log_metabolites = np.append(v_log_counts_matrix[:, 0], f_log_counts)

    rxn_flux_init = max_entropy_functions.oddsDiff(
        v_log_counts_matrix[:, 0], f_log_counts, mu0, S_mat, R_back_mat, P_mat,
        delta_increment_for_small_concs, Keq_constant, states_matrix[:, 0])
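    # Forward (KQ_f) and reverse (KQ_r) reaction odds; the reverse call uses the
    # negated stoichiometric matrix and the inverted equilibrium constants.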
    KQ_f_matrix[:, 0] = max_entropy_functions.odds(
        log_metabolites, mu0, S_mat, R_back_mat, P_mat,
        delta_increment_for_small_concs, Keq_constant)

    Keq_inverse = np.power(Keq_constant, -1)
    KQ_r_matrix[:, 0] = max_entropy_functions.odds(
        log_metabolites, mu0, -S_mat, P_mat, R_back_mat,
        delta_increment_for_small_concs, Keq_inverse, -1)

    delta_S_metab_matrix[:, 0] = max_entropy_functions.calc_deltaS_metab(
        v_log_counts_matrix[:, 0], target_v_log_counts)

    reward_vec = np.zeros(end_of_path + 1)

    reward_vec[0] = 0.0
    rxn_flux_path = rxn_flux_init.copy()

    for t in range(0, end_of_path):
        if (t < end_of_path):
            #This represents the choice from the current policy.
            [React_Choice,reward_vec[t+1],\
            KQ_f_matrix[:,t+1], KQ_r_matrix[:,t+1],\
            v_log_counts_matrix[:,t+1],\
            states_matrix[:,t+1],\
            delta_S_metab_matrix[:,t+1],\
            used_random_step,time_cpu,time_nn] = policy_function(nn_model, states_matrix[:,t], v_log_counts_matrix[:,t], epsilon_greedy)#regulate each reaction.

            total_time_cpu += time_cpu
            total_time_nn += time_nn

            if (used_random_step):
                random_steps_taken += 1
            else:
                nn_steps_taken += 1

            if (React_Choice == -1):
                print("bad reaction choice, using action = -1")
                break

            rxn_flux_path = max_entropy_functions.oddsDiff(
                v_log_counts_matrix[:, t + 1], f_log_counts, mu0, S_mat,
                R_back_mat, P_mat, delta_increment_for_small_concs,
                Keq_constant, states_matrix[:, t + 1])
            if (np.max(rxn_flux_path) < 1.0):
                print("draining flux")
                break
            epr_path = max_entropy_functions.entropy_production_rate(
                KQ_f_matrix[:, t + 1], KQ_r_matrix[:, t + 1],
                states_matrix[:, t + 1])
            sum_reward_episode += reward_vec[t + 1]

            current_state = states_matrix[:, t + 1].copy()

            #We stop the path once there are no more positive loss function values.
            if ((delta_S_metab_matrix[:, t + 1] <= 0.0).all()):
                end_of_path = t + 1  #stops simulation at step t+1

                reached_terminal_state = True
                final_state = states_matrix[:, t + 1].copy()
                final_KQ_f = KQ_f_matrix[:, t + 1].copy()
                final_KQ_r = KQ_r_matrix[:, t + 1].copy()
                final_reward = epr_path
                print(
                    "**************************************Path Length ds<0******************************************"
                )
                print(end_of_path)
                print("Final STATE")
                print(states_matrix[:, t + 1])
                print(rxn_flux_path)
                print("original epr")
                print(epr_path)
                print("all rewards")
                print(reward_vec[0:t + 1])

        ##BEGIN LEARNING
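        # n-step SARSA target for the state visited at time tau:
        #   G_tau = sum_{i=tau+1}^{min(tau+n, end_of_path)} gamma^(i-tau-1) * r_i
        #           + gamma^n * V(S_{tau+n})   (bootstrap term only if tau+n < end_of_path)
        # e.g. with n_back_step=3, gamma=0.9 and rewards [1, 2, 3]:
        #   G_tau = 1 + 0.9*2 + 0.81*3 + 0.729*V(S_{tau+3})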
        tau = t - n_back_step + 1

        if (tau >= 0):
            #breakpoint()
            estimate_value = torch.zeros(1, device=device)

            for i in range(tau + 1, min(tau + n_back_step, end_of_path) + 1):
                estimate_value += (gamma**(i - tau - 1)) * reward_vec[i]

            if ((tau + n_back_step) < end_of_path):
                begin_nn = time.time()
                value_tau_n = state_value(
                    nn_model,
                    torch.from_numpy(
                        states_matrix[:,
                                      tau + n_back_step]).float().to(device))
                end_nn = time.time()
                total_time_nn += end_nn - begin_nn
                estimate_value += (gamma**(n_back_step)) * value_tau_n

            begin_nn = time.time()
            value_tau = state_value(
                nn_model,
                torch.from_numpy(states_matrix[:, tau]).float().to(device))
            end_nn = time.time()
            total_time_nn += end_nn - begin_nn

            if (value_tau.requires_grad == False):
                breakpoint()
            if (estimate_value.requires_grad == True):
                estimate_value.detach_()

            #WARNING
            #loss ordering should be input with requires_grad == True,
            #followed by target with requires_grad == False
            #breakpoint()
            begin_nn = time.time()
            loss = loss_fn(value_tau, estimate_value)  #MSE

            optimizer.zero_grad()
            loss.backward()
            clipping_value = 1.0
            torch.nn.utils.clip_grad_norm_(nn_model.parameters(),
                                           clipping_value)

            optimizer.step()
            end_nn = time.time()
            total_time_nn += end_nn - begin_nn
            average_loss.append(loss.item())

        if (tau >= (end_of_path - 1)):
            break

    #after episode is finished, take average loss
    average_loss_episode = np.mean(average_loss)
    print("index of max error on path")
    print(average_loss.index(max(average_loss)))
    return [sum_reward_episode, average_loss_episode,max(average_loss),final_reward, final_state, final_KQ_f,final_KQ_r,\
            reached_terminal_state, random_steps_taken,nn_steps_taken]
def policy_function(nn_model, state, v_log_counts_path, *args):
    #The last optional argument is epsilon, used for the epsilon-greedy exploration step.
    varargin = args
    nargin = len(varargin)
    epsilon_greedy = 0.0
    if (nargin == 1):
        epsilon_greedy = varargin[0]

    used_random_step = False

    rxn_choices = [i for i in range(num_rxns)]

    unif_rand = np.random.uniform(0, 1)
    if ((unif_rand < epsilon_greedy) and (len(rxn_choices) > 0)):
        used_random_step = True
        random_choice = random.choice(rxn_choices)
        final_action = random_choice

        res_lsq = least_squares(max_entropy_functions.derivatives,
                                v_log_counts_path,
                                method='lm',
                                xtol=1e-15,
                                args=(f_log_counts, mu0, S_mat, R_back_mat,
                                      P_mat, delta_increment_for_small_concs,
                                      Keq_constant, state))

        final_v_log_counts = res_lsq.x

        new_log_metabolites = np.append(final_v_log_counts, f_log_counts)
        newE = max_entropy_functions.calc_new_enzyme_simple(
            state, final_action)
        final_state = state.copy()  #DO NOT MODIFY ORIGINAL STATE
        final_state[final_action] = newE

        final_delta_s_metab = max_entropy_functions.calc_deltaS_metab(
            final_v_log_counts, target_v_log_counts)
        final_KQ_f = max_entropy_functions.odds(
            new_log_metabolites, mu0, S_mat, R_back_mat, P_mat,
            delta_increment_for_small_concs, Keq_constant)
        Keq_inverse = np.power(Keq_constant, -1)
        final_KQ_r = max_entropy_functions.odds(
            new_log_metabolites, mu0, -S_mat, P_mat, R_back_mat,
            delta_increment_for_small_concs, Keq_inverse, -1)

        value_current_state = state_value(
            nn_model,
            torch.from_numpy(final_state).float().to(device))
        value_current_state = value_current_state.item()
        final_reward = reward_value(final_v_log_counts, v_log_counts_path, \
                                    final_KQ_f, final_KQ_r,\
                                    final_state, state)

    else:
        #Greedy branch: choose the action with the best predicted value based on environmental feedback

        v_log_counts = v_log_counts_path

        log_metabolites = np.append(v_log_counts, f_log_counts)

        rxn_flux = max_entropy_functions.oddsDiff(
            v_log_counts, f_log_counts, mu0, S_mat, R_back_mat, P_mat,
            delta_increment_for_small_concs, Keq_constant, state)
        KQ_f = max_entropy_functions.odds(log_metabolites, mu0, S_mat,
                                          R_back_mat, P_mat,
                                          delta_increment_for_small_concs,
                                          Keq_constant)
        Keq_inverse = np.power(Keq_constant, -1)
        KQ_r = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat,
                                          R_back_mat,
                                          delta_increment_for_small_concs,
                                          Keq_inverse, -1)

        [RR, Jac
         ] = max_entropy_functions.calc_Jac2(v_log_counts, f_log_counts, S_mat,
                                             delta_increment_for_small_concs,
                                             KQ_f, KQ_r, state)
        A = max_entropy_functions.calc_A(v_log_counts, f_log_counts, S_mat,
                                         Jac, state)

        delta_S_metab = max_entropy_functions.calc_deltaS_metab(
            v_log_counts, target_v_log_counts)

        [ccc, fcc] = max_entropy_functions.conc_flux_control_coeff(
            nvar, A, S_mat, rxn_flux, RR)

        indices = [i for i in range(0, len(Keq_constant))]

        #minimal variables to run optimization
        #variables=[state, v_log_counts, f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant]

        #with Pool() as pool:
        #    async_result = pool.starmap(potential_step, zip(indices, repeat(variables)))
        #    pool.close()
        #    pool.join()
        #end = time.time()
        #print(v_log_counts_path)
        async_result = pstep.dispatch(rxn_choices, S_mat, R_back_mat, P_mat,
                                      Keq_constant, state, f_log_counts,
                                      v_log_counts_path)
        #print(async_result[0])
        temp_action_value = -np.inf
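        # Score every candidate regulation with a one-step lookahead,
        # action_value = reward + gamma * V(trial state), and keep the best one.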
        for act in range(0, len(async_result)):

            #new_v_log_counts = async_result[act][0] #output from poo
            new_v_log_counts = async_result[act]
            new_log_metabolites = np.append(new_v_log_counts, f_log_counts)

            newE = max_entropy_functions.calc_new_enzyme_simple(state, act)
            trial_state_sample = state.copy()  #DO NOT MODIFY ORIGINAL STATE
            trial_state_sample[act] = newE
            new_delta_S_metab = max_entropy_functions.calc_deltaS_metab(
                new_v_log_counts, target_v_log_counts)

            KQ_f_new = max_entropy_functions.odds(
                new_log_metabolites, mu0, S_mat, R_back_mat, P_mat,
                delta_increment_for_small_concs, Keq_constant)
            KQ_r_new = max_entropy_functions.odds(
                new_log_metabolites, mu0, -S_mat, P_mat, R_back_mat,
                delta_increment_for_small_concs, Keq_inverse, -1)

            value_current_state = state_value(
                nn_model,
                torch.from_numpy(trial_state_sample).float().to(device))
            value_current_state = value_current_state.item()

            current_reward = reward_value(new_v_log_counts, v_log_counts, \
                                        KQ_f_new, KQ_r_new,\
                                        trial_state_sample, state)
            #print(current_reward)

            action_value = current_reward + (gamma) * value_current_state

            if (action_value > temp_action_value):
                #then a new action is the best.
                temp_action_value = action_value

                #set best output variables
                final_action = act
                final_reward = current_reward
                final_KQ_f = KQ_f_new
                final_KQ_r = KQ_r_new
                final_v_log_counts = new_v_log_counts
                final_state = trial_state_sample
                final_delta_s_metab = new_delta_S_metab
    #print(final_state)
    #print('final_delta_s_metab')
    #print(final_delta_s_metab)
    return [final_action,\
            final_reward,\
            final_KQ_f,\
            final_KQ_r,\
            final_v_log_counts,\
            final_state,\
            final_delta_s_metab,used_random_step,0.0,0.0]
Example 3
def run(argv):
    try:
        os.makedirs(cwd + '/GLYCOLYSIS_TCA_GOGAT/data')
    except FileExistsError:
        # directory already exists
        pass
    try:
        os.makedirs(cwd + '/GLYCOLYSIS_TCA_GOGAT/models_final_data')
    except FileExistsError:
        # directory already exists
        pass

    pd.set_option('display.max_columns', None, 'display.max_rows', None)

    ###Default Values
    #If no experimental data is available, we can estimate using 'rule-of-thumb' values of 0.001
    use_experimental_data = False
    learning_rate = 1e-8  #3rd
    epsilon = 0.5  #4th
    eps_threshold = 25  #5th
    gamma = 0.9  #6th
    updates = 500
    penalty_reward_scalar = 0.0

    #load input
    total = len(sys.argv)
    cmdargs = str(sys.argv)
    print("The total numbers of args passed to the script: %d " % total)
    print("Args list: %s " % cmdargs)
    print("Script name: %s" % str(sys.argv[0]))
    for i in range(total):
        print("Argument # %d : %s" % (i, str(sys.argv[i])))

    sim_number = int(sys.argv[1])
    n_back_step = int(sys.argv[2])
    if (n_back_step < 1):
        print('n must be larger than zero')
        return
    if (total > 3):
        use_experimental_data = bool(int(sys.argv[3]))
    if (total > 4):
        learning_rate = float(sys.argv[4])
    if (total > 5):
        epsilon = float(sys.argv[5])
    if (total > 6):
        eps_threshold = float(sys.argv[6])
    if (total > 7):
        gamma = float(sys.argv[7])
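    # Example invocation (hypothetical script name and argument values):
    #   python run_glycolysis_tca_gogat.py 1 8 0 1e-8 0.5 25 0.9
    #   -> sim_number=1, n_back_step=8, use_experimental_data=False,
    #      learning_rate=1e-8, epsilon=0.5, eps_threshold=25, gamma=0.9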

    print("sim")
    print(sim_number)
    print("n_back_step")
    print(n_back_step)
    print("using experimental metabolite data")
    print(use_experimental_data)
    print("learning_rate")
    print(learning_rate)
    print("epsilon")
    print(epsilon)
    print("eps_threshold")
    print(eps_threshold)
    print("gamma")
    print(gamma)

    #Initial Values
    T = 298.15
    R = 8.314e-03
    RT = R * T
    N_avogadro = 6.022140857e+23
    VolCell = 1.0e-15
    Concentration2Count = N_avogadro * VolCell
    concentration_increment = 1 / (N_avogadro * VolCell)
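    # Concentration2Count converts molar concentration to molecule counts in a
    # 1 fL cell: counts = conc [mol/L] * N_avogadro [1/mol] * VolCell [L].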

    np.set_printoptions(suppress=True)  #suppress scientific notation when printing

    fdat = open(cwd + '/GLYCOLYSIS_TCA_GOGAT/GLYCOLYSIS_TCA_GOGAT.dat', 'r')

    left = 'LEFT'
    right = 'RIGHT'
    left_compartment = 'LEFT_COMPARTMENT'
    right_compartment = 'RIGHT_COMPARTMENT'
    enzyme_level = 'ENZYME_LEVEL'
    deltag0 = 'DGZERO'
    deltag0_sigma = 'DGZERO StdDev'
    same_compartment = 'Same Compartment?'
    full_rxn = 'Full Rxn'

    reactions = pd.DataFrame(index=[],
                             columns=[
                                 left, right, left_compartment,
                                 right_compartment, enzyme_level, deltag0,
                                 deltag0_sigma, same_compartment, full_rxn
                             ])
    reactions.index.name = 'REACTION'
    S_matrix = pd.DataFrame(index=[], columns=[enzyme_level])
    S_matrix.index.name = 'REACTION'

    for line in fdat:
        if (line.startswith('REACTION')):
            rxn_name = line[9:-1].lstrip()
            S_matrix.loc[rxn_name, enzyme_level] = 1.0
            reactions.loc[rxn_name, enzyme_level] = 1.0

        if (re.match("^LEFT\s", line)):
            line = line.upper()
            left_rxn = line[4:-1].lstrip()
            left_rxn = re.sub(r'\s+$', '',
                              left_rxn)  #Remove trailing white space
            reactions.loc[rxn_name, left] = left_rxn

        elif (re.match('^RIGHT\s', line)):
            line = line.upper()
            right_rxn = line[5:-1].lstrip()
            right_rxn = re.sub(r'\s+$', '',
                               right_rxn)  #Remove trailing white space
            reactions.loc[rxn_name, right] = right_rxn

        elif (line.startswith(left_compartment)):
            cpt_name = line[16:-1].lstrip()
            reactions.loc[rxn_name, left_compartment] = cpt_name
            reactants = re.split(' \+ ', left_rxn)
            for idx in reactants:
                values = re.split(' ', idx)
                if len(values) == 2:
                    stoichiometry = np.float64(values[0])
                    molecule = values[1]
                    if not re.search(':', molecule):
                        molecule = molecule + ':' + cpt_name
                else:
                    stoichiometry = np.float64(-1.0)
                    molecule = values[0]
                    if not re.search(':', molecule):
                        molecule = molecule + ':' + cpt_name
                S_matrix.loc[rxn_name, molecule] = stoichiometry

        elif (line.startswith(right_compartment)):
            cpt_name = line[17:-1].lstrip()
            reactions.loc[rxn_name, right_compartment] = cpt_name
            products = re.split(' \+ ', right_rxn)
            for idx in products:
                values = re.split(' ', idx)
                if len(values) == 2:
                    stoichiometry = np.float64(values[0])
                    molecule = values[1]
                    if not re.search(':', molecule):
                        molecule = molecule + ':' + cpt_name
                else:
                    stoichiometry = np.float64(1.0)
                    molecule = values[0]
                    if not re.search(':', molecule):
                        molecule = molecule + ':' + cpt_name
                S_matrix.loc[rxn_name, molecule] = stoichiometry

        elif (re.match("^ENZYME_LEVEL\s", line)):
            level = line[12:-1].lstrip()
            reactions.loc[rxn_name, enzyme_level] = float(level)
            S_matrix.loc[rxn_name, enzyme_level] = float(level)

        elif re.match('^COMMENT', line):
            continue
        elif re.match(r'//', line):
            continue
        elif re.match('^#', line):
            continue

    fdat.close()
    S_matrix.fillna(0, inplace=True)
    S_active = S_matrix[S_matrix[enzyme_level] > 0.0]
    active_reactions = reactions[reactions[enzyme_level] > 0.0]
    del S_active[enzyme_level]
    S_active = S_active.loc[:, (S_active != 0).any(axis=0)]
    np.shape(S_active.values)

    reactions[full_rxn] = reactions[left] + ' = ' + reactions[right]

    if (1):
        for idx in reactions.index:
            boltzmann_rxn_str = reactions.loc[idx, 'Full Rxn']
            if re.search(':', boltzmann_rxn_str):
                all_cmprts = re.findall(':\S+', boltzmann_rxn_str)
                [s.replace(':', '') for s in all_cmprts]  # remove all the ':'s
                different_compartments = 0
                for cmpt in all_cmprts:
                    if not re.match(all_cmprts[0], cmpt):
                        different_compartments = 1
                if ((not different_compartments)
                        and (pd.isnull(reactions.loc[idx, left_compartment])
                             or pd.isnull(reactions.loc[idx, right_compartment]))):
                    reactions.loc[idx, left_compartment] = cmpt
                    reactions.loc[idx, right_compartment] = cmpt
                    reactions.loc[idx, same_compartment] = True
                if different_compartments:
                    reactions.loc[idx, same_compartment] = False
            else:
                if (reactions.loc[idx, left_compartment] == reactions.loc[
                        idx, right_compartment]):
                    reactions.loc[idx, same_compartment] = True
                else:
                    reactions.loc[idx, same_compartment] = False

    # ## Calculate Standard Free Energies of Reaction
    reactions.loc['CSm', deltag0] = -35.1166
    reactions.loc['ACONTm', deltag0] = 7.62949
    reactions.loc['ICDHxm', deltag0] = -2.872
    reactions.loc['AKGDam', deltag0] = -36.3549
    reactions.loc['SUCOASm', deltag0] = 1.924481
    reactions.loc['SUCD1m', deltag0] = 0
    reactions.loc['FUMm', deltag0] = -3.44873
    reactions.loc['MDHm', deltag0] = 29.9942
    reactions.loc['GAPD', deltag0] = 6.68673
    reactions.loc['PGK', deltag0] = -18.4733
    reactions.loc['TPI', deltag0] = 5.48642
    reactions.loc['FBA', deltag0] = 20.5096
    reactions.loc['PYK', deltag0] = -27.5366
    reactions.loc['PGM', deltag0] = 4.19953
    reactions.loc['ENO', deltag0] = -4.08222
    reactions.loc['HEX1', deltag0] = -17.0578
    reactions.loc['PGI', deltag0] = 2.52401
    reactions.loc['PFK', deltag0] = -15.4549
    reactions.loc['PYRt2m', deltag0] = -RT * np.log(10)
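    # -RT*ln(10) is the free-energy change for a tenfold concentration ratio,
    # presumably the gradient driving pyruvate transport into the mitochondria.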
    reactions.loc['PDHm', deltag0] = -43.9219
    reactions.loc['GOGAT', deltag0] = 48.8552

    reactions.loc['CSm', deltag0_sigma] = 0.930552
    reactions.loc['ACONTm', deltag0_sigma] = 0.733847
    reactions.loc['ICDHxm', deltag0_sigma] = 7.62095
    reactions.loc['AKGDam', deltag0_sigma] = 7.97121
    reactions.loc['SUCOASm', deltag0_sigma] = 1.48197
    reactions.loc['SUCD1m', deltag0_sigma] = 2.31948
    reactions.loc['FUMm', deltag0_sigma] = 0.607693
    reactions.loc['MDHm', deltag0_sigma] = 0.422376
    reactions.loc['GAPD', deltag0_sigma] = 0.895659
    reactions.loc['PGK', deltag0_sigma] = 0.889982
    reactions.loc['TPI', deltag0_sigma] = 0.753116
    reactions.loc['FBA', deltag0_sigma] = 0.87227
    reactions.loc['PYK', deltag0_sigma] = 0.939774
    reactions.loc['PGM', deltag0_sigma] = 0.65542
    reactions.loc['ENO', deltag0_sigma] = 0.734193
    reactions.loc['HEX1', deltag0_sigma] = 0.715237
    reactions.loc['PGI', deltag0_sigma] = 0.596775
    reactions.loc['PFK', deltag0_sigma] = 0.886629
    reactions.loc['PYRt2m', deltag0_sigma] = 0
    reactions.loc['PDHm', deltag0_sigma] = 7.66459
    reactions.loc['GOGAT', deltag0_sigma] = 2.0508

    # ## Set Fixed Concentrations/Boundary Conditions
    conc = 'Conc'
    variable = 'Variable'
    conc_exp = 'Conc_Experimental'
    metabolites = pd.DataFrame(index=S_active.columns,
                               columns=[conc, conc_exp, variable])
    metabolites[conc] = 0.001
    metabolites[variable] = True

    # Set the fixed metabolites:
    metabolites.loc['ATP:MITOCHONDRIA', conc] = 9.600000e-03
    metabolites.loc['ATP:MITOCHONDRIA', variable] = False
    metabolites.loc['ADP:MITOCHONDRIA', conc] = 5.600000e-04
    metabolites.loc['ADP:MITOCHONDRIA', variable] = False
    metabolites.loc['ORTHOPHOSPHATE:MITOCHONDRIA', conc] = 2.000000e-02
    metabolites.loc['ORTHOPHOSPHATE:MITOCHONDRIA', variable] = False

    metabolites.loc['ATP:CYTOSOL', conc] = 9.600000e-03
    metabolites.loc['ATP:CYTOSOL', variable] = False
    metabolites.loc['ADP:CYTOSOL', conc] = 5.600000e-04
    metabolites.loc['ADP:CYTOSOL', variable] = False
    metabolites.loc['ORTHOPHOSPHATE:CYTOSOL', conc] = 2.000000e-02
    metabolites.loc['ORTHOPHOSPHATE:CYTOSOL', variable] = False

    metabolites.loc['NADH:MITOCHONDRIA', conc] = 8.300000e-05
    metabolites.loc['NADH:MITOCHONDRIA', variable] = False
    metabolites.loc['NAD+:MITOCHONDRIA', conc] = 2.600000e-03
    metabolites.loc['NAD+:MITOCHONDRIA', variable] = False

    metabolites.loc['NADH:CYTOSOL', conc] = 8.300000e-05
    metabolites.loc['NADH:CYTOSOL', variable] = False
    metabolites.loc['NAD+:CYTOSOL', conc] = 2.600000e-03
    metabolites.loc['NAD+:CYTOSOL', variable] = False

    metabolites.loc['ACETYL-COA:MITOCHONDRIA', conc] = 6.06E-04
    metabolites.loc['ACETYL-COA:MITOCHONDRIA', variable] = True

    metabolites.loc['COA:MITOCHONDRIA', conc] = 1.400000e-03
    metabolites.loc['COA:MITOCHONDRIA', variable] = False

    metabolites.loc['CO2:MITOCHONDRIA', conc] = 1.000000e-04
    metabolites.loc['CO2:MITOCHONDRIA', variable] = False

    metabolites.loc['H2O:MITOCHONDRIA', conc] = 55.5
    metabolites.loc['H2O:MITOCHONDRIA', variable] = False
    metabolites.loc['H2O:CYTOSOL', conc] = 55.5
    metabolites.loc['H2O:CYTOSOL', variable] = False

    metabolites.loc['BETA-D-GLUCOSE:CYTOSOL', conc] = 2.000000e-03
    metabolites.loc['BETA-D-GLUCOSE:CYTOSOL', variable] = False

    metabolites.loc['L-GLUTAMATE:MITOCHONDRIA', conc] = 9.60e-05
    metabolites.loc['L-GLUTAMATE:MITOCHONDRIA', variable] = False
    metabolites.loc['L-GLUTAMINE:MITOCHONDRIA', conc] = 3.81e-03
    metabolites.loc['L-GLUTAMINE:MITOCHONDRIA', variable] = False

    #When loading experimental concentrations, first copy current
    #rule of thumb then overwrite with data values.
    metabolites[conc_exp] = metabolites[conc]
    metabolites.loc['(S)-MALATE:MITOCHONDRIA', conc_exp] = 1.68e-03
    metabolites.loc['BETA-D-GLUCOSE-6-PHOSPHATE:CYTOSOL', conc_exp] = 7.88e-03
    metabolites.loc['D-GLYCERALDEHYDE-3-PHOSPHATE:CYTOSOL',
                    conc_exp] = 2.71e-04
    metabolites.loc['PYRUVATE:MITOCHONDRIA', conc_exp] = 3.66e-03
    metabolites.loc['ISOCITRATE:MITOCHONDRIA', conc_exp] = 1.000000e-03
    metabolites.loc['OXALOACETATE:MITOCHONDRIA', conc_exp] = 1.000000e-03
    metabolites.loc['3-PHOSPHO-D-GLYCEROYL_PHOSPHATE:CYTOSOL',
                    conc_exp] = 1.000000e-03
    metabolites.loc['ACETYL-COA:MITOCHONDRIA', conc_exp] = 6.06e-04
    metabolites.loc['CITRATE:MITOCHONDRIA', conc_exp] = 1.96e-03
    metabolites.loc['2-OXOGLUTARATE:MITOCHONDRIA', conc_exp] = 4.43e-04
    metabolites.loc['FUMARATE:MITOCHONDRIA', conc_exp] = 1.15e-04
    metabolites.loc['SUCCINYL-COA:MITOCHONDRIA', conc_exp] = 2.33e-04
    metabolites.loc['3-PHOSPHO-D-GLYCERATE:CYTOSOL', conc_exp] = 1.54e-03
    metabolites.loc['GLYCERONE_PHOSPHATE:CYTOSOL', conc_exp] = 3.060000e-03
    metabolites.loc['SUCCINATE:MITOCHONDRIA', conc_exp] = 5.69e-04
    metabolites.loc['PHOSPHOENOLPYRUVATE:CYTOSOL', conc_exp] = 1.84e-04
    metabolites.loc['D-FRUCTOSE_1,6-BISPHOSPHATE:CYTOSOL', conc_exp] = 1.52e-02
    metabolites.loc['D-FRUCTOSE_6-PHOSPHATE:CYTOSOL', conc_exp] = 2.52e-03
    metabolites.loc['PYRUVATE:CYTOSOL', conc_exp] = 3.66E-03
    metabolites.loc['2-PHOSPHO-D-GLYCERATE:CYTOSOL', conc_exp] = 9.180e-05

    #%%
    nvariables = metabolites[metabolites[variable]].count()
    nvar = nvariables[variable]

    metabolites.sort_values(
        by=variable,
        axis=0,
        ascending=False,
        inplace=True,
    )

    # ## Prepare model for optimization

    # - Adjust S Matrix to use only reactions with activity > 0, if necessary.
    # - Water stoichiometry in the stoichiometric matrix needs to be set to zero since water is held constant.
    # - The initial concentrations of the variable metabolites are random.
    # - All concentrations are changed to log counts.
    # - Equilibrium constants are calculated from standard free energies of reaction.
    # - R (reactant) and P (product) matrices are derived from S.

    # Make sure all the indices and columns are in the correct order:
    active_reactions = reactions[reactions[enzyme_level] > 0.0]

    Sactive_index = S_active.index

    active_reactions = active_reactions.reindex(index=Sactive_index, copy=False)
    S_active = S_active.reindex(columns=metabolites.index, copy=False)
    S_active['H2O:MITOCHONDRIA'] = 0
    S_active['H2O:CYTOSOL'] = 0

    where_are_NaNs = np.isnan(S_active)
    S_active[where_are_NaNs] = 0

    S_mat = S_active.values

    Keq_constant = np.exp(-active_reactions[deltag0].astype('float') / RT)
    Keq_constant = Keq_constant.values

    P_mat = np.where(S_mat > 0, S_mat, 0)
    R_back_mat = np.where(S_mat < 0, S_mat, 0)
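    # P_mat keeps the positive (product) entries of S; R_back_mat keeps the
    # negative (reactant) entries, as described in the notes above.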
    E_regulation = np.ones(
        Keq_constant.size
    )  # This is the vector of enzyme activities, Range: 0 to 1.

    mu0 = 1  #Dummy parameter for now; reserved for free energies of formation

    conc_type = conc
    if (use_experimental_data):
        print("USING EXPERIMENTAL DATA")
        conc_type = conc_exp

    variable_concs = np.array(metabolites[conc_type].iloc[0:nvar].values,
                              dtype=np.float64)
    v_log_concs = -10 + 10 * np.random.rand(
        nvar)  #Vary between 1 M to 1.0e-10 M
    v_concs = np.exp(v_log_concs)
    v_log_counts_stationary = np.log(v_concs * Concentration2Count)
    v_log_counts = v_log_counts_stationary
    #display(v_log_counts)

    fixed_concs = np.array(metabolites[conc_type].iloc[nvar:].values,
                           dtype=np.float64)
    fixed_counts = fixed_concs * Concentration2Count
    f_log_counts = np.log(fixed_counts)

    complete_target_log_counts = np.log(Concentration2Count *
                                        metabolites[conc_type].values)
    target_v_log_counts = complete_target_log_counts[0:nvar]
    target_f_log_counts = complete_target_log_counts[nvar:]

    delta_increment_for_small_concs = (10**-50) * np.zeros(
        metabolites[conc_type].values.size)

    variable_concs_begin = np.array(metabolites[conc_type].iloc[0:nvar].values,
                                    dtype=np.float64)

    #%% Basic test
    v_log_counts = np.log(variable_concs_begin * Concentration2Count)

    E_regulation = np.ones(
        Keq_constant.size
    )  # This is the vector of enzyme activities, Range: 0 to 1.
    nvar = v_log_counts.size
    #WARNING: INPUT LOG_COUNTS TO ALL FUNCTIONS. CONVERSION TO COUNTS IS DONE INTERNALLY
    res_lsq1 = least_squares(max_entropy_functions.derivatives,
                             v_log_counts,
                             method='lm',
                             xtol=1e-15,
                             args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                                   delta_increment_for_small_concs,
                                   Keq_constant, E_regulation))
    res_lsq2 = least_squares(max_entropy_functions.derivatives,
                             v_log_counts,
                             method='dogbox',
                             xtol=1e-15,
                             args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                                   delta_increment_for_small_concs,
                                   Keq_constant, E_regulation))

    rxn_flux = max_entropy_functions.oddsDiff(res_lsq1.x, f_log_counts, mu0,
                                              S_mat, R_back_mat, P_mat,
                                              delta_increment_for_small_concs,
                                              Keq_constant, E_regulation)

    begin_log_metabolites = np.append(res_lsq1.x, f_log_counts)

    E_regulation = np.ones(
        Keq_constant.size
    )  # This is the vector of enzyme activities, Range: 0 to 1.
    log_metabolites = np.append(res_lsq1.x, f_log_counts)
    KQ_f = max_entropy_functions.odds(log_metabolites, mu0, S_mat, R_back_mat,
                                      P_mat, delta_increment_for_small_concs,
                                      Keq_constant)

    Keq_inverse = np.power(Keq_constant, -1)
    KQ_r = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat,
                                      R_back_mat,
                                      delta_increment_for_small_concs,
                                      Keq_inverse, -1)

    [RR,
     Jac] = max_entropy_functions.calc_Jac2(res_lsq1.x, f_log_counts, S_mat,
                                            delta_increment_for_small_concs,
                                            KQ_f, KQ_r, E_regulation)
    A = max_entropy_functions.calc_A(res_lsq1.x, f_log_counts, S_mat, Jac,
                                     E_regulation)

    [ccc, fcc
     ] = max_entropy_functions.conc_flux_control_coeff(nvar, A, S_mat,
                                                       rxn_flux, RR)

    React_Choice = 6

    newE = max_entropy_functions.calc_reg_E_step(E_regulation, React_Choice,
                                                 nvar, res_lsq1.x,
                                                 f_log_counts,
                                                 complete_target_log_counts,
                                                 S_mat, A, rxn_flux, KQ_f)

    delta_S_metab = max_entropy_functions.calc_deltaS_metab(
        res_lsq1.x, target_v_log_counts)

    ipolicy = 7  #use ipolicy=1 or 4

    #%% END Basic test

    #Machine learning
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Using device:', device)

    #set variables in ML program
    me.cwd = cwd
    me.device = device
    me.v_log_counts_static = v_log_counts_stationary
    me.target_v_log_counts = target_v_log_counts
    me.complete_target_log_counts = complete_target_log_counts
    me.Keq_constant = Keq_constant
    me.f_log_counts = f_log_counts

    me.P_mat = P_mat
    me.R_back_mat = R_back_mat
    me.S_mat = S_mat
    me.delta_increment_for_small_concs = delta_increment_for_small_concs
    me.nvar = nvar
    me.mu0 = mu0

    me.gamma = gamma
    me.num_rxns = Keq_constant.size
    me.penalty_reward_scalar = penalty_reward_scalar

    #%%
    N, D_in, H, D_out = 1, Keq_constant.size, 20 * Keq_constant.size, 1

    #create neural network
    nn_model = torch.nn.Sequential(torch.nn.Linear(D_in, H), torch.nn.Tanh(),
                                   torch.nn.Linear(H, D_out)).to(device)
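    # Value network: one input per reaction activity, a single tanh hidden layer
    # of width 20 * num_rxns, and a scalar state-value output.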

    loss_fn = torch.nn.MSELoss(reduction='sum')
    #loss_fn = torch.nn.L1Loss()
    #learning_rate=5e-6
    #optimizer = torch.optim.SGD(nn_model.parameters(), lr=learning_rate, momentum=0.9)
    optimizer = torch.optim.SGD(nn_model.parameters(),
                                lr=learning_rate,
                                momentum=0.9)

    #optimizer = torch.optim.Adam(nn_model.parameters(), lr=3e-4)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=500,
                                                           verbose=True,
                                                           min_lr=1e-10,
                                                           cooldown=10,
                                                           threshold=1e-5)

    v_log_counts = v_log_counts_stationary.copy()
    episodic_loss = []
    episodic_loss_max = []
    episodic_epr = []
    episodic_reward = []

    episodic_nn_step = []
    episodic_random_step = []

    epsilon_greedy_init = epsilon

    final_states = np.zeros(Keq_constant.size)
    final_KQ_fs = np.zeros(Keq_constant.size)
    final_KQ_rs = np.zeros(Keq_constant.size)
    epr_per_state = []

    for update in range(0, updates):

        x_changing = 1 * torch.rand(1000, D_in, device=device)
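        # Random probe batch, fixed for this update; predictions on it are compared
        # before and after the episode to monitor how much the value network moved.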

        #generate state to use
        state_sample = np.zeros(Keq_constant.size)
        for sample in range(0, len(state_sample)):
            state_sample[sample] = np.random.uniform(1, 1)

        #annealing test
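        # epsilon is halved every eps_threshold updates, e.g. with epsilon=0.5 and
        # eps_threshold=25: 0.5 -> 0.25 at update 25, 0.125 at update 50, ...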
        if ((update % eps_threshold == 0) and (update != 0)):
            epsilon = epsilon / 2
            print("RESET epsilon ANNEALING")
            print(epsilon)

        prediction_x_changing_previous = nn_model(x_changing)
        #nn_model.train()
        [sum_reward, average_loss,max_loss,final_epr,final_state,final_KQ_f,final_KQ_r, reached_terminal_state,\
         random_steps_taken,nn_steps_taken] = me.sarsa_n(nn_model,loss_fn, optimizer, scheduler, state_sample, n_back_step, epsilon)

        print("EPISODE")
        print(update)
        print("MAXIMUM LAYER WEIGHTS")
        for layer in nn_model.modules():
            try:
                print(torch.max(layer.weight))
            except AttributeError:
                print("")
        print('random,nn steps')
        print(random_steps_taken)
        print(nn_steps_taken)
        if (reached_terminal_state):
            final_states = np.vstack((final_states, final_state))
            final_KQ_fs = np.vstack((final_KQ_fs, final_KQ_f))
            final_KQ_rs = np.vstack((final_KQ_rs, final_KQ_r))
            epr_per_state.append(final_epr)

            episodic_epr.append(final_epr)
            episodic_loss.append(average_loss)
            episodic_loss_max.append(max_loss)
            episodic_reward.append(sum_reward)
            episodic_nn_step.append(nn_steps_taken)
            episodic_random_step.append(random_steps_taken)

        scheduler.step(average_loss)
        print("TOTAL REWARD")
        print(sum_reward)
        print("ave loss")
        print(average_loss)
        print("max_loss")
        print(max_loss)

        print(optimizer.state_dict())
        print(scheduler.state_dict())
        prediction_x_changing = nn_model(x_changing)

        total_prediction_changing_diff = sum(
            abs(prediction_x_changing - prediction_x_changing_previous))
        print("TOTALPREDICTION")
        print(total_prediction_changing_diff)

        np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/data/' +
                   'temp_episodic_loss_' + str(n_back_step) + '_lr' +
                   str(learning_rate) + '_' + str(eps_threshold) + '_eps' +
                   str(epsilon_greedy_init) + '_' + str(sim_number) +
                   '_penalty_reward_scalar_' + str(me.penalty_reward_scalar) +
                   '_use_experimental_metab_' +
                   str(int(use_experimental_data)) + '.txt',
                   episodic_loss,
                   fmt='%f')

        np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/data/' + 'temp_epr_' +
                   str(n_back_step) + '_lr' + str(learning_rate) + '_' +
                   str(eps_threshold) + '_eps' + str(epsilon_greedy_init) +
                   '_' + str(sim_number) + '_penalty_reward_scalar_' +
                   str(me.penalty_reward_scalar) + '_use_experimental_metab_' +
                   str(int(use_experimental_data)) + '.txt',
                   episodic_epr,
                   fmt='%f')

        np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/data/' +
                   'temp_episodic_reward_' + str(n_back_step) + '_lr' +
                   str(learning_rate) + '_' + str(eps_threshold) + '_eps' +
                   str(epsilon_greedy_init) + '_' + str(sim_number) +
                   '_penalty_reward_scalar_' + str(me.penalty_reward_scalar) +
                   '_use_experimental_metab_' +
                   str(int(use_experimental_data)) + '.txt',
                   episodic_reward,
                   fmt='%f')
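        # Early stopping: after update 350, stop once the last 100 recorded average
        # episode losses span less than 0.025.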

        if (update > 200):
            if ((max(episodic_loss[-100:]) - min(episodic_loss[-100:]) < 0.025)
                    and (update > 350)):
                break

    #%%
    #gamma9 -> gamma=0.9
    #n8 -> n_back_step=8
    #k5 -> E=E-E/5 was used
    #lr5e6 -> begin lr=0.5*e-6

    torch.save(nn_model, cwd+'/GLYCOLYSIS_TCA_GOGAT/models_final_data/'+
                'complete_model_gly_tca_gog_gamma9_n'+str(n_back_step)+'_k5_'\
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+
                '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_' +str(int(use_experimental_data))+
                '_sim'+str(sim_number) + '.pth')

    np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/models_final_data/' +
               'episodic_loss_gamma9_n' + str(n_back_step) + '_k5_'
               '_lr' + str(learning_rate) + '_threshold' + str(eps_threshold) +
               '_eps' + str(epsilon_greedy_init) + '_penalty_reward_scalar_' +
               str(me.penalty_reward_scalar) + '_use_experimental_metab_' +
               str(int(use_experimental_data)) + '_sim' + str(sim_number) +
               '.txt',
               episodic_loss,
               fmt='%f')

    np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/models_final_data/' +
               'episodic_loss_max_gamma9_n' + str(n_back_step) + '_k5_' +
               '_lr' + str(learning_rate) + '_threshold' + str(eps_threshold) +
               '_eps' + str(epsilon_greedy_init) + '_penalty_reward_scalar_' +
               str(me.penalty_reward_scalar) + '_use_experimental_metab_' +
               str(int(use_experimental_data)) + '_sim' + str(sim_number) +
               '.txt',
               episodic_loss_max,
               fmt='%f')

    np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/models_final_data/' +
               'episodic_reward_gamma9_n' + str(n_back_step) + '_k5_' + '_lr' +
               str(learning_rate) + '_threshold' + str(eps_threshold) +
               '_eps' + str(epsilon_greedy_init) + '_penalty_reward_scalar_' +
               str(me.penalty_reward_scalar) + '_use_experimental_metab_' +
               str(int(use_experimental_data)) + '_sim' + str(sim_number) +
               '.txt',
               episodic_reward,
               fmt='%f')

    np.savetxt(cwd+'/GLYCOLYSIS_TCA_GOGAT/models_final_data/'+
                'final_states_gamma9_n'+str(n_back_step)+'_k5_'+
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+
                '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_'+str(int(use_experimental_data))+
                '_sim'+str(sim_number)+\
                '.txt', final_states, fmt='%f')

    np.savetxt(cwd+'/GLYCOLYSIS_TCA_GOGAT/models_final_data/'+
                'final_KQF_gamma9_n'+str(n_back_step)+'_k5_'+
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+
                '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_'+str(int(use_experimental_data))+
                '_sim'+str(sim_number)+\
                '.txt', final_KQ_fs, fmt='%f')

    np.savetxt(cwd+'/GLYCOLYSIS_TCA_GOGAT/models_final_data/'+
                'final_KQR_gamma9_n'+str(n_back_step)+'_k5_'+
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+
                '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_'+str(int(use_experimental_data))+
                '_sim'+str(sim_number)+\
                '.txt', final_KQ_rs, fmt='%f')

    np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/models_final_data/' +
               'epr_per_state_gamma9_n' + str(n_back_step) + '_k5_' + '_lr' +
               str(learning_rate) + '_threshold' + str(eps_threshold) +
               '_eps' + str(epsilon_greedy_init) + '_penalty_reward_scalar_' +
               str(me.penalty_reward_scalar) + '_use_experimental_metab_' +
               str(int(use_experimental_data)) + '_sim' + str(sim_number) +
               '.txt',
               epr_per_state,
               fmt='%f')
Example 4
                            method='lm',
                            xtol=1e-15,
                            args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                                  delta_increment_for_small_concs,
                                  Keq_constant, E_regulation))
    v_log_counts = res_lsq.x
    log_metabolites = np.append(v_log_counts, f_log_counts)

    #make calculations to regulate
    rxn_flux = max_entropy_functions.oddsDiff(v_log_counts, f_log_counts, mu0,
                                              S_mat, R_back_mat, P_mat,
                                              delta_increment_for_small_concs,
                                              Keq_constant, E_regulation)

    KQ_f = max_entropy_functions.odds(log_metabolites, mu0, S_mat, R_back_mat,
                                      P_mat, delta_increment_for_small_concs,
                                      Keq_constant)
    Keq_inverse = np.power(Keq_constant, -1)
    KQ_r = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat,
                                      R_back_mat,
                                      delta_increment_for_small_concs,
                                      Keq_inverse, -1)

    epr = max_entropy_functions.entropy_production_rate(
        KQ_f, KQ_r, E_regulation)

    delta_S_metab = max_entropy_functions.calc_deltaS_metab(
        v_log_counts, target_v_log_counts)

    delta_S = max_entropy_functions.calc_deltaS(v_log_counts,
                                                target_v_log_counts,
def policy_function(nn_model, state, v_log_counts_path, *args):
    #The last optional argument is epsilon, used for the epsilon-greedy exploration step.

    KQ_f_matrix = np.zeros(shape=(num_rxns, num_rxns))
    KQ_r_matrix = np.zeros(shape=(num_rxns, num_rxns))
    states_matrix = np.zeros(shape=(num_rxns, num_rxns))
    delta_S_metab_matrix = np.zeros(shape=(nvar, num_rxns))
    v_log_counts_matrix = np.zeros(shape=(nvar, num_rxns))

    varargin = args
    nargin = len(varargin)
    epsilon_greedy = 0.0
    if (nargin == 1):
        epsilon_greedy = varargin[0]

    rxn_choices = [i for i in range(num_rxns)]
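    # Re-solve the steady-state variable metabolite log counts, falling back from
    # Method1 (bounded) to Method2 and then Method3 if the first-order optimality
    # of the fit stays above 1e-5.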

    res_lsq = least_squares(max_entropy_functions.derivatives,
                            v_log_counts_path,
                            method=Method1,
                            bounds=(-500, 500),
                            xtol=1e-15,
                            args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                                  delta_increment_for_small_concs,
                                  Keq_constant, state))
    if (res_lsq.optimality > 1e-05):
        res_lsq = least_squares(max_entropy_functions.derivatives,
                                v_log_counts_path,
                                method=Method2,
                                xtol=1e-15,
                                args=(f_log_counts, mu0, S_mat, R_back_mat,
                                      P_mat, delta_increment_for_small_concs,
                                      Keq_constant, state))
        if (res_lsq.optimality > 1e-05):
            res_lsq = least_squares(
                max_entropy_functions.derivatives,
                v_log_counts_path,
                method=Method3,
                xtol=1e-15,
                args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                      delta_increment_for_small_concs, Keq_constant, state))

    #v_log_counts = v_log_counts_path.copy()
    v_log_counts = res_lsq.x
    if (np.sum(np.abs(v_log_counts - v_log_counts_path)) > 0.01):
        print("ERROR IN POLICY V_COUNT OPTIMIZATION")
    log_metabolites = np.append(v_log_counts, f_log_counts)

    rxn_flux = max_entropy_functions.oddsDiff(v_log_counts, f_log_counts, mu0,
                                              S_mat, R_back_mat, P_mat,
                                              delta_increment_for_small_concs,
                                              Keq_constant, state)
    KQ_f = max_entropy_functions.odds(log_metabolites, mu0, S_mat, R_back_mat,
                                      P_mat, delta_increment_for_small_concs,
                                      Keq_constant)
    Keq_inverse = np.power(Keq_constant, -1)
    KQ_r = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat,
                                      R_back_mat,
                                      delta_increment_for_small_concs,
                                      Keq_inverse, -1)

    [RR,
     Jac] = max_entropy_functions.calc_Jac2(v_log_counts, f_log_counts, S_mat,
                                            delta_increment_for_small_concs,
                                            KQ_f, KQ_r, state)
    A = max_entropy_functions.calc_A(v_log_counts, f_log_counts, S_mat, Jac,
                                     state)

    delta_S_metab = max_entropy_functions.calc_deltaS_metab(
        v_log_counts, target_v_log_counts)
    #delta_S = max_entropy_functions.calc_deltaS(v_log_counts, f_log_counts, S_mat, KQ_f)

    [ccc, fcc
     ] = max_entropy_functions.conc_flux_control_coeff(nvar, A, S_mat,
                                                       rxn_flux, RR)

    init_action_val = -np.inf

    action_value_vec = np.zeros(num_rxns)
    state_value_vec = np.zeros(num_rxns)
    E_test_vec = np.zeros(num_rxns)
    old_E_test_vec = np.zeros(num_rxns)
    current_reward_vec = np.zeros(num_rxns)
    #print("BEGIN ACTIONS")

    for act in range(0, num_rxns):
        React_Choice = act  #regulate each reaction.

        old_E = state[act]

        newE = max_entropy_functions.calc_reg_E_step(state, React_Choice, nvar, v_log_counts, f_log_counts,
                               complete_target_log_counts, S_mat, A, rxn_flux, KQ_f,\
                               delta_S_metab)

        trial_state_sample = state.copy()  #DO NOT MODIFY ORIGINAL STATE
        trial_state_sample[React_Choice] = newE
        states_matrix[:, act] = trial_state_sample.copy()

        #re-optimize
        new_res_lsq = least_squares(max_entropy_functions.derivatives,
                                    v_log_counts,
                                    method=Method1,
                                    bounds=(-500, 500),
                                    xtol=1e-15,
                                    args=(f_log_counts, mu0, S_mat, R_back_mat,
                                          P_mat,
                                          delta_increment_for_small_concs,
                                          Keq_constant, trial_state_sample))
        if (new_res_lsq.optimality >= 1e-05):
            new_res_lsq = least_squares(
                max_entropy_functions.derivatives,
                v_log_counts,
                method=Method2,
                xtol=1e-15,
                args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                      delta_increment_for_small_concs, Keq_constant,
                      trial_state_sample))
            if (new_res_lsq.optimality >= 1e-05):
                new_res_lsq = least_squares(
                    max_entropy_functions.derivatives,
                    v_log_counts,
                    method=Method3,
                    xtol=1e-15,
                    args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                          delta_increment_for_small_concs, Keq_constant,
                          trial_state_sample))

        new_v_log_counts = new_res_lsq.x
        v_log_counts_matrix[:, act] = new_v_log_counts.copy()

        new_log_metabolites = np.append(new_v_log_counts, f_log_counts)

        KQ_f_matrix[:, act] = max_entropy_functions.odds(
            new_log_metabolites, mu0, S_mat, R_back_mat, P_mat,
            delta_increment_for_small_concs, Keq_constant)
        Keq_inverse = np.power(Keq_constant, -1)
        KQ_r_matrix[:, act] = max_entropy_functions.odds(
            new_log_metabolites, mu0, -S_mat, P_mat, R_back_mat,
            delta_increment_for_small_concs, Keq_inverse, -1)

        delta_S_metab_matrix[:, act] = max_entropy_functions.calc_deltaS_metab(
            new_v_log_counts, target_v_log_counts)

        value_current_state = state_value(
            nn_model,
            torch.from_numpy(trial_state_sample).float().to(device))

        value_current_state = value_current_state.item()


        current_reward = reward_value(new_v_log_counts, v_log_counts, \
                                      KQ_f_matrix[:,act], KQ_r_matrix[:,act],\
                                      trial_state_sample, state)
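        # Actions whose reward equals the exclusion penalty are removed from
        # rxn_choices so the greedy selection below cannot pick them.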

        if (current_reward == penalty_exclusion_reward):
            rxn_choices.remove(act)

        action_value = current_reward + (
            gamma) * value_current_state  #note, action is using old KQ values

        action_value_vec[act] = action_value
        old_E_test_vec[act] = old_E
        E_test_vec[act] = newE
        state_value_vec[act] = value_current_state
        current_reward_vec[act] = current_reward  #Should have smaller EPR
        #print(current_reward)
        #USE PENALTY REWARDS

    if (len(np.flatnonzero(action_value_vec == action_value_vec.max())) == 0):
        print("current action_value_vec")
        print(action_value_vec)
        print(action_value_vec.max())

    #only choose from non penalty rewards
    action_choice_index = np.random.choice(
        np.flatnonzero(action_value_vec[rxn_choices] ==
                       action_value_vec[rxn_choices].max()))
    action_choice = rxn_choices[action_choice_index]

    arr_choice_index = np.flatnonzero(
        action_value_vec[rxn_choices] == action_value_vec[rxn_choices].max())
    arr_choice = np.asarray(rxn_choices)[arr_choice_index]
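    # Tie-break: when several actions share the best value, prefer reactions whose
    # current activity is still below 1 (i.e. already regulated).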

    arr_choice_reg = np.flatnonzero(state[arr_choice] < 1)
    if (arr_choice_reg.size > 1):
        print('using tie breaker')
        print(arr_choice[arr_choice_reg])
        action_choice = np.random.choice(arr_choice[arr_choice_reg])

    used_random_step = False
    unif_rand = np.random.uniform(0, 1)
    if ((unif_rand < epsilon_greedy) and (len(rxn_choices) > 0)):
        #if (len(rxn_choices)>1):
        #    rxn_choices.remove(action_choice)
        #print("USING EPSILON GREEDY")
        #print(action_choice)
        used_random_step = True
        random_choice = random.choice(rxn_choices)
        action_choice = random_choice

    if (current_reward_vec == penalty_exclusion_reward).all():
        print("OUT OF REWARDS")
        action_choice = -1

    if current_reward_vec[action_choice] == penalty_exclusion_reward:
        print("state_value_vec")
        print(state_value_vec)
        print("current_reward_vec")
        print(current_reward_vec)
        print("used_random_step")
        print(used_random_step)
        print("rxn_choices")
        print(rxn_choices)

# =============================================================================
#     print("action_choice")
#     print(action_choice)
#     print(delta_S_metab_matrix[:,action_choice])
#     print(states_matrix[:,action_choice])
#     print(states_matrix[:,action_choice]*KQ_f_matrix[:,action_choice])
#     print("rewards")
#     print(current_reward_vec)
#
# =============================================================================
#breakpoint()
    return [action_choice,current_reward_vec[action_choice],\
            KQ_f_matrix[:,action_choice],KQ_r_matrix[:,action_choice],\
            v_log_counts_matrix[:,action_choice],\
            states_matrix[:,action_choice],\
            delta_S_metab_matrix[:,action_choice],used_random_step]
def sarsa_n(nn_model, loss_fn, optimizer, scheduler, state_sample, n_back_step,
            epsilon_greedy):

    #reset for each episode; incremented as the policy chooses random vs. network steps
    random_steps_taken = 0
    nn_steps_taken = 0
    maximum_predicted_value = 0
    layer_weight = torch.zeros(1, device=device)

    final_state = []
    final_KQ_f = []
    final_KQ_r = []
    reached_terminal_state = False
    average_loss = []

    final_reward = 0
    sum_reward_episode = 0
    end_of_path = 1000  #this is the maximum length a path can take

    states_matrix = np.zeros(shape=(num_rxns, end_of_path + 1))
    states_matrix[:, 0] = state_sample

    res_lsq = least_squares(max_entropy_functions.derivatives,
                            v_log_counts_static,
                            method=Method1,
                            bounds=(-500, 500),
                            xtol=1e-15,
                            args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                                  delta_increment_for_small_concs,
                                  Keq_constant, states_matrix[:, 0]))
    if (res_lsq.success == False):
        res_lsq = least_squares(max_entropy_functions.derivatives,
                                v_log_counts_static,
                                method=Method2,
                                xtol=1e-15,
                                args=(f_log_counts, mu0, S_mat, R_back_mat,
                                      P_mat, delta_increment_for_small_concs,
                                      Keq_constant, states_matrix[:, 0]))
        if (res_lsq.success == False):
            res_lsq = least_squares(max_entropy_functions.derivatives,
                                    v_log_counts_static,
                                    method=Method3,
                                    xtol=1e-15,
                                    args=(f_log_counts, mu0, S_mat, R_back_mat,
                                          P_mat,
                                          delta_increment_for_small_concs,
                                          Keq_constant, states_matrix[:, 0]))

    v_log_counts_current = res_lsq.x.copy()
    log_metabolites = np.append(v_log_counts_current, f_log_counts)

    rxn_flux_init = max_entropy_functions.oddsDiff(
        v_log_counts_current, f_log_counts, mu0, S_mat, R_back_mat, P_mat,
        delta_increment_for_small_concs, Keq_constant, states_matrix[:, 0])
    KQ_f_current = max_entropy_functions.odds(log_metabolites, mu0, S_mat,
                                              R_back_mat, P_mat,
                                              delta_increment_for_small_concs,
                                              Keq_constant)

    Keq_inverse = np.power(Keq_constant, -1)
    KQ_r_current = max_entropy_functions.odds(log_metabolites, mu0, -S_mat,
                                              P_mat, R_back_mat,
                                              delta_increment_for_small_concs,
                                              Keq_inverse, -1)

    delta_S_metab_current = max_entropy_functions.calc_deltaS_metab(
        v_log_counts_current, target_v_log_counts)

    #[ccc,fcc] = max_entropy_functions.conc_flux_control_coeff(nvar, A_init, S_mat, rxn_flux_init, RR)

    reward_vec = np.zeros(end_of_path + 1)

    reward_vec[0] = 0.0
    rxn_flux_path = rxn_flux_init.copy()
    #A_path = A_init.copy()

    for t in range(0, end_of_path):

        if (t < end_of_path):
            #This represents the choice from the current policy.
            [React_Choice,reward_vec[t+1],\
            KQ_f_current, KQ_r_current,\
            v_log_counts_current,\
            states_matrix[:,t+1],\
            delta_S_metab_current,\
            used_random_step] = policy_function(nn_model, states_matrix[:,t], v_log_counts_current, epsilon_greedy)#regulate each reaction.

            if (used_random_step):
                random_steps_taken += 1
            else:
                nn_steps_taken += 1

            if (React_Choice == -1):
                print("out of rewards, final state")
                print(states_matrix[:, t + 1])
                break

            rxn_flux_path = max_entropy_functions.oddsDiff(
                v_log_counts_current, f_log_counts, mu0, S_mat, R_back_mat,
                P_mat, delta_increment_for_small_concs, Keq_constant,
                states_matrix[:, t + 1])
            epr_path = max_entropy_functions.entropy_production_rate(
                KQ_f_current, KQ_r_current, states_matrix[:, t + 1])
            sum_reward_episode += reward_vec[t + 1]

            final_state = states_matrix[:, t + 1].copy()
            #We stop the path if we have no more positive loss function values, or if we revisit a state.
            if ((delta_S_metab_current <= 0.0).all()):
                end_of_path = t + 1  #stops simulation at step t+1

                reached_terminal_state = True
                final_state = states_matrix[:, t + 1].copy()
                final_KQ_f = KQ_f_current.copy()
                final_KQ_r = KQ_r_current.copy()
                final_reward = epr_path
                #breakpoint()
                print(
                    "**************************************Path Length ds<0******************************************"
                )
                print(end_of_path)
                print("Final STATE")
                print(states_matrix[:, t + 1])
                print(rxn_flux_path)
                print("original epr")
                print(epr_path)
                print("all rewards:")
                #print(reward_vec[0:t+1])
        tau = t - n_back_step + 1

        if (tau >= 0):

            #THIS IS THE FORWARD
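            # n-step return: G = sum_{i=tau+1}^{min(tau+n, T)} gamma^(i-tau-1) * R_i,
            # bootstrapped below with gamma^n * V(S_{tau+n}) whenever tau+n < T.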
            estimate_value = torch.zeros(1, device=device)

            for i in range(tau + 1, min(tau + n_back_step, end_of_path) + 1):
                estimate_value += (gamma**(i - tau - 1)) * reward_vec[i]

            if ((tau + n_back_step) < end_of_path):
                value_tau_n = state_value(
                    nn_model,
                    torch.from_numpy(
                        states_matrix[:,
                                      tau + n_back_step]).float().to(device))

                estimate_value += (gamma**(n_back_step)) * value_tau_n

            value_tau = state_value(
                nn_model,
                torch.from_numpy(states_matrix[:, tau]).float().to(device))

            if (value_tau.requires_grad == False):
                print('value tau broken')
            if (estimate_value.requires_grad == True):
                estimate_value.detach_()
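            # Detaching the target makes this a semi-gradient update: the loss
            # gradient flows only through value_tau, not through the target.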
            #THIS IS THE END OF FORWARD

            #WARNING
            #loss ordering should be input with requires_grad == True,
            #followed by target with requires_grad == False

            optimizer.zero_grad()

            loss = (loss_fn(value_tau, estimate_value))  #currently MSE

            loss.backward()

            clipping_value = 1.0
            #torch.nn.utils.clip_grad_value_(nn_model.parameters(), clipping_value)
            torch.nn.utils.clip_grad_norm_(nn_model.parameters(),
                                           clipping_value)

            optimizer.step()

            average_loss.append(loss.item())

        if (tau >= (end_of_path - 1)):
            break

    #after episode is finished, take average loss
    average_loss_episode = np.mean(average_loss)
    #print(average_loss)
    print("index of max error on path")
    print(average_loss.index(max(average_loss)))
    #print("All rewards")
    #print(reward_vec[0:t+1])

    return [sum_reward_episode, average_loss_episode,max(average_loss),final_reward, final_state, final_KQ_f,final_KQ_r,\
            reached_terminal_state, random_steps_taken,nn_steps_taken]
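
# Minimal standalone sketch (not part of the original example) of the n-step
# SARSA target computed above: discount the next n rewards and bootstrap with
# a value estimate when the lookahead state falls before the end of the path.
def n_step_target(rewards, tau, n, gamma, end_of_path, value_at_tau_n=0.0):
    # rewards[i] is the reward observed entering step i (rewards[0] is unused,
    # matching reward_vec above).
    target = 0.0
    for i in range(tau + 1, min(tau + n, end_of_path) + 1):
        target += (gamma ** (i - tau - 1)) * rewards[i]
    if (tau + n) < end_of_path:
        target += (gamma ** n) * value_at_tau_n
    return target
# e.g. n_step_target([0, 1, 1, 1, 0], tau=0, n=2, gamma=0.9, end_of_path=4,
#                    value_at_tau_n=0.5)  ->  1 + 0.9 + 0.81*0.5 ≈ 2.305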

# Example 7
def policy_function(nn_model, state, v_log_counts_path, *args ):
    #the last positional argument should be epsilon when using the epsilon-greedy algorithm.
    
    varargin = args
    nargin = len(varargin)
    epsilon_greedy = 0.0
    if (nargin == 1):
        epsilon_greedy = varargin[0]
        
    used_random_step=False
    rxn_choices = [i for i in range(num_rxns)]
    
    res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_path, method=Method1,
                            xtol=1e-15, 
                            args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state))
    if (res_lsq.success==False):
        print("USING DOGBOX")
        print("v_log_counts_path")
        print(v_log_counts_path)
        print("state")
        print(state)
        res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_path, method=Method2,
            bounds=(-500,500),xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state))
        if (res_lsq.success==False):
            res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_path, method=Method3,xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state))
    

    #v_log_counts = v_log_counts_path.copy()
    v_log_counts = res_lsq.x
         
    log_metabolites = np.append(v_log_counts, f_log_counts)
        
    rxn_flux = max_entropy_functions.oddsDiff(v_log_counts, f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state)
    KQ_f = max_entropy_functions.odds(log_metabolites, mu0,S_mat, R_back_mat, P_mat, delta_increment_for_small_concs,Keq_constant);
    Keq_inverse = np.power(Keq_constant,-1)
    KQ_r = max_entropy_functions.odds(log_metabolites, mu0,-S_mat, P_mat, R_back_mat, delta_increment_for_small_concs,Keq_inverse,-1);
    
    [RR,Jac] = max_entropy_functions.calc_Jac2(v_log_counts, f_log_counts, S_mat, delta_increment_for_small_concs, KQ_f, KQ_r, state)
    A = max_entropy_functions.calc_A(v_log_counts, f_log_counts, S_mat, Jac, state )
    
    
    delta_S_metab = max_entropy_functions.calc_deltaS_metab(v_log_counts, target_v_log_counts)
    
    [ccc,fcc] = max_entropy_functions.conc_flux_control_coeff(nvar, A, S_mat, rxn_flux, RR)
    
    indices = [i for i in range(0,len(Keq_constant))]
    action_value_vec = np.zeros(num_rxns)
    current_reward_vec = np.zeros(num_rxns)
    current_state_vec = np.zeros(num_rxns)

    variables=[nn_model,state, nvar, v_log_counts, f_log_counts,\
               complete_target_log_counts, A, rxn_flux, KQ_f,\
               delta_S_metab,\
               mu0, S_mat, R_back_mat, P_mat, 
               delta_increment_for_small_concs, Keq_constant]
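    # Evaluate every candidate regulation step in parallel. For each reaction
    # index, potential_step returns the one-step action value
    # (reward + gamma * V(trial state)) together with the re-optimized
    # metabolite counts and updated reaction odds for that trial state.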

    start = time.time()
    
    with Pool() as pool:
        async_result = pool.starmap(potential_step, zip(indices, repeat(variables)))
        pool.close()
        pool.join()
    end = time.time()
    
    total = end-start
    #only choose from non penalty rewards     
    time_cpu=0
    time_nn=0   
    for act in range(0,len(async_result)):
        if (async_result[act][1] == penalty_exclusion_reward):
            rxn_choices.remove(act)
        action_value_vec[act] = async_result[act][0]
        current_reward_vec[act] = async_result[act][1]
        time_cpu+=async_result[act][7]
        time_nn+=async_result[act][8]
        current_state_vec[act] = async_result[act][9]
    #print(current_reward_vec)
    
    if (len(rxn_choices) == 0):
        print("OUT OF REWARDS")
        action_choice=-1
    else:
        try:
            action_choice_index = np.random.choice(np.flatnonzero(action_value_vec[rxn_choices] == action_value_vec[rxn_choices].max()))
            action_choice = rxn_choices[action_choice_index]
        except:
            print("WARNING ERROR SHOULD NOT BE HAPPINING")
            print("rxn_choices")
            print(rxn_choices)
            print("action_value_vec")
            print(action_value_vec)
            print("action_value_vec[rxn_choices].max()")
            print(action_value_vec[rxn_choices].max())
            print(np.flatnonzero(action_value_vec[rxn_choices] == action_value_vec[rxn_choices].max()))
            print("current_reward_vec")
            print(current_reward_vec)
            print("current_state_vec")
            print(current_state_vec)
            
            print("MAXIMUM LAYER WEIGHTS")
            for layer in nn_model.modules():
                try:
                    print(torch.max(layer.weight))
                except:
                    print("")

            print("async_result")
            print(async_result)
            action_choice = -1

        unif_rand = np.random.uniform(0,1)
        if ( (unif_rand < epsilon_greedy) and (len(rxn_choices) > 0)):
            #if (len(rxn_choices)>1):
            #    rxn_choices.remove(action_choice)
            #print("USING EPSILON GREEDY")
            #print(action_choice)       
            used_random_step=True
            random_choice = random.choice(rxn_choices)
            action_choice = random_choice

    if (np.sum(np.abs(v_log_counts - v_log_counts_path)) > 0.1):
        print("ERROR IN POLICY V_COUNT OPTIMIZATION")
        #print("async_result")
        #print(async_result)
        print("state")
        print(state)
        print("v_log_counts")
        print(v_log_counts)
        print("v_log_counts_path")
        print(v_log_counts_path)
        print("current_reward_vec")
        print(current_reward_vec)
        print("action_value_vec")
        print(action_value_vec)
        print("rxn_choices")
        print(rxn_choices)
        print("MAXIMUM LAYER WEIGHTS")
        for layer in nn_model.modules():
            try:
                print(torch.max(layer.weight))
            except:
                print("")




    #async_result order
    #[action_value, current_reward,KQ_f_new,KQ_r_new,new_v_log_counts,trial_state_sample,new_delta_S_metab]
    return [action_choice,async_result[action_choice][1],\
            async_result[action_choice][2],async_result[action_choice][3],\
            async_result[action_choice][4],\
            async_result[action_choice][5],\
            async_result[action_choice][6],used_random_step,time_cpu,time_nn]

# Example 8
def potential_step(index, other_args):
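    # Score a single candidate action: regulate the reaction given by `index`,
    # re-solve the steady state for the trial enzyme-activity vector, and
    # return the reward plus the discounted value estimate of the new state.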
    React_Choice=index
    
    nn_model,state, nvar, v_log_counts, f_log_counts,\
    complete_target_log_counts, A, rxn_flux, KQ_f,\
    delta_S_metab,\
    mu0, S_mat, R_back_mat, P_mat, \
    delta_increment_for_small_concs, Keq_constant = other_args
    
    
    
    newE = max_entropy_functions.calc_reg_E_step(state, React_Choice, nvar, v_log_counts, f_log_counts,
                               complete_target_log_counts, S_mat, A, rxn_flux, KQ_f,\
                               delta_S_metab)

    trial_state_sample = state.copy()#DO NOT MODIFY ORIGINAL STATE
    trial_state_sample[React_Choice] = newE
        #re-optimize


    start_cpu = time.time()
    new_res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts, method=Method1,
                                xtol=1e-15, 
                                args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, 
                                      delta_increment_for_small_concs, Keq_constant, trial_state_sample))
    if (new_res_lsq.success==False):
        print("USING DOGBOX")
        print("v_log_counts")
        print(v_log_counts)
        print("trial_state_sample")
        print(trial_state_sample)
        new_res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts, method=Method2,
            bounds=(-500,500), xtol=1e-15, 
            args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, 
            delta_increment_for_small_concs, Keq_constant, trial_state_sample))
        if (new_res_lsq.success==False):
            new_res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts, method=Method3,xtol=1e-15, 
            args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, 
            delta_increment_for_small_concs, Keq_constant, trial_state_sample))
    
    end_cpu = time.time()

    new_v_log_counts = new_res_lsq.x
    
    new_log_metabolites = np.append(new_v_log_counts, f_log_counts)

    new_delta_S_metab = max_entropy_functions.calc_deltaS_metab(new_v_log_counts, target_v_log_counts)

    KQ_f_new = max_entropy_functions.odds(new_log_metabolites, mu0,S_mat, R_back_mat, P_mat, delta_increment_for_small_concs,Keq_constant);    
    Keq_inverse = np.power(Keq_constant,-1)
    KQ_r_new = max_entropy_functions.odds(new_log_metabolites, mu0,-S_mat, P_mat, R_back_mat, delta_increment_for_small_concs,Keq_inverse,-1);


    
    begin_nn = time.time()
    value_current_state = state_value(nn_model,  torch.from_numpy(trial_state_sample).float().to(device) )
    value_current_state = value_current_state.item()

    end_nn = time.time()
    current_reward = reward_value(new_v_log_counts, v_log_counts, \
                                  KQ_f_new, KQ_r_new,\
                                  trial_state_sample, state)

    action_value = current_reward + (gamma) * value_current_state #note, action is using old KQ values

    return [action_value, current_reward,KQ_f_new,KQ_r_new,new_v_log_counts,trial_state_sample,new_delta_S_metab, end_cpu-start_cpu,end_nn-begin_nn,value_current_state]
def run(argv): 
    try:
        os.makedirs(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/data')
    except FileExistsError:
        # directory already exists
        pass
    try:
        os.makedirs(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data')
    except FileExistsError:
        # directory already exists
        pass
    
    
    #default values
    
    #If no experimental data  is available, we can estimate using 'rule-of-thumb' data at 0.001
    use_experimental_data=False
    learning_rate=1e-8 #3rd
    epsilon=0.05 #4th
    eps_threshold=25 #5th
    gamma = 0.9 #6th
    updates = 500
    penalty_reward_scalar=0.0

    #load input
    total = len(sys.argv)
    cmdargs = str(sys.argv)
    print ("The total numbers of args passed to the script: %d " % total)
    print ("Args list: %s " % cmdargs)
    print ("Script name: %s" % str(sys.argv[0]))
    for i in range(total):
        print ("Argument # %d : %s" % (i, str(sys.argv[i])))
    

    sim_number=int(sys.argv[1])
    n_back_step=int(sys.argv[2])
    if (total > 3):
        use_experimental_data=bool(int(sys.argv[3]))
    if (total > 4):
        learning_rate=float(sys.argv[4])
    if (total > 5):
        epsilon=float(sys.argv[5])
    if (total > 6):
        eps_threshold=float(sys.argv[6])
    if (total > 7):
        gamma=float(sys.argv[7])
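
    # Example invocation (hypothetical script name and values):
    #   python run_sarsa.py <sim_number> <n_back_step> [use_experimental_data] [learning_rate] [epsilon] [eps_threshold] [gamma]
    #   e.g. python run_sarsa.py 1 4 0 1e-8 0.05 25 0.9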

    
    pd.set_option('display.max_columns', None,'display.max_rows', None)
    
    print("sim")
    print(sim_number)
    print("n_back_step")
    print(n_back_step)
    print("using experimental metabolite data")
    print(use_experimental_data)
    print("learning_rate")
    print(learning_rate)
    print("epsilon")
    print(epsilon)
    print("eps_threshold")
    print(eps_threshold)
    print("gamma")
    print(gamma)

    T = 298.15
    R = 8.314e-03
    RT = R*T
    N_avogadro = 6.022140857e+23
    VolCell = 1.0e-15
    Concentration2Count = N_avogadro * VolCell
    concentration_increment = 1/(N_avogadro*VolCell)
    
    
    np.set_printoptions(suppress=True) # suppress scientific notation when printing arrays
    # In[3]:
    
    
    #with open( cwd + '/TCA_PPP_GLYCOLYSIS_CELLWALL/TCA_PPP_Glycolysis_CellWall3b.dat', 'r') as f:
    #  print(f.read())
      
    
    # In[5]:
    
    
    fdat = open(cwd + '/TCA_PPP_GLYCOLYSIS_CELLWALL/TCA_PPP_Glycolysis_CellWall3b.dat', 'r')
    #fdat = open('TCA_PPP_Glycolysis.dat', 'r')
   
    left ='LEFT'
    right = 'RIGHT'
    left_compartment = 'LEFT_COMPARTMENT'
    right_compartment = 'RIGHT_COMPARTMENT'
    enzyme_level = 'ENZYME_LEVEL'
    deltag0 = 'DGZERO'
    deltag0_sigma = 'DGZERO StdDev'
    same_compartment = 'Same Compartment?'
    full_rxn = 'Full Rxn'
    
    reactions = pd.DataFrame(index=[],columns=[left, right, left_compartment, right_compartment, enzyme_level, deltag0, deltag0_sigma, same_compartment,full_rxn])
    reactions.index.name='REACTION'
    S_matrix = pd.DataFrame(index=[],columns=[enzyme_level])
    S_matrix.index.name='REACTION'
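    # Parse the reaction file: each record begins with a REACTION line followed
    # by LEFT / RIGHT (reaction sides), LEFT_COMPARTMENT / RIGHT_COMPARTMENT,
    # and ENZYME_LEVEL lines. Stoichiometries are accumulated in S_matrix and
    # reaction metadata in the reactions DataFrame.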
    
    for line in fdat:
        if (line.startswith('REACTION')):
            rxn_name = line[9:-1].lstrip()
            S_matrix.loc[rxn_name,enzyme_level] = 1.0
            reactions.loc[rxn_name,enzyme_level] = 1.0
    
        if (re.match("^LEFT\s",line)):
            line = line.upper()
            left_rxn = line[4:-1].lstrip()
            left_rxn = re.sub(r'\s+$', '', left_rxn) #Remove trailing white space
            reactions.loc[rxn_name,left] = left_rxn
    
        elif (re.match('^RIGHT\s',line)):
            line = line.upper()
            right_rxn = line[5:-1].lstrip()
            right_rxn = re.sub(r'\s+$', '', right_rxn) #Remove trailing white space
            reactions.loc[rxn_name,right] = right_rxn
            
        elif (line.startswith(left_compartment)):
            cpt_name = line[16:-1].lstrip()
            reactions.loc[rxn_name,left_compartment] = cpt_name
            reactants = re.split(' \+ ',left_rxn)
            for idx in reactants:
                values = re.split(' ', idx);
                if len(values) == 2:
                    stoichiometry = np.float64(values[0]);
                    molecule = values[1];
                    if not re.search(':',molecule):
                        molecule = molecule + ':' + cpt_name
                else:
                    stoichiometry = np.float64(-1.0);
                    molecule = values[0]; 
                    if not re.search(':',molecule):
                        molecule = molecule + ':' + cpt_name
                S_matrix.loc[rxn_name,molecule] = stoichiometry;
    
    
        elif (line.startswith(right_compartment)):
            cpt_name = line[17:-1].lstrip()
            reactions.loc[rxn_name,right_compartment] = cpt_name
            products = re.split(' \+ ',right_rxn)
            for idx in products:
                values = re.split(' ', idx);
                if len(values) == 2:
                    stoichiometry = np.float64(values[0]);
                    molecule = values[1];
                    if not re.search(':',molecule):
                        molecule = molecule + ':' + cpt_name
                else:
                    stoichiometry = np.float64(1.0);
                    molecule = values[0];
                    if not re.search(':',molecule):
                        molecule = molecule + ':' + cpt_name
                S_matrix.loc[rxn_name,molecule] = stoichiometry;
    
        elif (re.match("^ENZYME_LEVEL\s", line)):
            level = line[12:-1].lstrip()
            reactions.loc[rxn_name,enzyme_level] = float(level)
            S_matrix.loc[rxn_name,enzyme_level] = float(level)
                    
        elif re.match('^COMMENT',line):
            continue
        elif re.match(r'//',line):
            continue
        elif re.match('^#',line):
            continue
            
    #    elif (re.match("^[N,P]REGULATION\s", line)):
    #        reg = line
    #        reactions.loc[rxn_name,regulation] = reg
    fdat.close()
    S_matrix.fillna(0,inplace=True)
    S_active = S_matrix[S_matrix[enzyme_level] > 0.0]
    active_reactions = reactions[reactions[enzyme_level] > 0.0]
    del S_active[enzyme_level]
    # Delete any columns/metabolites that have all zeros in the S matrix:
    S_active = S_active.loc[:, (S_active != 0).any(axis=0)]
    np.shape(S_active.values)
    #print(S_active.shape)
    #print(S_active)
    reactions[full_rxn] = reactions[left] + ' = ' + reactions[right]
    
    
    # In[6]:
    
    
    if (1):   
        for idx in reactions.index:
            #print(idx,flush=True)
            boltzmann_rxn_str = reactions.loc[idx,'Full Rxn']
            if re.search(':',boltzmann_rxn_str):
                all_cmprts = re.findall(':\S+', boltzmann_rxn_str)
                all_cmprts = [s.replace(':', '') for s in all_cmprts] # remove all the ':'s
                different_compartments = 0
                for cmpt in all_cmprts:
                    if not re.match(all_cmprts[0],cmpt):
                        different_compartments = 1
                if ((not different_compartments) and (reactions[left_compartment].isnull or reactions[right_compartment].isnull)):
                    reactions.loc[idx,left_compartment] = cmpt
                    reactions.loc[idx,right_compartment] = cmpt
                    reactions.loc[idx,same_compartment] = True
                if different_compartments:
                    reactions.loc[idx,same_compartment] = False
            else:
                if (reactions.loc[idx,left_compartment] == reactions.loc[idx,right_compartment]):
                    reactions.loc[idx,same_compartment] = True
                else:
                    reactions.loc[idx,same_compartment] = False
    #print(reactions)                
                
    reactions.loc['CSm',deltag0] = -35.8057
    reactions.loc['ACONTm',deltag0] = 7.62962
    reactions.loc['ICDHxm',deltag0] = -2.6492
    reactions.loc['AKGDam',deltag0] = -37.245
    reactions.loc['SUCOASm',deltag0] = 2.01842
    reactions.loc['SUCD1m',deltag0] = -379.579
    reactions.loc['FUMm',deltag0] = -3.44728
    reactions.loc['MDHm',deltag0] = 29.5419
    reactions.loc['GAPD',deltag0] = 5.24202
    reactions.loc['PGK',deltag0] = -18.5083
    reactions.loc['TPI',deltag0] = 5.49798
    reactions.loc['FBA',deltag0] = 21.4506
    reactions.loc['PYK',deltag0] = -27.3548
    reactions.loc['PGM',deltag0] = 4.17874
    reactions.loc['ENO',deltag0] = -4.0817
    reactions.loc['HEX1',deltag0] = -16.7776
    reactions.loc['PGI',deltag0] = 2.52206
    reactions.loc['PFK',deltag0] = -16.1049
    reactions.loc['PYRt2m',deltag0] = -RT*np.log(10)
    reactions.loc['PDHm',deltag0] = -44.1315
    reactions.loc['G6PDH2r',deltag0] = -3.89329
    reactions.loc['PGL',deltag0] = -22.0813
    reactions.loc['GND',deltag0] = 2.32254
    reactions.loc['RPE',deltag0] = -3.37
    reactions.loc['RPI',deltag0] = -1.96367
    reactions.loc['TKT2',deltag0] = -10.0342
    reactions.loc['TALA',deltag0] = -0.729232
    reactions.loc['FBA3',deltag0] = 13.9499
    reactions.loc['PFK_3',deltag0] = -9.33337
    reactions.loc['TKT1',deltag0] = -3.79303
    reactions.loc['Glutamine-fructose-6-phosphate aminotransferase',deltag0] = -13.4054
    reactions.loc['Glucosamine-6-phosphate N-acetyltransferase',deltag0] = -23.7065
    reactions.loc['N-acetylglucosamine-phosphate mutase',deltag0] = 4.65558
    reactions.loc['UDP N-acetylglucosamine pyrophosphorylase',deltag0] = 0.539147
    reactions.loc['Hyaluronan Synthase',deltag0] = -14.4143
    reactions.loc['Phosphoglucomutase',deltag0] = 7.41831
    reactions.loc['UTP-glucose-1-phosphate uridylyltransferase',deltag0] = 1.51043
    reactions.loc['1,3-beta-glucan synthase',deltag0] = -11.534
    reactions.loc['Citrate-oxaloacetate exchange',deltag0] = 0
    reactions.loc['CITRATE_LYASE',deltag0] = 10.0299
    reactions.loc['MDHc',deltag0] = -29.5419
    reactions.loc['MDH-NADPc',deltag0] = -29.7376
    reactions.loc['ME1c',deltag0] = 4.56191
    reactions.loc['ME2c',deltag0] = 4.75763
    reactions.loc['Pyruvate Carboxylase',deltag0] = -0.795825
    reactions.loc['Aldose 1-epimerase',deltag0] = 0
    reactions.loc['HEX1a',deltag0] = -16.7776
    reactions.loc['PGI-1',deltag0] = 2.52206
        
    reactions.loc['CSm',deltag0_sigma] = 0.930552
    reactions.loc['ACONTm',deltag0_sigma] = 0.733847
    reactions.loc['ICDHxm',deltag0_sigma] = 7.62095
    reactions.loc['AKGDam',deltag0_sigma] = 7.97121
    reactions.loc['SUCOASm',deltag0_sigma] = 1.48197
    reactions.loc['SUCD1m',deltag0_sigma] = 7.8098
    reactions.loc['FUMm',deltag0_sigma] = 0.607693
    reactions.loc['MDHm',deltag0_sigma] = 0.422376
    reactions.loc['GAPD',deltag0_sigma] = 0.895659
    reactions.loc['PGK',deltag0_sigma] = 0.889982
    reactions.loc['TPI',deltag0_sigma] = 0.753116
    reactions.loc['FBA',deltag0_sigma] = 0.87227
    reactions.loc['PYK',deltag0_sigma] = 0.939774
    reactions.loc['PGM',deltag0_sigma] = 0.65542
    reactions.loc['ENO',deltag0_sigma] = 0.734193
    reactions.loc['HEX1',deltag0_sigma] = 0.715237
    reactions.loc['PGI',deltag0_sigma] = 0.596775
    reactions.loc['PFK',deltag0_sigma] = 0.886629
    reactions.loc['PYRt2m',deltag0_sigma] = 0
    reactions.loc['PDHm',deltag0_sigma] = 7.66459
    reactions.loc['G6PDH2r',deltag0_sigma] = 2.11855
    reactions.loc['PGL',deltag0_sigma] = 2.62825
    reactions.loc['GND',deltag0_sigma] = 7.60864
    reactions.loc['RPE',deltag0_sigma] = 1.16485
    reactions.loc['RPI',deltag0_sigma] = 1.16321
    reactions.loc['TKT2',deltag0_sigma] = 2.08682
    reactions.loc['TALA',deltag0_sigma] = 1.62106
    reactions.loc['FBA3',deltag0_sigma] = 7.36854
    reactions.loc['PFK_3',deltag0_sigma] = 7.3671
    reactions.loc['TKT1',deltag0_sigma] = 2.16133
    reactions.loc['Glutamine-fructose-6-phosphate aminotransferase',deltag0_sigma] = 3.08807
    reactions.loc['Glucosamine-6-phosphate N-acetyltransferase',deltag0_sigma] = 4.26738
    reactions.loc['N-acetylglucosamine-phosphate mutase',deltag0_sigma] = 3.06369
    reactions.loc['UDP N-acetylglucosamine pyrophosphorylase',deltag0_sigma] = 3.12527
    reactions.loc['Hyaluronan Synthase',deltag0_sigma] = 9.46851
    reactions.loc['Phosphoglucomutase',deltag0_sigma] = 1.09029
    reactions.loc['UTP-glucose-1-phosphate uridylyltransferase',deltag0_sigma] = 1.14644
    reactions.loc['1,3-beta-glucan synthase',deltag0_sigma] = 7.80447
    reactions.loc['Citrate-oxaloacetate exchange',deltag0_sigma] = 0
    reactions.loc['CITRATE_LYASE',deltag0_sigma] = 0.928303
    reactions.loc['MDHc',deltag0_sigma] = 0.422376
    reactions.loc['MDH-NADPc',deltag0_sigma] = 0.531184
    reactions.loc['ME1c',deltag0_sigma] = 7.60174
    reactions.loc['ME2c',deltag0_sigma] = 7.61042
    reactions.loc['Pyruvate Carboxylase',deltag0_sigma] = 7.60419
    reactions.loc['Aldose 1-epimerase',deltag0_sigma] = 0
    reactions.loc['HEX1a',deltag0_sigma] = 0.715237
    reactions.loc['PGI-1',deltag0_sigma] = 0.596775
    # ## Calculate Standard Free Energies of Reaction
    # In[49]:
    
    conc = 'Conc'
    variable = 'Variable'
    conc_exp = 'Conc_Experimental'
    metabolites = pd.DataFrame(index = S_active.columns, columns=[conc,conc_exp,variable])
    metabolites[conc] = 0.001
    metabolites[variable] = True
    
    # Set the fixed metabolites:
    metabolites.loc['ATP:MITOCHONDRIA',conc] = 9.600000e-03
    metabolites.loc['ATP:MITOCHONDRIA',variable] = False
    metabolites.loc['ADP:MITOCHONDRIA',conc] = 5.600000e-04
    metabolites.loc['ADP:MITOCHONDRIA',variable] = False
    metabolites.loc['ORTHOPHOSPHATE:MITOCHONDRIA',conc] = 2.000000e-02
    metabolites.loc['ORTHOPHOSPHATE:MITOCHONDRIA',variable] = False
    
    metabolites.loc['ATP:CYTOSOL',conc] = 9.600000e-03
    metabolites.loc['ATP:CYTOSOL',variable] = False
    metabolites.loc['ADP:CYTOSOL',conc] = 5.600000e-04
    metabolites.loc['ADP:CYTOSOL',variable] = False
    metabolites.loc['ORTHOPHOSPHATE:CYTOSOL',conc] = 2.000000e-02
    metabolites.loc['ORTHOPHOSPHATE:CYTOSOL',variable] = False
    
    metabolites.loc['UTP:CYTOSOL',conc] = 9.600000e-03
    metabolites.loc['UTP:CYTOSOL',variable] = False
    metabolites.loc['UDP:CYTOSOL',conc] = 5.600000e-04
    metabolites.loc['UDP:CYTOSOL',variable] = False
    metabolites.loc['DIPHOSPHATE:CYTOSOL',conc] = 2.000000e-02
    metabolites.loc['DIPHOSPHATE:CYTOSOL',variable] = False
    
    metabolites.loc['NADH:MITOCHONDRIA',conc] = 8.300000e-05 
    metabolites.loc['NADH:MITOCHONDRIA',variable] = False
    metabolites.loc['NAD+:MITOCHONDRIA',conc] = 2.600000e-03
    metabolites.loc['NAD+:MITOCHONDRIA',variable] = False
    
    metabolites.loc['NADH:CYTOSOL',conc] = 8.300000e-05 
    metabolites.loc['NADH:CYTOSOL',variable] = False
    metabolites.loc['NAD+:CYTOSOL',conc] = 2.600000e-03
    metabolites.loc['NAD+:CYTOSOL',variable] = False
    
    metabolites.loc['NADPH:CYTOSOL',conc] = 8.300000e-05 #also use 1.2e-4
    metabolites.loc['NADPH:CYTOSOL',variable] = False
    metabolites.loc['NADP+:CYTOSOL',conc] = 2.600000e-03 #also use 2.1e-6
    metabolites.loc['NADP+:CYTOSOL',variable] = False
    
    metabolites.loc['COA:MITOCHONDRIA',conc] = 1.400000e-03
    metabolites.loc['COA:MITOCHONDRIA',variable] = False
    metabolites.loc['COA:CYTOSOL',conc] = 1.400000e-03
    metabolites.loc['COA:CYTOSOL',variable] = False
    
    metabolites.loc['CO2:MITOCHONDRIA',conc] = 1.000000e-04
    metabolites.loc['CO2:MITOCHONDRIA',variable] = False
    metabolites.loc['CO2:CYTOSOL',conc] = 1.000000e-04
    metabolites.loc['CO2:CYTOSOL',variable] = False 
    
    metabolites.loc['H2O:MITOCHONDRIA',conc] = 55.5
    metabolites.loc['H2O:MITOCHONDRIA',variable] = False
    metabolites.loc['H2O:CYTOSOL',conc] = 55.5
    metabolites.loc['H2O:CYTOSOL',variable] = False 
    
    metabolites.loc['BETA-D-GLUCOSE:CYTOSOL',conc] = 2.0e-03
    metabolites.loc['BETA-D-GLUCOSE:CYTOSOL',variable] = False 
    
    metabolites.loc["CHITOBIOSE:CYTOSOL",conc] = 2.0e-09
    metabolites.loc["CHITOBIOSE:CYTOSOL",variable] = False 
    
    metabolites.loc['1,3-BETA-D-GLUCAN:CYTOSOL',conc] = 2.0e-09
    metabolites.loc['1,3-BETA-D-GLUCAN:CYTOSOL',variable] = False 
    
    metabolites.loc['L-GLUTAMINE:CYTOSOL',conc] = 2.0e-03
    metabolites.loc['L-GLUTAMINE:CYTOSOL',variable] = False 
    metabolites.loc['L-GLUTAMATE:CYTOSOL',conc] = 2.0e-04
    metabolites.loc['L-GLUTAMATE:CYTOSOL',variable] = False
    metabolites.loc['CELLOBIOSE:CYTOSOL',conc] = 2.0e-04
    metabolites.loc['CELLOBIOSE:CYTOSOL',variable] = False 
    
    metabolites.loc['N-ACETYL-D-GLUCOSAMINE:CYTOSOL',conc] = 1.0e-08
    metabolites.loc['N-ACETYL-D-GLUCOSAMINE:CYTOSOL',variable] = False 
    
    
    #When loading experimental concentrations, first copy current 
    #rule of thumb then overwrite with data values.
    metabolites[conc_exp] = metabolites[conc]
    metabolites.loc['2-OXOGLUTARATE:MITOCHONDRIA',conc_exp] = 0.0000329167257825644
    metabolites.loc['ISOCITRATE:MITOCHONDRIA',conc_exp] = 0.000102471198594958
    metabolites.loc['PHOSPHOENOLPYRUVATE:CYTOSOL',conc_exp] = 0.0000313819870767023
    metabolites.loc['D-GLYCERALDEHYDE-3-PHOSPHATE:CYTOSOL',conc_exp] = 0.0000321630949358949
    metabolites.loc['FUMARATE:MITOCHONDRIA',conc_exp] = 0.00128926137523035
    metabolites.loc['L-GLUTAMINE:CYTOSOL',conc_exp] = 0.0034421144256392
    metabolites.loc['PYRUVATE:MITOCHONDRIA',conc_exp] = 0.0000778160985710288
    metabolites.loc['PYRUVATE:CYTOSOL',conc_exp] = 0.0000778160985710288
    metabolites.loc['D-FRUCTOSE_6-PHOSPHATE:CYTOSOL',conc_exp] = 0.00495190614473117
    metabolites.loc['D-RIBOSE-5-PHOSPHATE:CYTOSOL',conc_exp] = 0.0000849533575412862
    metabolites.loc['CITRATE:MITOCHONDRIA',conc_exp] = 0.000485645834537379
    metabolites.loc['CITRATE:CYTOSOL',conc_exp] = 0.000485645834537379
    metabolites.loc['(S)-MALATE:MITOCHONDRIA',conc_exp] = 0.00213827060541153
    metabolites.loc['(S)-MALATE:CYTOSOL',conc_exp] = 0.00213827060541153
    metabolites.loc['SEDOHEPTULOSE_7-PHOSPHATE:CYTOSOL',conc_exp] = 0.00203246193132095
    metabolites.loc['D-RIBULOSE-5-PHOSPHATE:CYTOSOL',conc_exp] = 0.000468439334729429
    metabolites.loc['L-GLUTAMATE:CYTOSOL',conc_exp] = 0.00557167476932484
    metabolites.loc['SUCCINATE:MITOCHONDRIA',conc_exp] = 0.000942614767220802
    metabolites.loc['D-XYLULOSE-5-PHOSPHATE:CYTOSOL',conc_exp] = 0.000468439334729429
    
    nvariables = metabolites[metabolites[variable]].count()
    nvar = nvariables[variable]
    
    metabolites.sort_values(by=variable, axis=0,ascending=False, inplace=True,)
    #print(metabolites)
    
    
    #%%
    
    # ## Prepare model for optimization
    
    # - Adjust S Matrix to use only reactions with activity > 0, if necessary.
    # - Water stoichiometry in the stoichiometric matrix needs to be set to zero since water is held constant.
    # - The initial concentrations of the variable metabolites are random.
    # - All concentrations are changed to log counts.
    # - Equilibrium constants are calculated from standard free energies of reaction.
    # - R (reactant) and P (product) matrices are derived from S.
    
    # Make sure all the indices and columns are in the correct order:
    active_reactions = reactions[reactions[enzyme_level] > 0.0]
    #print(reactions)
    #print(metabolites.index)
    Sactive_index = S_active.index
    
    active_reactions = active_reactions.reindex(index = Sactive_index, copy = False)
    S_active = S_active.reindex(columns = metabolites.index, copy = False)
    S_active['H2O:MITOCHONDRIA'] = 0
    S_active['H2O:CYTOSOL'] = 0
    
    #####################################
    #####################################
    #THIS IS MAKING FLUX -> 0.0
    where_are_NaNs = np.isnan(S_active)
    S_active[where_are_NaNs] = 0
    
    #print(S_active[:])
    
    S_mat = S_active.values
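    # Equilibrium constants from standard free energies of reaction:
    # Keq = exp(-DG0 / RT).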
    
    Keq_constant = np.exp(-active_reactions[deltag0].astype('float')/RT)
    #print(Keq_constant)
    Keq_constant = Keq_constant.values
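    # P_mat keeps the positive (product) stoichiometries of S_mat and
    # R_back_mat the negative (reactant) stoichiometries.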
    
    P_mat = np.where(S_mat>0,S_mat,0)
    R_back_mat = np.where(S_mat<0, S_mat, 0)
    E_regulation = np.ones(Keq_constant.size) # This is the vector of enzyme activities, Range: 0 to 1.
    
    
    mu0 = 1 #Dummy parameter for now; reserved for free energies of formation
    
    #If no experimental data  is available, we can estimate using 'rule-of-thumb' data at 0.001
    
    conc_type=conc
    if (use_experimental_data):
        print("USING EXPERIMENTAL DATA")
        conc_type=conc_exp
    
    variable_concs = np.array(metabolites[conc_type].iloc[0:nvar].values, dtype=np.float64)
    v_log_concs = -10 + 10*np.random.rand(nvar) #Vary between 1 M to 1.0e-10 M
    v_concs = np.exp(v_log_concs)
    v_log_counts_stationary = np.log(v_concs*Concentration2Count)
    v_log_counts = v_log_counts_stationary
    #print(v_log_counts)
    
    fixed_concs = np.array(metabolites[conc_type].iloc[nvar:].values, dtype=np.float64)
    fixed_counts = fixed_concs*Concentration2Count
    f_log_counts = np.log(fixed_counts)
    
    complete_target_log_counts = np.log(Concentration2Count * metabolites[conc_type].values)
    target_v_log_counts = complete_target_log_counts[0:nvar]
    target_f_log_counts = complete_target_log_counts[nvar:]
    
    #WARNING:::::::::::::::CHANGE BACK TO ZEROS
    delta_increment_for_small_concs = (10**-50)*np.zeros(metabolites[conc_type].values.size);
    
    variable_concs_begin = np.array(metabolites[conc_type].iloc[0:nvar].values, dtype=np.float64)
    
    #%% Basic test
    
    v_log_counts = np.log(variable_concs_begin*Concentration2Count)
    
    #r_log_counts = -10 + 10*np.random.rand(v_log_counts.size)
    #v_log_counts = r_log_counts
    #print('====== Without adjusting Keq_constant ======')
    
    
    E_regulation = np.ones(Keq_constant.size) # This is the vector of enzyme activities, Range: 0 to 1.
    nvar = v_log_counts.size
    #WARNING: INPUT LOG_COUNTS TO ALL FUNCTIONS. CONVERSION TO COUNTS IS DONE INTERNALLY
    res_lsq1 = least_squares(max_entropy_functions.derivatives, v_log_counts, method='lm',xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, E_regulation))
    if (res_lsq1.success==False):
        res_lsq1 = least_squares(max_entropy_functions.derivatives, v_log_counts,method='dogbox',xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, E_regulation))
        if (res_lsq1.success==False):
            res_lsq1 = least_squares(max_entropy_functions.derivatives, v_log_counts,method='trf',xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, E_regulation))
            
    
    rxn_flux = max_entropy_functions.oddsDiff(res_lsq1.x, f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, E_regulation)
    

    # In[ ]:
    begin_log_metabolites = np.append(res_lsq1.x,f_log_counts)
    ##########################################
    ##########################################
    #####################TESTER###############
    
    E_regulation = np.ones(Keq_constant.size) # This is the vector of enzyme activities, Range: 0 to 1.
    log_metabolites = np.append(res_lsq1.x,f_log_counts)
    KQ_f = max_entropy_functions.odds(log_metabolites,mu0,S_mat, R_back_mat, P_mat, delta_increment_for_small_concs,Keq_constant)
    
    
    Keq_inverse = np.power(Keq_constant,-1)
    KQ_r = max_entropy_functions.odds(log_metabolites,mu0,-S_mat, P_mat, R_back_mat, delta_increment_for_small_concs,Keq_inverse,-1)
    
    [RR,Jac] = max_entropy_functions.calc_Jac2(res_lsq1.x, f_log_counts, S_mat, delta_increment_for_small_concs, KQ_f, KQ_r, E_regulation)
    A = max_entropy_functions.calc_A(res_lsq1.x,f_log_counts, S_mat, Jac, E_regulation )
    
    [ccc,fcc] = max_entropy_functions.conc_flux_control_coeff(nvar, A, S_mat, rxn_flux, RR)
    
    React_Choice=6
    
    newE = max_entropy_functions.calc_reg_E_step(E_regulation,React_Choice, nvar, res_lsq1.x, f_log_counts, complete_target_log_counts, 
                           S_mat, A, rxn_flux,KQ_f)
        
        
    delta_S_metab = max_entropy_functions.calc_deltaS_metab(res_lsq1.x, target_v_log_counts);
    
    ipolicy = 7 #use ipolicy=1 or 4
    reaction_choice = max_entropy_functions.get_enzyme2regulate(ipolicy, delta_S_metab, ccc, KQ_f, E_regulation, res_lsq1.x)                                                        

     #%%
    
    #device = torch.device("cpu")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Using device:', device)

    
    #set variables in ML program
    me.device=device
    me.v_log_counts_static = v_log_counts_stationary
    me.target_v_log_counts = target_v_log_counts
    me.complete_target_log_counts = complete_target_log_counts
    me.Keq_constant = Keq_constant
    me.f_log_counts = f_log_counts
    
    me.P_mat = P_mat
    me.R_back_mat = R_back_mat
    me.S_mat = S_mat
    me.delta_increment_for_small_concs = delta_increment_for_small_concs
    me.nvar = nvar
    me.mu0 = mu0
    
    me.gamma = gamma
    me.num_rxns = Keq_constant.size
    me.penalty_reward_scalar=penalty_reward_scalar
    
        
    #%%
    N, D_in, H, D_out = 1, Keq_constant.size,  20*Keq_constant.size, 1
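    # Value network: maps the enzyme-activity state vector (one entry per
    # reaction) through a single tanh hidden layer (20x wider than the input)
    # to a scalar state-value estimate.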

    # Create random Tensors to hold inputs and outputs
    x = torch.rand(1000, D_in, device=device)

    nn_model = torch.nn.Sequential(
            torch.nn.Linear(D_in, H),
            torch.nn.Tanh(),
            torch.nn.Linear(H,D_out)).to(device)

    loss_fn = torch.nn.MSELoss(reduction='sum')
    #learning_rate=5e-6
    #optimizer = torch.optim.SGD(nn_model.parameters(), lr=learning_rate, momentum=0.9)
    optimizer = torch.optim.SGD(nn_model.parameters(), lr=learning_rate, momentum=0.9)
    
    #optimizer = torch.optim.Adam(nn_model.parameters(), lr=3e-4)
    
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=200, verbose=True, min_lr=1e-10,cooldown=10,threshold=1e-5)
    #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=100, verbose=True, min_lr=1e-10,cooldown=10,threshold=1e-4)
    
    #%% SGD UPDATE TEST

     #attempted iterations to update theta_linear
    v_log_counts = v_log_counts_stationary.copy()
    episodic_loss = []
    episodic_loss_max = []
    episodic_epr = []
    episodic_reward = []
    
    episodic_nn_step = []
    episodic_random_step = []
    
    epsilon_greedy_init = epsilon
    
    final_states=np.zeros(Keq_constant.size)
    final_KQ_fs=np.zeros(Keq_constant.size)
    final_KQ_rs=np.zeros(Keq_constant.size)
    epr_per_state=[]
    was_state_terminal=[]
    

    for update in range(0,updates):
        
        x_changing = 1*torch.rand(1000, D_in, device=device)
    
        
        #generate state to use
        state_sample = np.zeros(Keq_constant.size)
        for sample in range(0,len(state_sample)):
            state_sample[sample] = np.random.uniform(1,1)
    
        #annealing test
        if ((update % eps_threshold== 0) and (update != 0)):
            epsilon=epsilon/2
            print("RESET epsilon ANNEALING")
            print(epsilon)
    
        prediction_x_changing_previous = nn_model(x_changing)
        #nn_model.train()
        [sum_reward, average_loss,max_loss,final_epr,final_state,final_KQ_f,final_KQ_r, reached_terminal_state,\
         random_steps_taken,nn_steps_taken] = me.sarsa_n(nn_model,loss_fn, optimizer, scheduler, state_sample, n_back_step, epsilon)
        
        print("EPISODE")
        print(update)
        print("MAXIMUM LAYER WEIGHTS")
        for layer in nn_model.modules():
            try:
                print(torch.max(layer.weight))
            except:
                print("")
        print('random,nn steps')
        print(random_steps_taken)
        print(nn_steps_taken)
        if (reached_terminal_state):
            was_state_terminal.append(1)
        else:
            was_state_terminal.append(0)
        

        final_states = np.vstack((final_states,final_state))
        final_KQ_fs = np.vstack((final_KQ_fs,final_KQ_f))
        final_KQ_rs = np.vstack((final_KQ_rs,final_KQ_r))
        epr_per_state.append(final_epr)
            
        episodic_epr.append(final_epr)
        
        episodic_loss.append(average_loss)
        
        episodic_loss_max.append(max_loss)
        episodic_reward.append(sum_reward)
        episodic_nn_step.append(nn_steps_taken)
        episodic_random_step.append(random_steps_taken)
        scheduler.step(average_loss)
        print("TOTAL REWARD")
        print(sum_reward)
        print("ave loss")
        print(average_loss)
        print("max_loss")
        print(max_loss)
        
        print(optimizer.state_dict())
        print(scheduler.state_dict())
        prediction_x_changing = nn_model(x_changing)
        
        total_prediction_changing_diff = sum(abs(prediction_x_changing - prediction_x_changing_previous))
        print("TOTALPREDICTION")
        print(total_prediction_changing_diff)

        np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/data/'+
                    'temp_episodic_loss_'+str(n_back_step) +
                    '_lr'+str(learning_rate)+
                    '_'+str(eps_threshold)+
                    '_eps'+str(epsilon_greedy_init)+
                    '_'+str(sim_number)+
                    '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                    '_use_experimental_metab_'+str(int(use_experimental_data))+ 
                    '.txt', episodic_loss, fmt='%f')

        np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/data/'+
                    'temp_epr_'+str(n_back_step) +
                    '_lr'+str(learning_rate)+
                    '_'+str(eps_threshold)+
                    '_eps'+str(epsilon_greedy_init)+
                    '_'+str(sim_number)+
                    '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                    '_use_experimental_metab_'+str(int(use_experimental_data))+
                    '.txt', episodic_epr, fmt='%f')

        np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/data/'+
                    'temp_episodic_reward_'+str(n_back_step)+
                    '_lr'+str(learning_rate)+
                    '_'+str(eps_threshold)+
                    '_eps'+str(epsilon_greedy_init)+'_'+str(sim_number)+
                    '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                    '_use_experimental_metab_'+str(int(use_experimental_data))+
                    '.txt', episodic_reward, fmt='%f')
                    
        np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/data/'+
                    'temp_final_states_'+str(n_back_step)+
                    '_lr'+str(learning_rate)+
                    '_'+str(eps_threshold)+
                    '_eps'+str(epsilon_greedy_init)+'_'+str(sim_number)+
                    '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                    '_use_experimental_metab_'+str(int(use_experimental_data))+
                    '.txt', final_states, fmt='%f')
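
        # Early stopping: after roughly 350 updates, halt once the range of the
        # last 100 episode losses drops below 0.025.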
        
        if (update > 200):
            if ((max(episodic_loss[-100:])-min(episodic_loss[-100:]) < 0.025) and (update > 350)):
                break
        
    
    #%%
    #gamma9 -> gamma=0.9
    #n8 -> n_back_step=8
    #k5 -> E=E-E/5 was used 
    #lr5e6 -> begin lr=0.5*e-6
    
    torch.save(nn_model, cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+
                'complete_model_gly_tca_gog_gamma9_n'+str(n_back_step)+'_k5_'\
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+
                '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_' +str(int(use_experimental_data))+
                '_sim'+str(sim_number) + '.pth')
    
    np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+
                'episodic_terminal_state_gamma9_n'+str(n_back_step)+'_k5_'
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+
                '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_'+str(int(use_experimental_data))+
                '_sim'+str(sim_number)+
                '.txt', was_state_terminal, fmt='%f')

    np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+
                'episodic_loss_gamma9_n'+str(n_back_step)+'_k5_'
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+
                '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_'+str(int(use_experimental_data))+
                '_sim'+str(sim_number)+
                '.txt', episodic_loss, fmt='%f')

    np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+
                'episodic_loss_max_gamma9_n'+str(n_back_step)+'_k5_'+
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+'_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_'+str(int(use_experimental_data))+
                '_sim'+str(sim_number)+
                '.txt', episodic_loss_max, fmt='%f')

    np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+
                'episodic_reward_gamma9_n'+str(n_back_step)+'_k5_'+
                '_lr'+str(learning_rate)+
               '_threshold'+str(eps_threshold)+
               '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_'+str(int(use_experimental_data))+
                '_sim'+str(sim_number)+
                '.txt', episodic_reward, fmt='%f')
    
    np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+
                'final_states_gamma9_n'+str(n_back_step)+'_k5_'+
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+
                '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_'+str(int(use_experimental_data))+
                '_sim'+str(sim_number)+\
                '.txt', final_states, fmt='%f')    

    np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+
                'final_KQF_gamma9_n'+str(n_back_step)+'_k5_'+
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+
                '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_'+str(int(use_experimental_data))+
                '_sim'+str(sim_number)+\
                '.txt', final_KQ_fs, fmt='%f')   

    np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+
                'final_KQR_gamma9_n'+str(n_back_step)+'_k5_'+
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+
                '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_'+str(int(use_experimental_data))+
                '_sim'+str(sim_number)+\
                '.txt', final_KQ_rs, fmt='%f')

    np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+
                'epr_per_state_gamma9_n'+str(n_back_step)+'_k5_'+
                '_lr'+str(learning_rate)+
                '_threshold'+str(eps_threshold)+
                '_eps'+str(epsilon_greedy_init)+
                '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
                '_use_experimental_metab_' +str(int(use_experimental_data))+
                '_sim'+str(sim_number)+
                '.txt', epr_per_state, fmt='%f')