def policy_function(nn_model, state, v_log_counts_path, *args): #last input argument should be epsilon for use when using greedy-epsilon algorithm. varargin = args nargin = len(varargin) epsilon_greedy = 0.0 if (nargin == 1): epsilon_greedy = varargin[0] used_random_step = False rxn_choices = [i for i in range(num_rxns)] unif_rand = np.random.uniform(0, 1) if ((unif_rand < epsilon_greedy) and (len(rxn_choices) > 0)): used_random_step = True random_choice = random.choice(rxn_choices) final_action = random_choice used_random_step = 1 res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_path, method='lm', xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state)) final_v_log_counts = res_lsq.x new_log_metabolites = np.append(final_v_log_counts, f_log_counts) final_state = state.copy() newE = max_entropy_functions.calc_new_enzyme_simple( state, final_action) final_state = state.copy() #DO NOT MODIFY ORIGINAL STATE final_state[final_action] = newE final_delta_s_metab = max_entropy_functions.calc_deltaS_metab( final_v_log_counts, target_v_log_counts) final_KQ_f = max_entropy_functions.odds( new_log_metabolites, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant) Keq_inverse = np.power(Keq_constant, -1) final_KQ_r = max_entropy_functions.odds( new_log_metabolites, mu0, -S_mat, P_mat, R_back_mat, delta_increment_for_small_concs, Keq_inverse, -1) value_current_state = state_value( nn_model, torch.from_numpy(final_state).float().to(device)) value_current_state = value_current_state.item() final_reward = reward_value(final_v_log_counts, v_log_counts_path, \ final_KQ_f, final_KQ_r,\ final_state, state) else: #In this, we must choose base on the best prediction base on environmental feedback v_log_counts = v_log_counts_path log_metabolites = np.append(v_log_counts, f_log_counts) rxn_flux = max_entropy_functions.oddsDiff( v_log_counts, f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state) KQ_f = max_entropy_functions.odds(log_metabolites, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant) Keq_inverse = np.power(Keq_constant, -1) KQ_r = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat, R_back_mat, delta_increment_for_small_concs, Keq_inverse, -1) [RR, Jac ] = max_entropy_functions.calc_Jac2(v_log_counts, f_log_counts, S_mat, delta_increment_for_small_concs, KQ_f, KQ_r, state) A = max_entropy_functions.calc_A(v_log_counts, f_log_counts, S_mat, Jac, state) delta_S_metab = max_entropy_functions.calc_deltaS_metab( v_log_counts, target_v_log_counts) [ccc, fcc] = max_entropy_functions.conc_flux_control_coeff( nvar, A, S_mat, rxn_flux, RR) indices = [i for i in range(0, len(Keq_constant))] #minimal varialbes to run optimization #variables=[state, v_log_counts, f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant] #with Pool() as pool: # async_result = pool.starmap(potential_step, zip(indices, repeat(variables))) # pool.close() # pool.join() #end = time.time() #print(v_log_counts_path) async_result = pstep.dispatch(rxn_choices, S_mat, R_back_mat, P_mat, Keq_constant, state, f_log_counts, v_log_counts_path) #print(async_result[0]) temp_action_value = -np.inf for act in range(0, len(async_result)): #new_v_log_counts = async_result[act][0] #output from poo new_v_log_counts = async_result[act] new_log_metabolites = np.append(new_v_log_counts, f_log_counts) trial_state_sample = state.copy() 
newE = max_entropy_functions.calc_new_enzyme_simple(state, act) trial_state_sample = state.copy() #DO NOT MODIFY ORIGINAL STATE trial_state_sample[act] = newE new_delta_S_metab = max_entropy_functions.calc_deltaS_metab( new_v_log_counts, target_v_log_counts) KQ_f_new = max_entropy_functions.odds( new_log_metabolites, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant) KQ_r_new = max_entropy_functions.odds( new_log_metabolites, mu0, -S_mat, P_mat, R_back_mat, delta_increment_for_small_concs, Keq_inverse, -1) value_current_state = state_value( nn_model, torch.from_numpy(trial_state_sample).float().to(device)) value_current_state = value_current_state.item() current_reward = reward_value(new_v_log_counts, v_log_counts, \ KQ_f_new, KQ_r_new,\ trial_state_sample, state) #print(current_reward) action_value = current_reward + (gamma) * value_current_state if (action_value > temp_action_value): #then a new action is the best. temp_action_value = action_value #set best output variables final_action = act final_reward = current_reward final_KQ_f = KQ_f_new final_KQ_r = KQ_r_new final_v_log_counts = new_v_log_counts final_state = trial_state_sample final_delta_s_metab = new_delta_S_metab #print(final_state) #print('final_delta_s_metab') #print(final_delta_s_metab) return [final_action,\ final_reward,\ final_KQ_f,\ final_KQ_r,\ final_v_log_counts,\ final_state,\ final_delta_s_metab,used_random_step,0.0,0.0]
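# The greedy branch of policy_function above scores every candidate regulation with a
# one-step lookahead, action_value = reward + gamma * V(next_state), and keeps the best
# action. A minimal standalone sketch of that selection rule (the names below are
# illustrative only, not this module's API):
import numpy as np

def greedy_one_step_choice(rewards, next_state_values, gamma=0.9):
    """Pick the action maximizing r + gamma * V(s') from parallel arrays."""
    action_values = np.asarray(rewards) + gamma * np.asarray(next_state_values)
    best = int(np.argmax(action_values))
    return best, action_values[best]

# Example: greedy_one_step_choice([0.1, -0.2, 0.05], [1.0, 2.0, 1.5]) -> (1, 1.6)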
                                          delta_increment_for_small_concs, Keq_constant, E_regulation)
KQ_f = max_entropy_functions.odds(log_metabolites, mu0, S_mat, R_back_mat, P_mat,
                                  delta_increment_for_small_concs, Keq_constant)
Keq_inverse = np.power(Keq_constant, -1)
KQ_r = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat, R_back_mat,
                                  delta_increment_for_small_concs, Keq_inverse, -1)
epr = max_entropy_functions.entropy_production_rate(KQ_f, KQ_r, E_regulation)
delta_S_metab = max_entropy_functions.calc_deltaS_metab(v_log_counts, target_v_log_counts)
delta_S = max_entropy_functions.calc_deltaS(v_log_counts, target_v_log_counts, f_log_counts, S_mat, KQ_f)
[RR, Jac] = max_entropy_functions.calc_Jac2(v_log_counts, f_log_counts, S_mat,
                                            delta_increment_for_small_concs, KQ_f, KQ_r, E_regulation)
A = max_entropy_functions.calc_A(v_log_counts, f_log_counts, S_mat, Jac, E_regulation)
[ccc, fcc] = max_entropy_functions.conc_flux_control_coeff(nvar, A, S_mat, rxn_flux, RR)
def sarsa_n(nn_model, loss_fn, optimizer, scheduler, state_sample, n_back_step, epsilon_greedy): total_time_cpu = 0 total_time_nn = 0 #reset for each episode. policy will add random_steps_taken = 0 nn_steps_taken = 0 final_state = [] final_KQ_f = [] final_KQ_r = [] reached_terminal_state = False average_loss = [] final_reward = 0 sum_reward_episode = 0 end_of_path = 5000 #this is the maximum length a path can take KQ_f_matrix = np.zeros(shape=(num_rxns, end_of_path + 1)) KQ_r_matrix = np.zeros(shape=(num_rxns, end_of_path + 1)) states_matrix = np.zeros(shape=(num_rxns, end_of_path + 1)) delta_S_metab_matrix = np.zeros(shape=(nvar, end_of_path + 1)) v_log_counts_matrix = np.zeros(shape=(nvar, end_of_path + 1)) states_matrix[:, 0] = state_sample res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_static, method='lm', xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, states_matrix[:, 0])) v_log_counts_matrix[:, 0] = res_lsq.x.copy() log_metabolites = np.append(v_log_counts_matrix[:, 0], f_log_counts) rxn_flux_init = max_entropy_functions.oddsDiff( v_log_counts_matrix[:, 0], f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, states_matrix[:, 0]) KQ_f_matrix[:, 0] = max_entropy_functions.odds( log_metabolites, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant) Keq_inverse = np.power(Keq_constant, -1) KQ_r_matrix[:, 0] = max_entropy_functions.odds( log_metabolites, mu0, -S_mat, P_mat, R_back_mat, delta_increment_for_small_concs, Keq_inverse, -1) delta_S_metab_matrix[:, 0] = max_entropy_functions.calc_deltaS_metab( v_log_counts_matrix[:, 0], target_v_log_counts) reward_vec = np.zeros(end_of_path + 1) reward_vec[0] = 0.0 rxn_flux_path = rxn_flux_init.copy() for t in range(0, end_of_path): if (t < end_of_path): #This represents the choice from the current policy. [React_Choice,reward_vec[t+1],\ KQ_f_matrix[:,t+1], KQ_r_matrix[:,t+1],\ v_log_counts_matrix[:,t+1],\ states_matrix[:,t+1],\ delta_S_metab_matrix[:,t+1],\ used_random_step,time_cpu,time_nn] = policy_function(nn_model, states_matrix[:,t], v_log_counts_matrix[:,t], epsilon_greedy)#regulate each reaction. total_time_cpu += time_cpu total_time_nn += time_nn if (used_random_step): random_steps_taken += 1 else: nn_steps_taken += 1 if (React_Choice == -1): print("bad reaction choice, using action = -1") break rxn_flux_path = max_entropy_functions.oddsDiff( v_log_counts_matrix[:, t + 1], f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, states_matrix[:, t + 1]) if (np.max(rxn_flux_path) < 1.0): print("draining flux") break epr_path = max_entropy_functions.entropy_production_rate( KQ_f_matrix[:, t + 1], KQ_r_matrix[:, t + 1], states_matrix[:, t + 1]) sum_reward_episode += reward_vec[t + 1] current_state = states_matrix[:, t + 1].copy() #We stop the path if we have no more positive loss function values, or if we revisit a state. 
if ((delta_S_metab_matrix[:, t + 1] <= 0.0).all()): end_of_path = t + 1 #stops simulation at step t+1 reached_terminal_state = True final_state = states_matrix[:, t + 1].copy() final_KQ_f = KQ_f_matrix[:, t + 1].copy() final_KQ_r = KQ_r_matrix[:, t + 1].copy() final_reward = epr_path print( "**************************************Path Length ds<0******************************************" ) print(end_of_path) print("Final STATE") print(states_matrix[:, t + 1]) print(rxn_flux_path) print("original epr") print(epr_path) print("all rewards") print(reward_vec[0:t + 1]) ##BEGIN LEARNING tau = t - n_back_step + 1 if (tau >= 0): #breakpoint() estimate_value = torch.zeros(1, device=device) for i in range(tau + 1, min(tau + n_back_step, end_of_path) + 1): estimate_value += (gamma**(i - tau - 1)) * reward_vec[i] if ((tau + n_back_step) < end_of_path): begin_nn = time.time() value_tau_n = state_value( nn_model, torch.from_numpy( states_matrix[:, tau + n_back_step]).float().to(device)) end_nn = time.time() total_time_nn += end_nn - begin_nn estimate_value += (gamma**(n_back_step)) * value_tau_n begin_nn = time.time() value_tau = state_value( nn_model, torch.from_numpy(states_matrix[:, tau]).float().to(device)) end_nn = time.time() total_time_nn += end_nn - begin_nn if (value_tau.requires_grad == False): breakpoint() if (estimate_value.requires_grad == True): estimate_value.detach_() #WARNING #loss ordering should be input with requires_grad == True, #followed by target with requires_grad == False #breakpoint() begin_nn = time.time() loss = loss_fn(value_tau, estimate_value) #MSE optimizer.zero_grad() loss.backward() clipping_value = 1.0 torch.nn.utils.clip_grad_norm_(nn_model.parameters(), clipping_value) optimizer.step() end_nn = time.time() total_time_nn += end_nn - begin_nn average_loss.append(loss.item()) if (tau >= (end_of_path - 1)): break #after episode is finished, take average loss average_loss_episode = np.mean(average_loss) print("index of max error on path") print(average_loss.index(max(average_loss))) return [sum_reward_episode, average_loss_episode,max(average_loss),final_reward, final_state, final_KQ_f,final_KQ_r,\ reached_terminal_state, random_steps_taken,nn_steps_taken]
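# The update inside sarsa_n above is n-step (semi-gradient) SARSA: the regression target
# for V(S_tau) is
#     G_tau = sum_{i=tau+1}^{min(tau+n, T)} gamma^(i - tau - 1) * R_i  +  gamma^n * V(S_{tau+n}),
# where the bootstrap term is added only if tau + n < T (the end of the path).
# A small self-contained sketch of that target, assuming a callable `value_fn`
# (illustrative, not this module's state_value):
def n_step_target(reward_vec, states, tau, n, T, gamma, value_fn):
    """Bootstrapped n-step return used as the regression target for V(S_tau)."""
    target = 0.0
    for i in range(tau + 1, min(tau + n, T) + 1):
        target += (gamma ** (i - tau - 1)) * reward_vec[i]
    if tau + n < T:
        target += (gamma ** n) * value_fn(states[tau + n])
    return target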
def run(argv): try: os.makedirs(cwd + '/GLYCOLYSIS_TCA_GOGAT/data') except FileExistsError: # directory already exists pass try: os.makedirs(cwd + '/GLYCOLYSIS_TCA_GOGAT/models_final_data') except FileExistsError: # directory already exists pass pd.set_option('display.max_columns', None, 'display.max_rows', None) ###Default Values #If no experimental data is available, we can estimate using 'rule-of-thumb' data at 0.001 use_experimental_data = False learning_rate = 1e-8 #3rd epsilon = 0.5 #4th eps_threshold = 25 #5th gamma = 0.9 #6th updates = 500 penalty_reward_scalar = 0.0 #load input total = len(sys.argv) cmdargs = str(sys.argv) print("The total numbers of args passed to the script: %d " % total) print("Args list: %s " % cmdargs) print("Script name: %s" % str(sys.argv[0])) for i in range(total): print("Argument # %d : %s" % (i, str(sys.argv[i]))) sim_number = int(sys.argv[1]) n_back_step = int(sys.argv[2]) if (n_back_step < 1): print('n must be larger than zero') return if (total > 3): use_experimental_data = bool(int(sys.argv[3])) if (total > 4): learning_rate = float(sys.argv[4]) if (total > 5): epsilon = float(sys.argv[5]) if (total > 6): eps_threshold = float(sys.argv[6]) if (total > 7): gamma = float(sys.argv[7]) print("sim") print(sim_number) print("n_back_step") print(n_back_step) print("using experimental metabolite data") print(use_experimental_data) print("learning_rate") print(learning_rate) print("epsilon") print(epsilon) print("eps_threshold") print(eps_threshold) print("gamma") print(gamma) #Initial Values T = 298.15 R = 8.314e-03 RT = R * T N_avogadro = 6.022140857e+23 VolCell = 1.0e-15 Concentration2Count = N_avogadro * VolCell concentration_increment = 1 / (N_avogadro * VolCell) np.set_printoptions(suppress=True) #turn off printin fdat = open(cwd + '/GLYCOLYSIS_TCA_GOGAT/GLYCOLYSIS_TCA_GOGAT.dat', 'r') left = 'LEFT' right = 'RIGHT' left_compartment = 'LEFT_COMPARTMENT' right_compartment = 'RIGHT_COMPARTMENT' enzyme_level = 'ENZYME_LEVEL' deltag0 = 'DGZERO' deltag0_sigma = 'DGZERO StdDev' same_compartment = 'Same Compartment?' 
full_rxn = 'Full Rxn' reactions = pd.DataFrame(index=[], columns=[ left, right, left_compartment, right_compartment, enzyme_level, deltag0, deltag0_sigma, same_compartment, full_rxn ]) reactions.index.name = 'REACTION' S_matrix = pd.DataFrame(index=[], columns=[enzyme_level]) S_matrix.index.name = 'REACTION' for line in fdat: if (line.startswith('REACTION')): rxn_name = line[9:-1].lstrip() S_matrix.loc[rxn_name, enzyme_level] = 1.0 reactions.loc[rxn_name, enzyme_level] = 1.0 if (re.match("^LEFT\s", line)): line = line.upper() left_rxn = line[4:-1].lstrip() left_rxn = re.sub(r'\s+$', '', left_rxn) #Remove trailing white space reactions.loc[rxn_name, left] = left_rxn elif (re.match('^RIGHT\s', line)): line = line.upper() right_rxn = line[5:-1].lstrip() right_rxn = re.sub(r'\s+$', '', right_rxn) #Remove trailing white space reactions.loc[rxn_name, right] = right_rxn elif (line.startswith(left_compartment)): cpt_name = line[16:-1].lstrip() reactions.loc[rxn_name, left_compartment] = cpt_name reactants = re.split(' \+ ', left_rxn) for idx in reactants: values = re.split(' ', idx) if len(values) == 2: stoichiometry = np.float64(values[0]) molecule = values[1] if not re.search(':', molecule): molecule = molecule + ':' + cpt_name else: stoichiometry = np.float64(-1.0) molecule = values[0] if not re.search(':', molecule): molecule = molecule + ':' + cpt_name S_matrix.loc[rxn_name, molecule] = stoichiometry elif (line.startswith(right_compartment)): cpt_name = line[17:-1].lstrip() reactions.loc[rxn_name, right_compartment] = cpt_name products = re.split(' \+ ', right_rxn) for idx in products: values = re.split(' ', idx) if len(values) == 2: stoichiometry = np.float64(values[0]) molecule = values[1] if not re.search(':', molecule): molecule = molecule + ':' + cpt_name else: stoichiometry = np.float64(1.0) molecule = values[0] if not re.search(':', molecule): molecule = molecule + ':' + cpt_name S_matrix.loc[rxn_name, molecule] = stoichiometry elif (re.match("^ENZYME_LEVEL\s", line)): level = line[12:-1].lstrip() reactions.loc[rxn_name, enzyme_level] = float(level) S_matrix.loc[rxn_name, enzyme_level] = float(level) elif re.match('^COMMENT', line): continue elif re.match(r'//', line): continue elif re.match('^#', line): continue fdat.close() S_matrix.fillna(0, inplace=True) S_active = S_matrix[S_matrix[enzyme_level] > 0.0] active_reactions = reactions[reactions[enzyme_level] > 0.0] del S_active[enzyme_level] S_active = S_active.loc[:, (S_active != 0).any(axis=0)] np.shape(S_active.values) reactions[full_rxn] = reactions[left] + ' = ' + reactions[right] if (1): for idx in reactions.index: boltzmann_rxn_str = reactions.loc[idx, 'Full Rxn'] if re.search(':', boltzmann_rxn_str): all_cmprts = re.findall(':\S+', boltzmann_rxn_str) [s.replace(':', '') for s in all_cmprts] # remove all the ':'s different_compartments = 0 for cmpt in all_cmprts: if not re.match(all_cmprts[0], cmpt): different_compartments = 1 if ((not different_compartments) and (reactions[left_compartment].isnull or reactions[right_compartment].isnull)): reactions.loc[idx, left_compartment] = cmpt reactions.loc[idx, right_compartment] = cmpt reactions.loc[idx, same_compartment] = True if different_compartments: reactions.loc[idx, same_compartment] = False else: if (reactions.loc[idx, left_compartment] == reactions.loc[ idx, right_compartment]): reactions.loc[idx, same_compartment] = True else: reactions.loc[idx, same_compartment] = False # ## Calculate Standard Free Energies of Reaction reactions.loc['CSm', deltag0] = -35.1166 
reactions.loc['ACONTm', deltag0] = 7.62949 reactions.loc['ICDHxm', deltag0] = -2.872 reactions.loc['AKGDam', deltag0] = -36.3549 reactions.loc['SUCOASm', deltag0] = 1.924481 reactions.loc['SUCD1m', deltag0] = 0 reactions.loc['FUMm', deltag0] = -3.44873 reactions.loc['MDHm', deltag0] = 29.9942 reactions.loc['GAPD', deltag0] = 6.68673 reactions.loc['PGK', deltag0] = -18.4733 reactions.loc['TPI', deltag0] = 5.48642 reactions.loc['FBA', deltag0] = 20.5096 reactions.loc['PYK', deltag0] = -27.5366 reactions.loc['PGM', deltag0] = 4.19953 reactions.loc['ENO', deltag0] = -4.08222 reactions.loc['HEX1', deltag0] = -17.0578 reactions.loc['PGI', deltag0] = 2.52401 reactions.loc['PFK', deltag0] = -15.4549 reactions.loc['PYRt2m', deltag0] = -RT * np.log(10) reactions.loc['PDHm', deltag0] = -43.9219 reactions.loc['GOGAT', deltag0] = 48.8552 reactions.loc['CSm', deltag0_sigma] = 0.930552 reactions.loc['ACONTm', deltag0_sigma] = 0.733847 reactions.loc['ICDHxm', deltag0_sigma] = 7.62095 reactions.loc['AKGDam', deltag0_sigma] = 7.97121 reactions.loc['SUCOASm', deltag0_sigma] = 1.48197 reactions.loc['SUCD1m', deltag0_sigma] = 2.31948 reactions.loc['FUMm', deltag0_sigma] = 0.607693 reactions.loc['MDHm', deltag0_sigma] = 0.422376 reactions.loc['GAPD', deltag0_sigma] = 0.895659 reactions.loc['PGK', deltag0_sigma] = 0.889982 reactions.loc['TPI', deltag0_sigma] = 0.753116 reactions.loc['FBA', deltag0_sigma] = 0.87227 reactions.loc['PYK', deltag0_sigma] = 0.939774 reactions.loc['PGM', deltag0_sigma] = 0.65542 reactions.loc['ENO', deltag0_sigma] = 0.734193 reactions.loc['HEX1', deltag0_sigma] = 0.715237 reactions.loc['PGI', deltag0_sigma] = 0.596775 reactions.loc['PFK', deltag0_sigma] = 0.886629 reactions.loc['PYRt2m', deltag0_sigma] = 0 reactions.loc['PDHm', deltag0_sigma] = 7.66459 reactions.loc['GOGAT', deltag0_sigma] = 2.0508 # ## Set Fixed Concentrations/Boundary Conditions conc = 'Conc' variable = 'Variable' conc_exp = 'Conc_Experimental' metabolites = pd.DataFrame(index=S_active.columns, columns=[conc, conc_exp, variable]) metabolites[conc] = 0.001 metabolites[variable] = True # Set the fixed metabolites: metabolites.loc['ATP:MITOCHONDRIA', conc] = 9.600000e-03 metabolites.loc['ATP:MITOCHONDRIA', variable] = False metabolites.loc['ADP:MITOCHONDRIA', conc] = 5.600000e-04 metabolites.loc['ADP:MITOCHONDRIA', variable] = False metabolites.loc['ORTHOPHOSPHATE:MITOCHONDRIA', conc] = 2.000000e-02 metabolites.loc['ORTHOPHOSPHATE:MITOCHONDRIA', variable] = False metabolites.loc['ATP:CYTOSOL', conc] = 9.600000e-03 metabolites.loc['ATP:CYTOSOL', variable] = False metabolites.loc['ADP:CYTOSOL', conc] = 5.600000e-04 metabolites.loc['ADP:CYTOSOL', variable] = False metabolites.loc['ORTHOPHOSPHATE:CYTOSOL', conc] = 2.000000e-02 metabolites.loc['ORTHOPHOSPHATE:CYTOSOL', variable] = False metabolites.loc['NADH:MITOCHONDRIA', conc] = 8.300000e-05 metabolites.loc['NADH:MITOCHONDRIA', variable] = False metabolites.loc['NAD+:MITOCHONDRIA', conc] = 2.600000e-03 metabolites.loc['NAD+:MITOCHONDRIA', variable] = False metabolites.loc['NADH:CYTOSOL', conc] = 8.300000e-05 metabolites.loc['NADH:CYTOSOL', variable] = False metabolites.loc['NAD+:CYTOSOL', conc] = 2.600000e-03 metabolites.loc['NAD+:CYTOSOL', variable] = False metabolites.loc['ACETYL-COA:MITOCHONDRIA', conc] = 6.06E-04 metabolites.loc['ACETYL-COA:MITOCHONDRIA', variable] = True metabolites.loc['COA:MITOCHONDRIA', conc] = 1.400000e-03 metabolites.loc['COA:MITOCHONDRIA', variable] = False metabolites.loc['CO2:MITOCHONDRIA', conc] = 1.000000e-04 
metabolites.loc['CO2:MITOCHONDRIA', variable] = False metabolites.loc['H2O:MITOCHONDRIA', conc] = 55.5 metabolites.loc['H2O:MITOCHONDRIA', variable] = False metabolites.loc['H2O:CYTOSOL', conc] = 55.5 metabolites.loc['H2O:CYTOSOL', variable] = False metabolites.loc['BETA-D-GLUCOSE:CYTOSOL', conc] = 2.000000e-03 metabolites.loc['BETA-D-GLUCOSE:CYTOSOL', variable] = False metabolites.loc['L-GLUTAMATE:MITOCHONDRIA', conc] = 9.60e-05 metabolites.loc['L-GLUTAMATE:MITOCHONDRIA', variable] = False metabolites.loc['L-GLUTAMINE:MITOCHONDRIA', conc] = 3.81e-03 metabolites.loc['L-GLUTAMINE:MITOCHONDRIA', variable] = False #When loading experimental concentrations, first copy current #rule of thumb then overwrite with data values. metabolites[conc_exp] = metabolites[conc] metabolites.loc['(S)-MALATE:MITOCHONDRIA', conc_exp] = 1.68e-03 metabolites.loc['BETA-D-GLUCOSE-6-PHOSPHATE:CYTOSOL', conc_exp] = 7.88e-03 metabolites.loc['D-GLYCERALDEHYDE-3-PHOSPHATE:CYTOSOL', conc_exp] = 2.71e-04 metabolites.loc['PYRUVATE:MITOCHONDRIA', conc_exp] = 3.66e-03 metabolites.loc['ISOCITRATE:MITOCHONDRIA', conc_exp] = 1.000000e-03 metabolites.loc['OXALOACETATE:MITOCHONDRIA', conc_exp] = 1.000000e-03 metabolites.loc['3-PHOSPHO-D-GLYCEROYL_PHOSPHATE:CYTOSOL', conc_exp] = 1.000000e-03 metabolites.loc['ACETYL-COA:MITOCHONDRIA', conc_exp] = 6.06e-04 metabolites.loc['CITRATE:MITOCHONDRIA', conc_exp] = 1.96e-03 metabolites.loc['2-OXOGLUTARATE:MITOCHONDRIA', conc_exp] = 4.43e-04 metabolites.loc['FUMARATE:MITOCHONDRIA', conc_exp] = 1.15e-04 metabolites.loc['SUCCINYL-COA:MITOCHONDRIA', conc_exp] = 2.33e-04 metabolites.loc['3-PHOSPHO-D-GLYCERATE:CYTOSOL', conc_exp] = 1.54e-03 metabolites.loc['GLYCERONE_PHOSPHATE:CYTOSOL', conc_exp] = 3.060000e-03 metabolites.loc['SUCCINATE:MITOCHONDRIA', conc_exp] = 5.69e-04 metabolites.loc['PHOSPHOENOLPYRUVATE:CYTOSOL', conc_exp] = 1.84e-04 metabolites.loc['D-FRUCTOSE_1,6-BISPHOSPHATE:CYTOSOL', conc_exp] = 1.52e-02 metabolites.loc['D-FRUCTOSE_6-PHOSPHATE:CYTOSOL', conc_exp] = 2.52e-03 metabolites.loc['PYRUVATE:CYTOSOL', conc_exp] = 3.66E-03 metabolites.loc['2-PHOSPHO-D-GLYCERATE:CYTOSOL', conc_exp] = 9.180e-05 #%% nvariables = metabolites[metabolites[variable]].count() nvar = nvariables[variable] metabolites.sort_values( by=variable, axis=0, ascending=False, inplace=True, ) # ## Prepare model for optimization # - Adjust S Matrix to use only reactions with activity > 0, if necessary. # - Water stoichiometry in the stiochiometric matrix needs to be set to zero since water is held constant. # - The initial concentrations of the variable metabolites are random. # - All concentrations are changed to log counts. # - Equilibrium constants are calculated from standard free energies of reaction. # - R (reactant) and P (product) matrices are derived from S. # Make sure all the indices and columns are in the correct order: active_reactions = reactions[reactions[enzyme_level] > 0.0] Sactive_index = S_active.index active_reactions.reindex(index=Sactive_index, copy=False) S_active = S_active.reindex(columns=metabolites.index, copy=False) S_active['H2O:MITOCHONDRIA'] = 0 S_active['H2O:CYTOSOL'] = 0 where_are_NaNs = np.isnan(S_active) S_active[where_are_NaNs] = 0 S_mat = S_active.values Keq_constant = np.exp(-active_reactions[deltag0].astype('float') / RT) Keq_constant = Keq_constant.values P_mat = np.where(S_mat > 0, S_mat, 0) R_back_mat = np.where(S_mat < 0, S_mat, 0) E_regulation = np.ones( Keq_constant.size ) # THis is the vector of enzyme activities, Range: 0 to 1. 
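    # Equilibrium constants come from the standard free energies loaded above,
    # Keq = exp(-DGZERO / RT). Worked example with values from this script: CSm has
    # DGZERO = -35.1166 kJ/mol and RT = 8.314e-3 * 298.15 ≈ 2.479 kJ/mol, so
    # Keq(CSm) = exp(35.1166 / 2.479) ≈ exp(14.17) ≈ 1.4e6.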
mu0 = 1 #Dummy parameter for now; reserved for free energies of formation conc_type = conc if (use_experimental_data): print("USING EXPERIMENTAL DATA") conc_type = conc_exp variable_concs = np.array(metabolites[conc_type].iloc[0:nvar].values, dtype=np.float64) v_log_concs = -10 + 10 * np.random.rand( nvar) #Vary between 1 M to 1.0e-10 M v_concs = np.exp(v_log_concs) v_log_counts_stationary = np.log(v_concs * Concentration2Count) v_log_counts = v_log_counts_stationary #display(v_log_counts) fixed_concs = np.array(metabolites[conc_type].iloc[nvar:].values, dtype=np.float64) fixed_counts = fixed_concs * Concentration2Count f_log_counts = np.log(fixed_counts) complete_target_log_counts = np.log(Concentration2Count * metabolites[conc_type].values) target_v_log_counts = complete_target_log_counts[0:nvar] target_f_log_counts = complete_target_log_counts[nvar:] delta_increment_for_small_concs = (10**-50) * np.zeros( metabolites[conc_type].values.size) variable_concs_begin = np.array(metabolites[conc_type].iloc[0:nvar].values, dtype=np.float64) #%% Basic test v_log_counts = np.log(variable_concs_begin * Concentration2Count) E_regulation = np.ones( Keq_constant.size ) # THis is the vector of enzyme activities, Range: 0 to 1. nvar = v_log_counts.size #WARNING: INPUT LOG_COUNTS TO ALL FUNCTIONS. CONVERSION TO COUNTS IS DONE INTERNALLY res_lsq1 = least_squares(max_entropy_functions.derivatives, v_log_counts, method='lm', xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, E_regulation)) res_lsq2 = least_squares(max_entropy_functions.derivatives, v_log_counts, method='dogbox', xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, E_regulation)) rxn_flux = max_entropy_functions.oddsDiff(res_lsq1.x, f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, E_regulation) begin_log_metabolites = np.append(res_lsq1.x, f_log_counts) E_regulation = np.ones( Keq_constant.size ) # THis is the vector of enzyme activities, Range: 0 to 1. 
log_metabolites = np.append(res_lsq1.x, f_log_counts) KQ_f = max_entropy_functions.odds(log_metabolites, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant) Keq_inverse = np.power(Keq_constant, -1) KQ_r = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat, R_back_mat, delta_increment_for_small_concs, Keq_inverse, -1) [RR, Jac] = max_entropy_functions.calc_Jac2(res_lsq1.x, f_log_counts, S_mat, delta_increment_for_small_concs, KQ_f, KQ_r, E_regulation) A = max_entropy_functions.calc_A(res_lsq1.x, f_log_counts, S_mat, Jac, E_regulation) [ccc, fcc ] = max_entropy_functions.conc_flux_control_coeff(nvar, A, S_mat, rxn_flux, RR) React_Choice = 6 newE = max_entropy_functions.calc_reg_E_step(E_regulation, React_Choice, nvar, res_lsq1.x, f_log_counts, complete_target_log_counts, S_mat, A, rxn_flux, KQ_f) delta_S_metab = max_entropy_functions.calc_deltaS_metab( res_lsq1.x, target_v_log_counts) ipolicy = 7 #use ipolicy=1 or 4 #%% END Basic test #Machine learning device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print('Using device:', device) #set variables in ML program me.cwd = cwd me.device = device me.v_log_counts_static = v_log_counts_stationary me.target_v_log_counts = target_v_log_counts me.complete_target_log_counts = complete_target_log_counts me.Keq_constant = Keq_constant me.f_log_counts = f_log_counts me.P_mat = P_mat me.R_back_mat = R_back_mat me.S_mat = S_mat me.delta_increment_for_small_concs = delta_increment_for_small_concs me.nvar = nvar me.mu0 = mu0 me.gamma = gamma me.num_rxns = Keq_constant.size me.penalty_reward_scalar = penalty_reward_scalar #%% N, D_in, H, D_out = 1, Keq_constant.size, 20 * Keq_constant.size, 1 #create neural network nn_model = torch.nn.Sequential(torch.nn.Linear(D_in, H), torch.nn.Tanh(), torch.nn.Linear(H, D_out)).to(device) loss_fn = torch.nn.MSELoss(reduction='sum') #loss_fn = torch.nn.L1Loss() #learning_rate=5e-6 #optimizer = torch.optim.SGD(nn_model.parameters(), lr=learning_rate, momentum=0.9) optimizer = torch.optim.SGD(nn_model.parameters(), lr=learning_rate, momentum=0.9) #optimizer = torch.optim.Adam(nn_model.parameters(), lr=3e-4) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=500, verbose=True, min_lr=1e-10, cooldown=10, threshold=1e-5) v_log_counts = v_log_counts_stationary.copy() episodic_loss = [] episodic_loss_max = [] episodic_epr = [] episodic_reward = [] episodic_nn_step = [] episodic_random_step = [] epsilon_greedy_init = epsilon final_states = np.zeros(Keq_constant.size) final_KQ_fs = np.zeros(Keq_constant.size) final_KQ_rs = np.zeros(Keq_constant.size) epr_per_state = [] for update in range(0, updates): x_changing = 1 * torch.rand(1000, D_in, device=device) #generate state to use state_sample = np.zeros(Keq_constant.size) for sample in range(0, len(state_sample)): state_sample[sample] = np.random.uniform(1, 1) #annealing test if ((update % eps_threshold == 0) and (update != 0)): epsilon = epsilon / 2 print("RESET epsilon ANNEALING") print(epsilon) prediction_x_changing_previous = nn_model(x_changing) #nn_model.train() [sum_reward, average_loss,max_loss,final_epr,final_state,final_KQ_f,final_KQ_r, reached_terminal_state,\ random_steps_taken,nn_steps_taken] = me.sarsa_n(nn_model,loss_fn, optimizer, scheduler, state_sample, n_back_step, epsilon) print("EPISODE") print(update) print("MAXIMUM LAYER WEIGHTS") for layer in nn_model.modules(): try: print(torch.max(layer.weight)) except: print("") print('random,nn steps') print(random_steps_taken) 
print(nn_steps_taken) if (reached_terminal_state): final_states = np.vstack((final_states, final_state)) final_KQ_fs = np.vstack((final_KQ_fs, final_KQ_f)) final_KQ_rs = np.vstack((final_KQ_rs, final_KQ_r)) epr_per_state.append(final_epr) episodic_epr.append(final_epr) episodic_loss.append(average_loss) episodic_loss_max.append(max_loss) episodic_reward.append(sum_reward) episodic_nn_step.append(nn_steps_taken) episodic_random_step.append(random_steps_taken) scheduler.step(average_loss) print("TOTAL REWARD") print(sum_reward) print("ave loss") print(average_loss) print("max_loss") print(max_loss) print(optimizer.state_dict) print(scheduler.state_dict()) prediction_x_changing = nn_model(x_changing) total_prediction_changing_diff = sum( abs(prediction_x_changing - prediction_x_changing_previous)) print("TOTALPREDICTION") print(total_prediction_changing_diff) np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/data/' + 'temp_episodic_loss_' + str(n_back_step) + '_lr' + str(learning_rate) + '_' + str(eps_threshold) + '_eps' + str(epsilon_greedy_init) + '_' + str(sim_number) + '_penalty_reward_scalar_' + str(me.penalty_reward_scalar) + '_use_experimental_metab_' + str(int(use_experimental_data)) + '.txt', episodic_loss, fmt='%f') np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/data/' + 'temp_epr_' + str(n_back_step) + '_lr' + str(learning_rate) + '_' + str(eps_threshold) + '_eps' + str(epsilon_greedy_init) + '_' + str(sim_number) + '_penalty_reward_scalar_' + str(me.penalty_reward_scalar) + '_use_experimental_metab_' + str(int(use_experimental_data)) + '.txt', episodic_epr, fmt='%f') np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/data/' + 'temp_episodic_reward_' + str(n_back_step) + '_lr' + str(learning_rate) + '_' + str(eps_threshold) + '_eps' + str(epsilon_greedy_init) + '_' + str(sim_number) + '_penalty_reward_scalar_' + str(me.penalty_reward_scalar) + '_use_experimental_metab_' + str(int(use_experimental_data)) + '.txt', episodic_reward, fmt='%f') if (update > 200): if ((max(episodic_loss[-100:]) - min(episodic_loss[-100:]) < 0.025) and (update > 350)): break #%% #gamma9 -> gamma=0.9 #n8 -> n_back_step=8 #k5 -> E=E-E/5 was used #lr5e6 -> begin lr=0.5*e-6 torch.save(nn_model, cwd+'/GLYCOLYSIS_TCA_GOGAT/models_final_data/'+ 'complete_model_gly_tca_gog_gamma9_n'+str(n_back_step)+'_k5_'\ '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_' +str(int(use_experimental_data))+ '_sim'+str(sim_number) + '.pth') np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/models_final_data/' + 'episodic_loss_gamma9_n' + str(n_back_step) + '_k5_' '_lr' + str(learning_rate) + '_threshold' + str(eps_threshold) + '_eps' + str(epsilon_greedy_init) + '_penalty_reward_scalar_' + str(me.penalty_reward_scalar) + '_use_experimental_metab_' + str(int(use_experimental_data)) + '_sim' + str(sim_number) + '.txt', episodic_loss, fmt='%f') np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/models_final_data/' + 'episodic_loss_max_gamma9_n' + str(n_back_step) + '_k5_' + '_lr' + str(learning_rate) + '_threshold' + str(eps_threshold) + '_eps' + str(epsilon_greedy_init) + '_penalty_reward_scalar_' + str(me.penalty_reward_scalar) + '_use_experimental_metab_' + str(int(use_experimental_data)) + '_sim' + str(sim_number) + '.txt', episodic_loss_max, fmt='%f') np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/models_final_data/' + 'episodic_reward_gamma9_n' + str(n_back_step) + '_k5_' + '_lr' + str(learning_rate) + '_threshold' + str(eps_threshold) + '_eps' + 
str(epsilon_greedy_init) + '_penalty_reward_scalar_' + str(me.penalty_reward_scalar) + '_use_experimental_metab_' + str(int(use_experimental_data)) + '_sim' + str(sim_number) + '.txt', episodic_reward, fmt='%f') np.savetxt(cwd+'/GLYCOLYSIS_TCA_GOGAT/models_final_data/'+ 'final_states_gamma9_n'+str(n_back_step)+'_k5_'+ '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '_sim'+str(sim_number)+\ '.txt', final_states, fmt='%f') np.savetxt(cwd+'/GLYCOLYSIS_TCA_GOGAT/models_final_data/'+ 'final_KQF_gamma9_n'+str(n_back_step)+'_k5_'+ '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '_sim'+str(sim_number)+\ '.txt', final_KQ_fs, fmt='%f') np.savetxt(cwd+'/GLYCOLYSIS_TCA_GOGAT/models_final_data/'+ 'final_KQR_gamma9_n'+str(n_back_step)+'_k5_'+ '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '_sim'+str(sim_number)+\ '.txt', final_KQ_rs, fmt='%f') np.savetxt(cwd + '/GLYCOLYSIS_TCA_GOGAT/models_final_data/' + 'epr_per_state_gamma9_n' + str(n_back_step) + '_k5_' + '_lr' + str(learning_rate) + '_threshold' + str(eps_threshold) + '_eps' + str(epsilon_greedy_init) + '_penalty_reward_scalar_' + str(me.penalty_reward_scalar) + '_use_experimental_metab_' + str(int(use_experimental_data)) + '_sim' + str(sim_number) + '.txt', epr_per_state, fmt='%f')
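# run() reads its settings from positional command-line arguments
# (sim_number, n_back_step, then optionally use_experimental_data, learning_rate,
# epsilon, eps_threshold, gamma). A typical invocation, assuming this file is the
# entry-point script (the argument values below are illustrative only):
#
#     python <this_script>.py 1 4 0 1e-8 0.5 25 0.9
#
# i.e. simulation 1, 4-step SARSA backups, rule-of-thumb concentrations, learning
# rate 1e-8, epsilon 0.5 halved every 25 episodes, gamma 0.9.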
Keq_inverse = np.power(Keq_constant, -1)
KQ_r = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat, R_back_mat,
                                  delta_increment_for_small_concs, Keq_inverse, -1)
[RR, Jac] = max_entropy_functions.calc_Jac2(res_lsq1.x, f_log_counts, S_mat,
                                            delta_increment_for_small_concs, KQ_f, KQ_r, E_regulation)
A = max_entropy_functions.calc_A(res_lsq1.x, f_log_counts, S_mat, Jac, E_regulation)
[ccc, fcc] = max_entropy_functions.conc_flux_control_coeff(nvar, A, S_mat, rxn_flux, RR)

React_Choice = 6
newE = max_entropy_functions.calc_reg_E_step(E_regulation, React_Choice, nvar, res_lsq1.x,
                                             f_log_counts, complete_target_log_counts,
                                             S_mat, A, rxn_flux, KQ_f)
delta_S_metab = max_entropy_functions.calc_deltaS_metab(res_lsq1.x, target_v_log_counts)

ipolicy = 7  #use ipolicy=1 or 4
reaction_choice = max_entropy_functions.get_enzyme2regulate(ipolicy, delta_S_metab, ccc,
                                                            KQ_f, E_regulation, res_lsq1.x)
display(newE)
display(reaction_choice)

#%%
import torch
device = torch.device("cpu")

import machine_learning_functions as me

gamma = 0.9
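# Throughout these scripts, concentrations (in M) are converted to molecule counts in
# one cell volume and then to natural-log counts before optimization:
# counts = conc * N_avogadro * VolCell and log_count = ln(counts), exactly as in the
# run() setup above. A tiny self-contained sketch of that conversion (constants copied
# from the setup; the helper name is illustrative):
import numpy as np

def conc_to_log_counts(conc_molar, n_avogadro=6.022140857e+23, vol_cell=1.0e-15):
    """Convert molar concentrations to log molecule counts for one cell volume."""
    concentration2count = n_avogadro * vol_cell
    return np.log(np.asarray(conc_molar) * concentration2count)

# Example: 1 mM -> ln(1e-3 * 6.022e23 * 1e-15) = ln(6.022e5) ≈ 13.3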
def policy_function(nn_model, state, v_log_counts_path, *args): #last input argument should be epsilon for use when using greedy-epsilon algorithm. KQ_f_matrix = np.zeros(shape=(num_rxns, num_rxns)) KQ_r_matrix = np.zeros(shape=(num_rxns, num_rxns)) states_matrix = np.zeros(shape=(num_rxns, num_rxns)) delta_S_metab_matrix = np.zeros(shape=(nvar, num_rxns)) v_log_counts_matrix = np.zeros(shape=(nvar, num_rxns)) varargin = args nargin = len(varargin) epsilon_greedy = 0.0 if (nargin == 1): epsilon_greedy = varargin[0] rxn_choices = [i for i in range(num_rxns)] res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_path, method=Method1, bounds=(-500, 500), xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state)) if (res_lsq.optimality > 1e-05): res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_path, method=Method2, xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state)) if (res_lsq.optimality > 1e-05): res_lsq = least_squares( max_entropy_functions.derivatives, v_log_counts_path, method=Method3, xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state)) #v_log_counts = v_log_counts_path.copy() v_log_counts = res_lsq.x if (np.sum(np.abs(v_log_counts - v_log_counts_path)) > 0.01): print("ERROR IN POLICY V_COUNT OPTIMIZATION") log_metabolites = np.append(v_log_counts, f_log_counts) rxn_flux = max_entropy_functions.oddsDiff(v_log_counts, f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state) KQ_f = max_entropy_functions.odds(log_metabolites, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant) Keq_inverse = np.power(Keq_constant, -1) KQ_r = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat, R_back_mat, delta_increment_for_small_concs, Keq_inverse, -1) [RR, Jac] = max_entropy_functions.calc_Jac2(v_log_counts, f_log_counts, S_mat, delta_increment_for_small_concs, KQ_f, KQ_r, state) A = max_entropy_functions.calc_A(v_log_counts, f_log_counts, S_mat, Jac, state) delta_S_metab = max_entropy_functions.calc_deltaS_metab( v_log_counts, target_v_log_counts) #delta_S = max_entropy_functions.calc_deltaS(v_log_counts, f_log_counts, S_mat, KQ_f) [ccc, fcc ] = max_entropy_functions.conc_flux_control_coeff(nvar, A, S_mat, rxn_flux, RR) init_action_val = -np.inf action_value_vec = np.zeros(num_rxns) state_value_vec = np.zeros(num_rxns) E_test_vec = np.zeros(num_rxns) old_E_test_vec = np.zeros(num_rxns) current_reward_vec = np.zeros(num_rxns) #print("BEGIN ACTIONS") for act in range(0, num_rxns): React_Choice = act #regulate each reaction. 
old_E = state[act] newE = max_entropy_functions.calc_reg_E_step(state, React_Choice, nvar, v_log_counts, f_log_counts, complete_target_log_counts, S_mat, A, rxn_flux, KQ_f,\ delta_S_metab) trial_state_sample = state.copy() #DO NOT MODIFY ORIGINAL STATE trial_state_sample[React_Choice] = newE states_matrix[:, act] = trial_state_sample.copy() #re-optimize new_res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts, method=Method1, bounds=(-500, 500), xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, trial_state_sample)) if (new_res_lsq.optimality >= 1e-05): new_res_lsq = least_squares( max_entropy_functions.derivatives, v_log_counts, method=Method2, xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, trial_state_sample)) if (new_res_lsq.optimality >= 1e-05): new_res_lsq = least_squares( max_entropy_functions.derivatives, v_log_counts, method=Method3, xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, trial_state_sample)) new_v_log_counts = new_res_lsq.x v_log_counts_matrix[:, act] = new_v_log_counts.copy() new_log_metabolites = np.append(new_v_log_counts, f_log_counts) KQ_f_matrix[:, act] = max_entropy_functions.odds( new_log_metabolites, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant) Keq_inverse = np.power(Keq_constant, -1) KQ_r_matrix[:, act] = max_entropy_functions.odds( new_log_metabolites, mu0, -S_mat, P_mat, R_back_mat, delta_increment_for_small_concs, Keq_inverse, -1) delta_S_metab_matrix[:, act] = max_entropy_functions.calc_deltaS_metab( new_v_log_counts, target_v_log_counts) value_current_state = state_value( nn_model, torch.from_numpy(trial_state_sample).float().to(device)) value_current_state = value_current_state.item() current_reward = reward_value(new_v_log_counts, v_log_counts, \ KQ_f_matrix[:,act], KQ_r_matrix[:,act],\ trial_state_sample, state) if (current_reward == penalty_exclusion_reward): rxn_choices.remove(act) action_value = current_reward + ( gamma) * value_current_state #note, action is using old KQ values action_value_vec[act] = action_value old_E_test_vec[act] = old_E E_test_vec[act] = newE state_value_vec[act] = value_current_state current_reward_vec[act] = current_reward #Should have smaller EPR #print(current_reward) #USE PENALTY REWARDS if (len(np.flatnonzero(action_value_vec == action_value_vec.max())) == 0): print("current action_value_vec") print(action_value_vec) print(action_value_vec.max()) #only choose from non penalty rewards action_choice_index = np.random.choice( np.flatnonzero(action_value_vec[rxn_choices] == action_value_vec[rxn_choices].max())) action_choice = rxn_choices[action_choice_index] arr_choice_index = np.flatnonzero( action_value_vec[rxn_choices] == action_value_vec[rxn_choices].max()) arr_choice = np.asarray(rxn_choices)[arr_choice_index] arr_choice_reg = np.flatnonzero(state[arr_choice] < 1) if (arr_choice_reg.size > 1): print('using tie breaker') print(arr_choice[arr_choice_reg]) action_choice = np.random.choice(arr_choice[arr_choice_reg]) used_random_step = False unif_rand = np.random.uniform(0, 1) if ((unif_rand < epsilon_greedy) and (len(rxn_choices) > 0)): #if (len(rxn_choices)>1): # rxn_choices.remove(action_choice) #print("USING EPSILON GREEDY") #print(action_choice) used_random_step = True random_choice = random.choice(rxn_choices) action_choice = random_choice used_random_step = 1 if (current_reward_vec == 
penalty_exclusion_reward).all(): print("OUT OF REWARDS") action_choice = -1 if current_reward_vec[action_choice] == penalty_exclusion_reward: print("state_value_vec") print(state_value_vec) print("current_reward_vec") print(current_reward_vec) print("used_random_step") print(used_random_step) print("rxn_choices") print(rxn_choices) # ============================================================================= # print("action_choice") # print(action_choice) # print(delta_S_metab_matrix[:,action_choice]) # print(states_matrix[:,action_choice]) # print(states_matrix[:,action_choice]*KQ_f_matrix[:,action_choice]) # print("rewards") # print(current_reward_vec) # # ============================================================================= #breakpoint() return [action_choice,current_reward_vec[action_choice],\ KQ_f_matrix[:,action_choice],KQ_r_matrix[:,action_choice],\ v_log_counts_matrix[:,action_choice],\ states_matrix[:,action_choice],\ delta_S_metab_matrix[:,action_choice],used_random_step]
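# When several candidate regulations tie for the maximum action value, the policy above
# breaks the tie at random (preferring reactions whose activity is still below 1). A
# minimal sketch of the argmax-with-random-tie-break pattern it relies on, using the same
# numpy idiom (names illustrative):
import numpy as np

def argmax_random_tiebreak(values, rng=np.random):
    """Index of a maximal entry, chosen uniformly among ties."""
    values = np.asarray(values)
    ties = np.flatnonzero(values == values.max())
    return int(rng.choice(ties))

# argmax_random_tiebreak([0.2, 0.7, 0.7]) -> 1 or 2, each with probability 1/2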
def sarsa_n(nn_model, loss_fn, optimizer, scheduler, state_sample, n_back_step, epsilon_greedy): #reset for each episode. policy will add random_steps_taken = 0 nn_steps_taken = 0 maximum_predicted_value = 0 layer_weight = torch.zeros(1, device=device) final_state = [] final_KQ_f = [] final_KQ_r = [] reached_terminal_state = False average_loss = [] final_reward = 0 sum_reward_episode = 0 end_of_path = 1000 #this is the maximum length a path can take states_matrix = np.zeros(shape=(num_rxns, end_of_path + 1)) states_matrix[:, 0] = state_sample res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_static, method=Method1, bounds=(-500, 500), xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, states_matrix[:, 0])) if (res_lsq.success == False): res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_static, method=Method2, xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, states_matrix[:, 0])) if (res_lsq.success == False): res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_static, method=Method3, xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, states_matrix[:, 0])) v_log_counts_current = res_lsq.x.copy() log_metabolites = np.append(v_log_counts_current, f_log_counts) rxn_flux_init = max_entropy_functions.oddsDiff( v_log_counts_current, f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, states_matrix[:, 0]) KQ_f_current = max_entropy_functions.odds(log_metabolites, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant) Keq_inverse = np.power(Keq_constant, -1) KQ_r_current = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat, R_back_mat, delta_increment_for_small_concs, Keq_inverse, -1) delta_S_metab_current = max_entropy_functions.calc_deltaS_metab( v_log_counts_current, target_v_log_counts) #[ccc,fcc] = max_entropy_functions.conc_flux_control_coeff(nvar, A_init, S_mat, rxn_flux_init, RR) reward_vec = np.zeros(end_of_path + 1) reward_vec[0] = 0.0 rxn_flux_path = rxn_flux_init.copy() #A_path = A_init.copy() for t in range(0, end_of_path): if (t < end_of_path): #This represents the choice from the current policy. [React_Choice,reward_vec[t+1],\ KQ_f_current, KQ_r_current,\ v_log_counts_current,\ states_matrix[:,t+1],\ delta_S_metab_current,\ used_random_step] = policy_function(nn_model, states_matrix[:,t], v_log_counts_current, epsilon_greedy)#regulate each reaction. if (used_random_step): random_steps_taken += 1 else: nn_steps_taken += 1 if (React_Choice == -1): print("out of rewards, final state") print(states_matrix[:, t + 1]) break rxn_flux_path = max_entropy_functions.oddsDiff( v_log_counts_current, f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, states_matrix[:, t + 1]) epr_path = max_entropy_functions.entropy_production_rate( KQ_f_current, KQ_r_current, states_matrix[:, t + 1]) sum_reward_episode += reward_vec[t + 1] final_state = states_matrix[:, t + 1].copy() #We stop the path if we have no more positive loss function values, or if we revisit a state. 
if ((delta_S_metab_current <= 0.0).all()): end_of_path = t + 1 #stops simulation at step t+1 reached_terminal_state = True final_state = states_matrix[:, t + 1].copy() final_KQ_f = KQ_f_current.copy() final_KQ_r = KQ_r_current.copy() final_reward = epr_path #breakpoint() print( "**************************************Path Length ds<0******************************************" ) print(end_of_path) print("Final STATE") print(states_matrix[:, t + 1]) print(rxn_flux_path) print("original epr") print(epr_path) print("all rewards:") #print(reward_vec[0:t+1]) tau = t - n_back_step + 1 if (tau >= 0): #THIS IS THE FORWARD estimate_value = torch.zeros(1, device=device) for i in range(tau + 1, min(tau + n_back_step, end_of_path) + 1): estimate_value += (gamma**(i - tau - 1)) * reward_vec[i] if ((tau + n_back_step) < end_of_path): value_tau_n = state_value( nn_model, torch.from_numpy( states_matrix[:, tau + n_back_step]).float().to(device)) estimate_value += (gamma**(n_back_step)) * value_tau_n value_tau = state_value( nn_model, torch.from_numpy(states_matrix[:, tau]).float().to(device)) if (value_tau.requires_grad == False): print('value tau broken') if (estimate_value.requires_grad == True): estimate_value.detach_() #THIS IS THE END OF FORWARD #WARNING #loss ordering should be input with requires_grad == True, #followed by target with requires_grad == False optimizer.zero_grad() loss = (loss_fn(value_tau, estimate_value)) #currently MSE loss.backward() clipping_value = 1.0 #torch.nn.utils.clip_grad_value_(nn_model.parameters(), clipping_value) torch.nn.utils.clip_grad_norm_(nn_model.parameters(), clipping_value) optimizer.step() average_loss.append(loss.item()) if (tau >= (end_of_path - 1)): break #after episode is finished, take average loss average_loss_episode = np.mean(average_loss) #print(average_loss) print("index of max error on path") print(average_loss.index(max(average_loss))) #print("All rewards") #print(reward_vec[0:t+1]) return [sum_reward_episode, average_loss_episode,max(average_loss),final_reward, final_state, final_KQ_f,final_KQ_r,\ reached_terminal_state, random_steps_taken,nn_steps_taken]
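# Each SARSA update above is a semi-gradient step: the bootstrapped n-step target is
# detached, only V(S_tau) carries gradients, and gradients are norm-clipped before the
# optimizer step. A compact sketch of that update with a generic torch model
# (illustrative, not this module's training loop):
import torch

def semi_gradient_update(model, optimizer, loss_fn, state_tau, target, clip=1.0):
    """One TD regression step: fit V(state_tau) toward a fixed (detached) target."""
    value_tau = model(state_tau)      # requires_grad == True
    target = target.detach()          # target is treated as a constant
    optimizer.zero_grad()
    loss = loss_fn(value_tau, target)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
    optimizer.step()
    return loss.item()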
def policy_function(nn_model, state, v_log_counts_path, *args ): #last input argument should be epsilon for use when using greedy-epsilon algorithm. varargin = args nargin = len(varargin) epsilon_greedy = 0.0 if (nargin == 1): epsilon_greedy = varargin[0] used_random_step=False rxn_choices = [i for i in range(num_rxns)] res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_path, method=Method1, xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state)) if (res_lsq.success==False): print("USING DOGBOX") print("v_log_counts_path") print(v_log_counts_path) print("state") print(state) res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_path, method=Method2, bounds=(-500,500),xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state)) if (res_lsq.success==False): res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts_path, method=Method3,xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state)) #v_log_counts = v_log_counts_path.copy() v_log_counts = res_lsq.x log_metabolites = np.append(v_log_counts, f_log_counts) rxn_flux = max_entropy_functions.oddsDiff(v_log_counts, f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, state) KQ_f = max_entropy_functions.odds(log_metabolites, mu0,S_mat, R_back_mat, P_mat, delta_increment_for_small_concs,Keq_constant); Keq_inverse = np.power(Keq_constant,-1) KQ_r = max_entropy_functions.odds(log_metabolites, mu0,-S_mat, P_mat, R_back_mat, delta_increment_for_small_concs,Keq_inverse,-1); [RR,Jac] = max_entropy_functions.calc_Jac2(v_log_counts, f_log_counts, S_mat, delta_increment_for_small_concs, KQ_f, KQ_r, state) A = max_entropy_functions.calc_A(v_log_counts, f_log_counts, S_mat, Jac, state ) delta_S_metab = max_entropy_functions.calc_deltaS_metab(v_log_counts, target_v_log_counts) [ccc,fcc] = max_entropy_functions.conc_flux_control_coeff(nvar, A, S_mat, rxn_flux, RR) indices = [i for i in range(0,len(Keq_constant))] action_value_vec = np.zeros(num_rxns) current_reward_vec = np.zeros(num_rxns) current_state_vec = np.zeros(num_rxns) variables=[nn_model,state, nvar, v_log_counts, f_log_counts,\ complete_target_log_counts, A, rxn_flux, KQ_f,\ delta_S_metab,\ mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant] start = time.time() with Pool() as pool: async_result = pool.starmap(potential_step, zip(indices, repeat(variables))) pool.close() pool.join() end = time.time() total = end-start #only choose from non penalty rewards time_cpu=0 time_nn=0 for act in range(0,len(async_result)): if (async_result[act][1] == penalty_exclusion_reward): rxn_choices.remove(act) action_value_vec[act] = async_result[act][0] current_reward_vec[act] = async_result[act][1] time_cpu+=async_result[act][7] time_nn+=async_result[act][8] current_state_vec[act] = async_result[act][9] #print(current_reward_vec) if (len(rxn_choices) == 0): print("OUT OF REWARDS") action_choice=-1 else: try: action_choice_index = np.random.choice(np.flatnonzero(action_value_vec[rxn_choices] == action_value_vec[rxn_choices].max())) action_choice = rxn_choices[action_choice_index] except: print("WARNING ERROR SHOULD NOT BE HAPPINING") print("rxn_choices") print(rxn_choices) print("action_value_vec") print(action_value_vec) print("action_value_vec[rxn_choices].max()") print(action_value_vec[rxn_choices].max()) 
print(np.flatnonzero(action_value_vec[rxn_choices] == action_value_vec[rxn_choices].max())) print("current_reward_vec") print(current_reward_vec) print("current_state_vec") print(current_state_vec) print("MAXIMUM LAYER WEIGHTS") for layer in nn_model.modules(): try: print(torch.max(layer.weight)) except: print("") print("async_result") print(async_result) action_choice = -1 unif_rand = np.random.uniform(0,1) if ( (unif_rand < epsilon_greedy) and (len(rxn_choices) > 0)): #if (len(rxn_choices)>1): # rxn_choices.remove(action_choice) #print("USING EPSILON GREEDY") #print(action_choice) used_random_step=True random_choice = random.choice(rxn_choices) action_choice = random_choice used_random_step=1 if (np.sum(np.abs(v_log_counts - v_log_counts_path)) > 0.1): print("ERROR IN POLICY V_COUNT OPTIMIZATION") #print("async_result") #print(async_result) print("state") print(state) print("v_log_counts") print(v_log_counts) print("v_log_counts_path") print(v_log_counts_path) print("current_reward_vec") print(current_reward_vec) print("action_value_vec") print(action_value_vec) print("rxn_choices") print(rxn_choices) print("MAXIMUM LAYER WEIGHTS") for layer in nn_model.modules(): try: print(torch.max(layer.weight)) except: print("") #async_result order #[action_value, current_reward,KQ_f_new,KQ_r_new,new_v_log_counts,trial_state_sample,new_delta_S_metab] return [action_choice,async_result[action_choice][1],\ async_result[action_choice][2],async_result[action_choice][3],\ async_result[action_choice][4],\ async_result[action_choice][5],\ async_result[action_choice][6],used_random_step,time_cpu,time_nn]
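# This policy_function variant evaluates every candidate regulation in parallel: each
# worker receives (action_index, shared_variables), returns that action's value and
# reward, and the parent then picks the best action. A minimal sketch of the same
# Pool.starmap pattern (toy worker, illustrative only):
from itertools import repeat
from multiprocessing import Pool

def _score_action(index, shared):
    """Toy stand-in for potential_step: score one candidate action."""
    base_reward, gamma = shared
    return index, base_reward + gamma * index

def score_all_actions(num_actions, shared):
    with Pool() as pool:
        results = pool.starmap(_score_action, zip(range(num_actions), repeat(shared)))
    return results

# score_all_actions(3, (0.5, 0.9)) -> [(0, 0.5), (1, 1.4), (2, 2.3)]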
def potential_step(index, other_args): React_Choice=index nn_model,state, nvar, v_log_counts, f_log_counts,\ complete_target_log_counts, A, rxn_flux, KQ_f,\ delta_S_metab,\ mu0, S_mat, R_back_mat, P_mat, \ delta_increment_for_small_concs, Keq_constant = other_args newE = max_entropy_functions.calc_reg_E_step(state, React_Choice, nvar, v_log_counts, f_log_counts, complete_target_log_counts, S_mat, A, rxn_flux, KQ_f,\ delta_S_metab) trial_state_sample = state.copy()#DO NOT MODIFY ORIGINAL STATE trial_state_sample[React_Choice] = newE #re-optimize start_cpu = time.time() new_res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts, method=Method1, xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, trial_state_sample)) if (new_res_lsq.success==False): print("USING DOGBOX") print("v_log_counts") print(v_log_counts) print("trial_state_sample") print(trial_state_sample) new_res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts, method=Method2, bounds=(-500,500), xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, trial_state_sample)) if (new_res_lsq.success==False): new_res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts, method=Method3,xtol=1e-15, args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant, trial_state_sample)) end_cpu = time.time() new_v_log_counts = new_res_lsq.x new_log_metabolites = np.append(new_v_log_counts, f_log_counts) new_delta_S_metab = max_entropy_functions.calc_deltaS_metab(new_v_log_counts, target_v_log_counts) KQ_f_new = max_entropy_functions.odds(new_log_metabolites, mu0,S_mat, R_back_mat, P_mat, delta_increment_for_small_concs,Keq_constant); Keq_inverse = np.power(Keq_constant,-1) KQ_r_new = max_entropy_functions.odds(new_log_metabolites, mu0,-S_mat, P_mat, R_back_mat, delta_increment_for_small_concs,Keq_inverse,-1); begin_nn = time.time() value_current_state = state_value(nn_model, torch.from_numpy(trial_state_sample).float().to(device) ) value_current_state = value_current_state.item() end_nn = time.time() current_reward = reward_value(new_v_log_counts, v_log_counts, \ KQ_f_new, KQ_r_new,\ trial_state_sample, state) action_value = current_reward + (gamma) * value_current_state #note, action is using old KQ values return [action_value, current_reward,KQ_f_new,KQ_r_new,new_v_log_counts,trial_state_sample,new_delta_S_metab, end_cpu-start_cpu,end_nn-begin_nn,value_current_state]
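# potential_step (and the policy/sarsa routines above) solve for steady-state log counts
# with scipy's least_squares and retry with a different solver when the first attempt
# does not converge. A condensed sketch of that retry cascade, assuming Method1/Method2/
# Method3 are solver names such as 'lm', 'dogbox', 'trf' (the exact choices are
# configured elsewhere in this module):
from scipy.optimize import least_squares

def solve_with_fallback(residual_fn, x0, extra_args, methods=('lm', 'dogbox', 'trf')):
    """Try each least-squares method in turn until one reports success."""
    result = None
    for method in methods:
        result = least_squares(residual_fn, x0, method=method, xtol=1e-15, args=extra_args)
        if result.success:
            break
    return result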
def run(argv): try: os.makedirs(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/data') except FileExistsError: # directory already exists pass try: os.makedirs(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data') except FileExistsError: # directory already exists pass #default values #If no experimental data is available, we can estimate using 'rule-of-thumb' data at 0.001 use_experimental_data=False learning_rate=1e-8 #3rd epsilon=0.05 #4th eps_threshold=25 #5th gamma = 0.9 #6th updates = 500 penalty_reward_scalar=0.0 #load input total = len(sys.argv) cmdargs = str(sys.argv) print ("The total numbers of args passed to the script: %d " % total) print ("Args list: %s " % cmdargs) print ("Script name: %s" % str(sys.argv[0])) for i in range(total): print ("Argument # %d : %s" % (i, str(sys.argv[i]))) sim_number=int(sys.argv[1]) n_back_step=int(sys.argv[2]) if (total > 3): use_experimental_data=bool(int(sys.argv[3])) if (total > 4): learning_rate=float(sys.argv[4]) if (total > 5): epsilon=float(sys.argv[5]) if (total > 6): eps_threshold=float(sys.argv[6]) if (total > 7): gamma=float(sys.argv[7]) pd.set_option('display.max_columns', None,'display.max_rows', None) print("sim") print(sim_number) print("n_back_step") print(n_back_step) print("using experimental metabolite data") print(use_experimental_data) print("learning_rate") print(learning_rate) print("epsilon") print(epsilon) print("eps_threshold") print(eps_threshold) print("gamma") print(gamma) T = 298.15 R = 8.314e-03 RT = R*T N_avogadro = 6.022140857e+23 VolCell = 1.0e-15 Concentration2Count = N_avogadro * VolCell concentration_increment = 1/(N_avogadro*VolCell) np.set_printoptions(suppress=True)#turn off printin # In[3]: #with open( cwd + '/TCA_PPP_GLYCOLYSIS_CELLWALL/TCA_PPP_Glycolysis_CellWall3b.dat', 'r') as f: # print(f.read()) # In[5]: fdat = open(cwd + '/TCA_PPP_GLYCOLYSIS_CELLWALL/TCA_PPP_Glycolysis_CellWall3b.dat', 'r') #fdat = open('TCA_PPP_Glycolysis.dat', 'r') left ='LEFT' right = 'RIGHT' left_compartment = 'LEFT_COMPARTMENT' right_compartment = 'RIGHT_COMPARTMENT' enzyme_level = 'ENZYME_LEVEL' deltag0 = 'DGZERO' deltag0_sigma = 'DGZERO StdDev' same_compartment = 'Same Compartment?' 
full_rxn = 'Full Rxn' reactions = pd.DataFrame(index=[],columns=[left, right, left_compartment, right_compartment, enzyme_level, deltag0, deltag0_sigma, same_compartment,full_rxn]) reactions.index.name='REACTION' S_matrix = pd.DataFrame(index=[],columns=[enzyme_level]) S_matrix.index.name='REACTION' for line in fdat: if (line.startswith('REACTION')): rxn_name = line[9:-1].lstrip() S_matrix.loc[rxn_name,enzyme_level] = 1.0 reactions.loc[rxn_name,enzyme_level] = 1.0 if (re.match("^LEFT\s",line)): line = line.upper() left_rxn = line[4:-1].lstrip() left_rxn = re.sub(r'\s+$', '', left_rxn) #Remove trailing white space reactions.loc[rxn_name,left] = left_rxn elif (re.match('^RIGHT\s',line)): line = line.upper() right_rxn = line[5:-1].lstrip() right_rxn = re.sub(r'\s+$', '', right_rxn) #Remove trailing white space reactions.loc[rxn_name,right] = right_rxn elif (line.startswith(left_compartment)): cpt_name = line[16:-1].lstrip() reactions.loc[rxn_name,left_compartment] = cpt_name reactants = re.split(' \+ ',left_rxn) for idx in reactants: values = re.split(' ', idx); if len(values) == 2: stoichiometry = np.float64(values[0]); molecule = values[1]; if not re.search(':',molecule): molecule = molecule + ':' + cpt_name else: stoichiometry = np.float64(-1.0); molecule = values[0]; if not re.search(':',molecule): molecule = molecule + ':' + cpt_name S_matrix.loc[rxn_name,molecule] = stoichiometry; elif (line.startswith(right_compartment)): cpt_name = line[17:-1].lstrip() reactions.loc[rxn_name,right_compartment] = cpt_name products = re.split(' \+ ',right_rxn) for idx in products: values = re.split(' ', idx); if len(values) == 2: stoichiometry = np.float64(values[0]); molecule = values[1]; if not re.search(':',molecule): molecule = molecule + ':' + cpt_name else: stoichiometry = np.float64(1.0); molecule = values[0]; if not re.search(':',molecule): molecule = molecule + ':' + cpt_name S_matrix.loc[rxn_name,molecule] = stoichiometry; elif (re.match("^ENZYME_LEVEL\s", line)): level = line[12:-1].lstrip() reactions.loc[rxn_name,enzyme_level] = float(level) S_matrix.loc[rxn_name,enzyme_level] = float(level) elif re.match('^COMMENT',line): continue elif re.match(r'//',line): continue elif re.match('^#',line): continue # elif (re.match("^[N,P]REGULATION\s", line)): # reg = line # reactions.loc[rxn_name,regulation] = reg fdat.close() S_matrix.fillna(0,inplace=True) S_active = S_matrix[S_matrix[enzyme_level] > 0.0] active_reactions = reactions[reactions[enzyme_level] > 0.0] del S_active[enzyme_level] # Delete any columns/metabolites that have all zeros in the S matrix: S_active = S_active.loc[:, (S_active != 0).any(axis=0)] np.shape(S_active.values) #print(S_active.shape) #print(S_active) reactions[full_rxn] = reactions[left] + ' = ' + reactions[right] # In[6]: if (1): for idx in reactions.index: #print(idx,flush=True) boltzmann_rxn_str = reactions.loc[idx,'Full Rxn'] if re.search(':',boltzmann_rxn_str): all_cmprts = re.findall(':\S+', boltzmann_rxn_str) [s.replace(':', '') for s in all_cmprts] # remove all the ':'s different_compartments = 0 for cmpt in all_cmprts: if not re.match(all_cmprts[0],cmpt): different_compartments = 1 if ((not different_compartments) and (reactions[left_compartment].isnull or reactions[right_compartment].isnull)): reactions.loc[idx,left_compartment] = cmpt reactions.loc[idx,right_compartment] = cmpt reactions.loc[idx,same_compartment] = True if different_compartments: reactions.loc[idx,same_compartment] = False else: if (reactions.loc[idx,left_compartment] == 
reactions.loc[idx,right_compartment]): reactions.loc[idx,same_compartment] = True else: reactions.loc[idx,same_compartment] = False #print(reactions) reactions.loc['CSm',deltag0] = -35.8057 reactions.loc['ACONTm',deltag0] = 7.62962 reactions.loc['ICDHxm',deltag0] = -2.6492 reactions.loc['AKGDam',deltag0] = -37.245 reactions.loc['SUCOASm',deltag0] = 2.01842 reactions.loc['SUCD1m',deltag0] = -379.579 reactions.loc['FUMm',deltag0] = -3.44728 reactions.loc['MDHm',deltag0] = 29.5419 reactions.loc['GAPD',deltag0] = 5.24202 reactions.loc['PGK',deltag0] = -18.5083 reactions.loc['TPI',deltag0] = 5.49798 reactions.loc['FBA',deltag0] = 21.4506 reactions.loc['PYK',deltag0] = -27.3548 reactions.loc['PGM',deltag0] = 4.17874 reactions.loc['ENO',deltag0] = -4.0817 reactions.loc['HEX1',deltag0] = -16.7776 reactions.loc['PGI',deltag0] = 2.52206 reactions.loc['PFK',deltag0] = -16.1049 reactions.loc['PYRt2m',deltag0] = -RT*np.log(10) reactions.loc['PDHm',deltag0] = -44.1315 reactions.loc['G6PDH2r',deltag0] = -3.89329 reactions.loc['PGL',deltag0] = -22.0813 reactions.loc['GND',deltag0] = 2.32254 reactions.loc['RPE',deltag0] = -3.37 reactions.loc['RPI',deltag0] = -1.96367 reactions.loc['TKT2',deltag0] = -10.0342 reactions.loc['TALA',deltag0] = -0.729232 reactions.loc['FBA3',deltag0] = 13.9499 reactions.loc['PFK_3',deltag0] = -9.33337 reactions.loc['TKT1',deltag0] = -3.79303 reactions.loc['Glutamine-fructose-6-phosphate aminotransferase',deltag0] = -13.4054 reactions.loc['Glucosamine-6-phosphate N-acetyltransferase',deltag0] = -23.7065 reactions.loc['N-acetylglucosamine-phosphate mutase',deltag0] = 4.65558 reactions.loc['UDP N-acetylglucosamine pyrophosphorylase',deltag0] = 0.539147 reactions.loc['Hyaluronan Synthase',deltag0] = -14.4143 reactions.loc['Phosphoglucomutase',deltag0] = 7.41831 reactions.loc['UTP-glucose-1-phosphate uridylyltransferase',deltag0] = 1.51043 reactions.loc['1,3-beta-glucan synthase',deltag0] = -11.534 reactions.loc['Citrate-oxaloacetate exchange',deltag0] = 0 reactions.loc['CITRATE_LYASE',deltag0] = 10.0299 reactions.loc['MDHc',deltag0] = -29.5419 reactions.loc['MDH-NADPc',deltag0] = -29.7376 reactions.loc['ME1c',deltag0] = 4.56191 reactions.loc['ME2c',deltag0] = 4.75763 reactions.loc['Pyruvate Carboxylase',deltag0] = -0.795825 reactions.loc['Aldose 1-epimerase',deltag0] = 0 reactions.loc['HEX1a',deltag0] = -16.7776 reactions.loc['PGI-1',deltag0] = 2.52206 reactions.loc['CSm',deltag0_sigma] = 0.930552 reactions.loc['ACONTm',deltag0_sigma] = 0.733847 reactions.loc['ICDHxm',deltag0_sigma] = 7.62095 reactions.loc['AKGDam',deltag0_sigma] = 7.97121 reactions.loc['SUCOASm',deltag0_sigma] = 1.48197 reactions.loc['SUCD1m',deltag0_sigma] = 7.8098 reactions.loc['FUMm',deltag0_sigma] = 0.607693 reactions.loc['MDHm',deltag0_sigma] = 0.422376 reactions.loc['GAPD',deltag0_sigma] = 0.895659 reactions.loc['PGK',deltag0_sigma] = 0.889982 reactions.loc['TPI',deltag0_sigma] = 0.753116 reactions.loc['FBA',deltag0_sigma] = 0.87227 reactions.loc['PYK',deltag0_sigma] = 0.939774 reactions.loc['PGM',deltag0_sigma] = 0.65542 reactions.loc['ENO',deltag0_sigma] = 0.734193 reactions.loc['HEX1',deltag0_sigma] = 0.715237 reactions.loc['PGI',deltag0_sigma] = 0.596775 reactions.loc['PFK',deltag0_sigma] = 0.886629 reactions.loc['PYRt2m',deltag0_sigma] = 0 reactions.loc['PDHm',deltag0_sigma] = 7.66459 reactions.loc['G6PDH2r',deltag0_sigma] = 2.11855 reactions.loc['PGL',deltag0_sigma] = 2.62825 reactions.loc['GND',deltag0_sigma] = 7.60864 reactions.loc['RPE',deltag0_sigma] = 1.16485 reactions.loc['RPI',deltag0_sigma] = 
1.16321 reactions.loc['TKT2',deltag0_sigma] = 2.08682 reactions.loc['TALA',deltag0_sigma] = 1.62106 reactions.loc['FBA3',deltag0_sigma] = 7.36854 reactions.loc['PFK_3',deltag0_sigma] = 7.3671 reactions.loc['TKT1',deltag0_sigma] = 2.16133 reactions.loc['Glutamine-fructose-6-phosphate aminotransferase',deltag0_sigma] = 3.08807 reactions.loc['Glucosamine-6-phosphate N-acetyltransferase',deltag0_sigma] = 4.26738 reactions.loc['N-acetylglucosamine-phosphate mutase',deltag0_sigma] = 3.06369 reactions.loc['UDP N-acetylglucosamine pyrophosphorylase',deltag0_sigma] = 3.12527 reactions.loc['Hyaluronan Synthase',deltag0_sigma] = 9.46851 reactions.loc['Phosphoglucomutase',deltag0_sigma] = 1.09029 reactions.loc['UTP-glucose-1-phosphate uridylyltransferase',deltag0_sigma] = 1.14644 reactions.loc['1,3-beta-glucan synthase',deltag0_sigma] = 7.80447 reactions.loc['Citrate-oxaloacetate exchange',deltag0_sigma] = 0 reactions.loc['CITRATE_LYASE',deltag0_sigma] = 0.928303 reactions.loc['MDHc',deltag0_sigma] = 0.422376 reactions.loc['MDH-NADPc',deltag0_sigma] = 0.531184 reactions.loc['ME1c',deltag0_sigma] = 7.60174 reactions.loc['ME2c',deltag0_sigma] = 7.61042 reactions.loc['Pyruvate Carboxylase',deltag0_sigma] = 7.60419 reactions.loc['Aldose 1-epimerase',deltag0_sigma] = 0 reactions.loc['HEX1a',deltag0_sigma] = 0.715237 reactions.loc['PGI-1',deltag0_sigma] = 0.596775 # ## Calculate Standard Free Energies of Reaction # In[49]: conc = 'Conc' variable = 'Variable' conc_exp = 'Conc_Experimental' metabolites = pd.DataFrame(index = S_active.columns, columns=[conc,conc_exp,variable]) metabolites[conc] = 0.001 metabolites[variable] = True # Set the fixed metabolites: metabolites.loc['ATP:MITOCHONDRIA',conc] = 9.600000e-03 metabolites.loc['ATP:MITOCHONDRIA',variable] = False metabolites.loc['ADP:MITOCHONDRIA',conc] = 5.600000e-04 metabolites.loc['ADP:MITOCHONDRIA',variable] = False metabolites.loc['ORTHOPHOSPHATE:MITOCHONDRIA',conc] = 2.000000e-02 metabolites.loc['ORTHOPHOSPHATE:MITOCHONDRIA',variable] = False metabolites.loc['ATP:CYTOSOL',conc] = 9.600000e-03 metabolites.loc['ATP:CYTOSOL',variable] = False metabolites.loc['ADP:CYTOSOL',conc] = 5.600000e-04 metabolites.loc['ADP:CYTOSOL',variable] = False metabolites.loc['ORTHOPHOSPHATE:CYTOSOL',conc] = 2.000000e-02 metabolites.loc['ORTHOPHOSPHATE:CYTOSOL',variable] = False metabolites.loc['UTP:CYTOSOL',conc] = 9.600000e-03 metabolites.loc['UTP:CYTOSOL',variable] = False metabolites.loc['UDP:CYTOSOL',conc] = 5.600000e-04 metabolites.loc['UDP:CYTOSOL',variable] = False metabolites.loc['DIPHOSPHATE:CYTOSOL',conc] = 2.000000e-02 metabolites.loc['DIPHOSPHATE:CYTOSOL',variable] = False metabolites.loc['NADH:MITOCHONDRIA',conc] = 8.300000e-05 metabolites.loc['NADH:MITOCHONDRIA',variable] = False metabolites.loc['NAD+:MITOCHONDRIA',conc] = 2.600000e-03 metabolites.loc['NAD+:MITOCHONDRIA',variable] = False metabolites.loc['NADH:CYTOSOL',conc] = 8.300000e-05 metabolites.loc['NADH:CYTOSOL',variable] = False metabolites.loc['NAD+:CYTOSOL',conc] = 2.600000e-03 metabolites.loc['NAD+:CYTOSOL',variable] = False metabolites.loc['NADPH:CYTOSOL',conc] = 8.300000e-05 #also use 1.2e-4 metabolites.loc['NADPH:CYTOSOL',variable] = False metabolites.loc['NADP+:CYTOSOL',conc] = 2.600000e-03 #also use 2.1e-6 metabolites.loc['NADP+:CYTOSOL',variable] = False metabolites.loc['COA:MITOCHONDRIA',conc] = 1.400000e-03 metabolites.loc['COA:MITOCHONDRIA',variable] = False metabolites.loc['COA:CYTOSOL',conc] = 1.400000e-03 metabolites.loc['COA:CYTOSOL',variable] = False 
metabolites.loc['CO2:MITOCHONDRIA',conc] = 1.000000e-04 metabolites.loc['CO2:MITOCHONDRIA',variable] = False metabolites.loc['CO2:CYTOSOL',conc] = 1.000000e-04 metabolites.loc['CO2:CYTOSOL',variable] = False metabolites.loc['H2O:MITOCHONDRIA',conc] = 55.5 metabolites.loc['H2O:MITOCHONDRIA',variable] = False metabolites.loc['H2O:CYTOSOL',conc] = 55.5 metabolites.loc['H2O:CYTOSOL',variable] = False metabolites.loc['BETA-D-GLUCOSE:CYTOSOL',conc] = 2.0e-03 metabolites.loc['BETA-D-GLUCOSE:CYTOSOL',variable] = False metabolites.loc["CHITOBIOSE:CYTOSOL",conc] = 2.0e-09 metabolites.loc["CHITOBIOSE:CYTOSOL",variable] = False metabolites.loc['1,3-BETA-D-GLUCAN:CYTOSOL',conc] = 2.0e-09 metabolites.loc['1,3-BETA-D-GLUCAN:CYTOSOL',variable] = False metabolites.loc['L-GLUTAMINE:CYTOSOL',conc] = 2.0e-03 metabolites.loc['L-GLUTAMINE:CYTOSOL',variable] = False metabolites.loc['L-GLUTAMATE:CYTOSOL',conc] = 2.0e-04 metabolites.loc['L-GLUTAMATE:CYTOSOL',variable] = False metabolites.loc['CELLOBIOSE:CYTOSOL',conc] = 2.0e-04 metabolites.loc['CELLOBIOSE:CYTOSOL',variable] = False metabolites.loc['N-ACETYL-D-GLUCOSAMINE:CYTOSOL',conc] = 1.0e-08 metabolites.loc['N-ACETYL-D-GLUCOSAMINE:CYTOSOL',variable] = False #When loading experimental concentrations, first copy current #rule of thumb then overwrite with data values. metabolites[conc_exp] = metabolites[conc] metabolites.loc['2-OXOGLUTARATE:MITOCHONDRIA',conc_exp] = 0.0000329167257825644 metabolites.loc['ISOCITRATE:MITOCHONDRIA',conc_exp] = 0.000102471198594958 metabolites.loc['PHOSPHOENOLPYRUVATE:CYTOSOL',conc_exp] = 0.0000313819870767023 metabolites.loc['D-GLYCERALDEHYDE-3-PHOSPHATE:CYTOSOL',conc_exp] = 0.0000321630949358949 metabolites.loc['FUMARATE:MITOCHONDRIA',conc_exp] = 0.00128926137523035 metabolites.loc['L-GLUTAMINE:CYTOSOL',conc_exp] = 0.0034421144256392 metabolites.loc['PYRUVATE:MITOCHONDRIA',conc_exp] = 0.0000778160985710288 metabolites.loc['PYRUVATE:CYTOSOL',conc_exp] = 0.0000778160985710288 metabolites.loc['D-FRUCTOSE_6-PHOSPHATE:CYTOSOL',conc_exp] = 0.00495190614473117 metabolites.loc['D-RIBOSE-5-PHOSPHATE:CYTOSOL',conc_exp] = 0.0000849533575412862 metabolites.loc['CITRATE:MITOCHONDRIA',conc_exp] = 0.000485645834537379 metabolites.loc['CITRATE:CYTOSOL',conc_exp] = 0.000485645834537379 metabolites.loc['(S)-MALATE:MITOCHONDRIA',conc_exp] = 0.00213827060541153 metabolites.loc['(S)-MALATE:CYTOSOL',conc_exp] = 0.00213827060541153 metabolites.loc['SEDOHEPTULOSE_7-PHOSPHATE:CYTOSOL',conc_exp] = 0.00203246193132095 metabolites.loc['D-RIBULOSE-5-PHOSPHATE:CYTOSOL',conc_exp] = 0.000468439334729429 metabolites.loc['L-GLUTAMATE:CYTOSOL',conc_exp] = 0.00557167476932484 metabolites.loc['SUCCINATE:MITOCHONDRIA',conc_exp] = 0.000942614767220802 metabolites.loc['D-XYLULOSE-5-PHOSPHATE:CYTOSOL',conc_exp] = 0.000468439334729429 nvariables = metabolites[metabolites[variable]].count() nvar = nvariables[variable] metabolites.sort_values(by=variable, axis=0,ascending=False, inplace=True,) #print(metabolites) #%% nvariables = metabolites[metabolites[variable]].count() nvar = nvariables[variable] metabolites.sort_values(by=variable, axis=0,ascending=False, inplace=True,) #print(metabolites) # ## Prepare model for optimization # - Adjust S Matrix to use only reactions with activity > 0, if necessary. # - Water stoichiometry in the stiochiometric matrix needs to be set to zero since water is held constant. # - The initial concentrations of the variable metabolites are random. # - All concentrations are changed to log counts. 
    # - Equilibrium constants are calculated from standard free energies of reaction.
    # - R (reactant) and P (product) matrices are derived from S.

    # Make sure all the indices and columns are in the correct order:
    active_reactions = reactions[reactions[enzyme_level] > 0.0]
    # print(reactions)
    # print(metabolites.index)
    Sactive_index = S_active.index
    # NOTE: reindex returns a new frame; since the result is not assigned, this call
    # relies on the index already being in the right order.
    active_reactions.reindex(index=Sactive_index, copy=False)
    S_active = S_active.reindex(columns=metabolites.index, copy=False)

    # Water is held constant, so its stoichiometry is zeroed out.
    S_active['H2O:MITOCHONDRIA'] = 0
    S_active['H2O:CYTOSOL'] = 0

    #####################################
    #####################################
    # THIS IS MAKING FLUX -> 0.0
    where_are_NaNs = np.isnan(S_active)
    S_active[where_are_NaNs] = 0
    # print(S_active[:])
    S_mat = S_active.values

    # Equilibrium constants from standard free energies: Keq = exp(-DG0/RT).
    # For example, CSm with DG0 of about -35.8 kJ/mol gives Keq on the order of 1e6.
    Keq_constant = np.exp(-active_reactions[deltag0].astype('float')/RT)
    # print(Keq_constant)
    Keq_constant = Keq_constant.values

    # Product (P) and reactant (R) matrices are the positive and negative parts of S.
    P_mat = np.where(S_mat > 0, S_mat, 0)
    R_back_mat = np.where(S_mat < 0, S_mat, 0)

    E_regulation = np.ones(Keq_constant.size)  # This is the vector of enzyme activities, range 0 to 1.
    mu0 = 1  # Dummy parameter for now; reserved for free energies of formation

    # If no experimental data is available, we estimate using 'rule-of-thumb' concentrations of 0.001.
    conc_type = conc
    if (use_experimental_data):
        print("USING EXPERIMENTAL DATA")
        conc_type = conc_exp

    variable_concs = np.array(metabolites[conc_type].iloc[0:nvar].values, dtype=np.float64)

    # Random initial log concentrations, uniform in [-10, 0): roughly 4.5e-5 M to 1 M.
    v_log_concs = -10 + 10*np.random.rand(nvar)
    v_concs = np.exp(v_log_concs)
    v_log_counts_stationary = np.log(v_concs*Concentration2Count)
    v_log_counts = v_log_counts_stationary
    # print(v_log_counts)

    fixed_concs = np.array(metabolites[conc_type].iloc[nvar:].values, dtype=np.float64)
    fixed_counts = fixed_concs*Concentration2Count
    f_log_counts = np.log(fixed_counts)

    complete_target_log_counts = np.log(Concentration2Count * metabolites[conc_type].values)
    target_v_log_counts = complete_target_log_counts[0:nvar]
    target_f_log_counts = complete_target_log_counts[nvar:]

    # WARNING: CHANGE BACK TO ZEROS (the np.zeros factor keeps this all zeros for now)
    delta_increment_for_small_concs = (10**-50)*np.zeros(metabolites[conc_type].values.size)

    variable_concs_begin = np.array(metabolites[conc_type].iloc[0:nvar].values, dtype=np.float64)

    # %% Basic test
    v_log_counts = np.log(variable_concs_begin*Concentration2Count)
    # r_log_counts = -10 + 10*np.random.rand(v_log_counts.size)
    # v_log_counts = r_log_counts
    # print('====== Without adjusting Keq_constant ======')
    E_regulation = np.ones(Keq_constant.size)  # This is the vector of enzyme activities, range 0 to 1.
    nvar = v_log_counts.size

    # WARNING: input LOG COUNTS to all functions; conversion to counts is done internally.
    res_lsq1 = least_squares(max_entropy_functions.derivatives, v_log_counts,
                             method='lm', xtol=1e-15,
                             args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                                   delta_increment_for_small_concs, Keq_constant,
                                   E_regulation))
    if (res_lsq1.success == False):
        res_lsq1 = least_squares(max_entropy_functions.derivatives, v_log_counts,
                                 method='dogbox', xtol=1e-15,
                                 args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                                       delta_increment_for_small_concs, Keq_constant,
                                       E_regulation))
        if (res_lsq1.success == False):
            res_lsq1 = least_squares(max_entropy_functions.derivatives, v_log_counts,
                                     method='trf', xtol=1e-15,
                                     args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat,
                                           delta_increment_for_small_concs, Keq_constant,
                                           E_regulation))

    rxn_flux = max_entropy_functions.oddsDiff(res_lsq1.x, f_log_counts, mu0, S_mat,
                                              R_back_mat, P_mat,
                                              delta_increment_for_small_concs,
                                              Keq_constant, E_regulation)

    # In[ ]:
    begin_log_metabolites = np.append(res_lsq1.x, f_log_counts)

    ##########################################
    ##########################################
    ##################### TESTER ##############
    E_regulation = np.ones(Keq_constant.size)  # This is the vector of enzyme activities, range 0 to 1.
    log_metabolites = np.append(res_lsq1.x, f_log_counts)
    KQ_f = max_entropy_functions.odds(log_metabolites, mu0, S_mat, R_back_mat, P_mat,
                                      delta_increment_for_small_concs, Keq_constant)
    Keq_inverse = np.power(Keq_constant, -1)
    KQ_r = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat, R_back_mat,
                                      delta_increment_for_small_concs, Keq_inverse, -1)

    [RR, Jac] = max_entropy_functions.calc_Jac2(res_lsq1.x, f_log_counts, S_mat,
                                                delta_increment_for_small_concs,
                                                KQ_f, KQ_r, E_regulation)
    A = max_entropy_functions.calc_A(res_lsq1.x, f_log_counts, S_mat, Jac, E_regulation)
    [ccc, fcc] = max_entropy_functions.conc_flux_control_coeff(nvar, A, S_mat, rxn_flux, RR)

    React_Choice = 6
    newE = max_entropy_functions.calc_reg_E_step(E_regulation, React_Choice, nvar,
                                                 res_lsq1.x, f_log_counts,
                                                 complete_target_log_counts,
                                                 S_mat, A, rxn_flux, KQ_f)

    delta_S_metab = max_entropy_functions.calc_deltaS_metab(res_lsq1.x, target_v_log_counts)

    ipolicy = 7  # use ipolicy=1 or 4
    reaction_choice = max_entropy_functions.get_enzyme2regulate(ipolicy, delta_S_metab,
                                                                ccc, KQ_f, E_regulation,
                                                                res_lsq1.x)

    # %%
    # device = torch.device("cpu")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Using device:', device)

    # Set shared variables in the ML module.
    me.device = device
    me.v_log_counts_static = v_log_counts_stationary
    me.target_v_log_counts = target_v_log_counts
    me.complete_target_log_counts = complete_target_log_counts
    me.Keq_constant = Keq_constant
    me.f_log_counts = f_log_counts
    me.P_mat = P_mat
    me.R_back_mat = R_back_mat
    me.S_mat = S_mat
    me.delta_increment_for_small_concs = delta_increment_for_small_concs
    me.nvar = nvar
    me.mu0 = mu0
    me.gamma = gamma
    me.num_rxns = Keq_constant.size
    me.penalty_reward_scalar = penalty_reward_scalar

    # %%
    # Value network: one hidden tanh layer mapping the enzyme-activity state to a scalar value.
    N, D_in, H, D_out = 1, Keq_constant.size, 20*Keq_constant.size, 1

    # Create random Tensors to hold inputs and outputs
    x = torch.rand(1000, D_in, device=device)

    nn_model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.Tanh(),
        torch.nn.Linear(H, D_out)).to(device)

    loss_fn = torch.nn.MSELoss(reduction='sum')

    # learning_rate=5e-6
    # optimizer = torch.optim.SGD(nn_model.parameters(), lr=learning_rate, momentum=0.9)
    optimizer = torch.optim.SGD(nn_model.parameters(), lr=learning_rate, momentum=0.9)
    # optimizer = torch.optim.Adam(nn_model.parameters(), lr=3e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
patience=200, verbose=True, min_lr=1e-10,cooldown=10,threshold=1e-5) #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=100, verbose=True, min_lr=1e-10,cooldown=10,threshold=1e-4) #%% SGD UPDATE TEST #attempted iterations to update theta_linear v_log_counts = v_log_counts_stationary.copy() episodic_loss = [] episodic_loss_max = [] episodic_epr = [] episodic_reward = [] episodic_nn_step = [] episodic_random_step = [] epsilon_greedy_init = epsilon final_states=np.zeros(Keq_constant.size) final_KQ_fs=np.zeros(Keq_constant.size) final_KQ_rs=np.zeros(Keq_constant.size) epr_per_state=[] was_state_terminal=[] for update in range(0,updates): x_changing = 1*torch.rand(1000, D_in, device=device) #generate state to use state_sample = np.zeros(Keq_constant.size) for sample in range(0,len(state_sample)): state_sample[sample] = np.random.uniform(1,1) #annealing test if ((update % eps_threshold== 0) and (update != 0)): epsilon=epsilon/2 print("RESET epsilon ANNEALING") print(epsilon) prediction_x_changing_previous = nn_model(x_changing) #nn_model.train() [sum_reward, average_loss,max_loss,final_epr,final_state,final_KQ_f,final_KQ_r, reached_terminal_state,\ random_steps_taken,nn_steps_taken] = me.sarsa_n(nn_model,loss_fn, optimizer, scheduler, state_sample, n_back_step, epsilon) print("EPISODE") print(update) print("MAXIMUM LAYER WEIGHTS") for layer in nn_model.modules(): try: print(torch.max(layer.weight)) except: print("") print('random,nn steps') print(random_steps_taken) print(nn_steps_taken) if (reached_terminal_state): was_state_terminal.append(1) else: was_state_terminal.append(0) final_states = np.vstack((final_states,final_state)) final_KQ_fs = np.vstack((final_KQ_fs,final_KQ_f)) final_KQ_rs = np.vstack((final_KQ_rs,final_KQ_r)) epr_per_state.append(final_epr) episodic_epr.append(final_epr) episodic_loss.append(average_loss) episodic_loss_max.append(max_loss) episodic_reward.append(sum_reward) episodic_nn_step.append(nn_steps_taken) episodic_random_step.append(random_steps_taken) scheduler.step(average_loss) print("TOTAL REWARD") print(sum_reward) print("ave loss") print(average_loss) print("max_loss") print(max_loss) print(optimizer.state_dict) print(scheduler.state_dict()) prediction_x_changing = nn_model(x_changing) total_prediction_changing_diff = sum(abs(prediction_x_changing - prediction_x_changing_previous)) print("TOTALPREDICTION") print(total_prediction_changing_diff) np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/data/'+ 'temp_episodic_loss_'+str(n_back_step) + '_lr'+str(learning_rate)+ '_'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_'+str(sim_number)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '.txt', episodic_loss, fmt='%f') np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/data/'+ 'temp_epr_'+str(n_back_step) + '_lr'+str(learning_rate)+ '_'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_'+str(sim_number)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '.txt', episodic_epr, fmt='%f') np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/data/'+ 'temp_episodic_reward_'+str(n_back_step)+ '_lr'+str(learning_rate)+ '_'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+'_'+str(sim_number)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '.txt', episodic_reward, fmt='%f') np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/data/'+ 
'temp_final_states_'+str(n_back_step)+ '_lr'+str(learning_rate)+ '_'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+'_'+str(sim_number)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '.txt', final_states, fmt='%f') if (update > 200): if ((max(episodic_loss[-100:])-min(episodic_loss[-100:]) < 0.025) and (update > 350)): break #%% #gamma9 -> gamma=0.9 #n8 -> n_back_step=8 #k5 -> E=E-E/5 was used #lr5e6 -> begin lr=0.5*e-6 torch.save(nn_model, cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+ 'complete_model_gly_tca_gog_gamma9_n'+str(n_back_step)+'_k5_'\ '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_' +str(int(use_experimental_data))+ '_sim'+str(sim_number) + '.pth') np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+ 'episodic_terminal_state_gamma9_n'+str(n_back_step)+'_k5_' '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '_sim'+str(sim_number)+ '.txt', was_state_terminal, fmt='%f') np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+ 'episodic_loss_gamma9_n'+str(n_back_step)+'_k5_' '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '_sim'+str(sim_number)+ '.txt', episodic_loss, fmt='%f') np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+ 'episodic_loss_max_gamma9_n'+str(n_back_step)+'_k5_'+ '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+'_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '_sim'+str(sim_number)+ '.txt', episodic_loss_max, fmt='%f') np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+ 'episodic_reward_gamma9_n'+str(n_back_step)+'_k5_'+ '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '_sim'+str(sim_number)+ '.txt', episodic_reward, fmt='%f') np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+ 'final_states_gamma9_n'+str(n_back_step)+'_k5_'+ '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '_sim'+str(sim_number)+\ '.txt', final_states, fmt='%f') np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+ 'final_KQF_gamma9_n'+str(n_back_step)+'_k5_'+ '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '_sim'+str(sim_number)+\ '.txt', final_KQ_fs, fmt='%f') np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+ 'final_KQR_gamma9_n'+str(n_back_step)+'_k5_'+ '_lr'+str(learning_rate)+ '_threshold'+str(eps_threshold)+ '_eps'+str(epsilon_greedy_init)+ '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+ '_use_experimental_metab_'+str(int(use_experimental_data))+ '_sim'+str(sim_number)+\ '.txt', 
               final_KQ_rs, fmt='%f')

    np.savetxt(cwd+'/TCA_PPP_GLYCOLYSIS_CELLWALL/models_final_data/'+
               'epr_per_state_gamma9_n'+str(n_back_step)+'_k5_'+
               '_lr'+str(learning_rate)+
               '_threshold'+str(eps_threshold)+
               '_eps'+str(epsilon_greedy_init)+
               '_penalty_reward_scalar_'+str(me.penalty_reward_scalar)+
               '_use_experimental_metab_'+str(int(use_experimental_data))+
               '_sim'+str(sim_number)+
               '.txt', epr_per_state, fmt='%f')
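# Assumed entry point (not shown in this part of the source): run() ignores its
# argument and reads sys.argv directly, so the module is presumably executed as a
# script. A conventional guard would look like the sketch below.
if __name__ == "__main__":
    run(sys.argv)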