def potential_step(index, other_args):
    """Perturb the activity of reaction `index` and re-solve for the
    steady-state variable metabolite log counts."""
    React_Choice = index

    state, v_log_counts, f_log_counts, \
    mu0, S_mat, R_back_mat, P_mat, \
    delta_increment_for_small_concs, Keq_constant = other_args

    newE = max_entropy_functions.calc_new_enzyme_simple(state, React_Choice)
    trial_state_sample = state.copy()#DO NOT MODIFY ORIGINAL STATE
    trial_state_sample[React_Choice] = newE

    new_res_lsq = least_squares(max_entropy_functions.derivatives, v_log_counts, method='lm',
                                xtol=1e-15, 
                                args=(f_log_counts, mu0, S_mat, R_back_mat, P_mat, 
                                      delta_increment_for_small_concs, Keq_constant, trial_state_sample))

    new_v_log_counts = new_res_lsq.x
    
    #minimal output: the new steady-state log counts. The caller can recompute
    #trial_state_sample itself, so it does not need to be returned.
    return [new_v_log_counts]
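
# Illustrative sketch, not part of the original pipeline: potential_step takes
# (index, other_args), so every candidate reaction can be scored in parallel with a
# process pool, mirroring the commented-out starmap alternative in policy_function
# below. `variables` is assumed to be the same argument tuple that potential_step
# unpacks; the helper name is hypothetical.
def _score_all_reactions_sketch(variables, num_rxns):
    from multiprocessing import Pool
    from itertools import repeat
    with Pool() as pool:
        #each worker returns [new_v_log_counts] for one perturbed reaction
        results = pool.starmap(potential_step,
                               zip(range(num_rxns), repeat(variables)))
    return results
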
def policy_function(nn_model, state, v_log_counts_path, *args):
    """Choose the next enzyme-activity change, either at random (exploration)
    or by maximizing reward plus discounted state value (exploitation)."""
    #the optional last argument is epsilon, used by the epsilon-greedy algorithm
    varargin = args
    nargin = len(varargin)
    epsilon_greedy = 0.0
    if (nargin == 1):
        epsilon_greedy = varargin[0]

    used_random_step = False

    rxn_choices = [i for i in range(num_rxns)]

    unif_rand = np.random.uniform(0, 1)
    if ((unif_rand < epsilon_greedy) and (len(rxn_choices) > 0)):
        #Exploration: take a random action.
        used_random_step = True
        random_choice = random.choice(rxn_choices)
        final_action = random_choice

        res_lsq = least_squares(max_entropy_functions.derivatives,
                                v_log_counts_path,
                                method='lm',
                                xtol=1e-15,
                                args=(f_log_counts, mu0, S_mat, R_back_mat,
                                      P_mat, delta_increment_for_small_concs,
                                      Keq_constant, state))

        final_v_log_counts = res_lsq.x

        new_log_metabolites = np.append(final_v_log_counts, f_log_counts)
        newE = max_entropy_functions.calc_new_enzyme_simple(
            state, final_action)
        final_state = state.copy()  #DO NOT MODIFY ORIGINAL STATE
        final_state[final_action] = newE

        final_delta_s_metab = max_entropy_functions.calc_deltaS_metab(
            final_v_log_counts, target_v_log_counts)
        final_KQ_f = max_entropy_functions.odds(
            new_log_metabolites, mu0, S_mat, R_back_mat, P_mat,
            delta_increment_for_small_concs, Keq_constant)
        Keq_inverse = np.power(Keq_constant, -1)
        final_KQ_r = max_entropy_functions.odds(
            new_log_metabolites, mu0, -S_mat, P_mat, R_back_mat,
            delta_increment_for_small_concs, Keq_inverse, -1)

        value_current_state = state_value(
            nn_model,
            torch.from_numpy(final_state).float().to(device))
        value_current_state = value_current_state.item()
        final_reward = reward_value(final_v_log_counts, v_log_counts_path, \
                                    final_KQ_f, final_KQ_r,\
                                    final_state, state)

    else:
        #Exploitation: choose the action with the best predicted value,
        #based on feedback from the environment.

        v_log_counts = v_log_counts_path

        log_metabolites = np.append(v_log_counts, f_log_counts)

        rxn_flux = max_entropy_functions.oddsDiff(
            v_log_counts, f_log_counts, mu0, S_mat, R_back_mat, P_mat,
            delta_increment_for_small_concs, Keq_constant, state)
        KQ_f = max_entropy_functions.odds(log_metabolites, mu0, S_mat,
                                          R_back_mat, P_mat,
                                          delta_increment_for_small_concs,
                                          Keq_constant)
        Keq_inverse = np.power(Keq_constant, -1)
        KQ_r = max_entropy_functions.odds(log_metabolites, mu0, -S_mat, P_mat,
                                          R_back_mat,
                                          delta_increment_for_small_concs,
                                          Keq_inverse, -1)

        [RR, Jac] = max_entropy_functions.calc_Jac2(
            v_log_counts, f_log_counts, S_mat,
            delta_increment_for_small_concs, KQ_f, KQ_r, state)
        A = max_entropy_functions.calc_A(v_log_counts, f_log_counts, S_mat,
                                         Jac, state)

        delta_S_metab = max_entropy_functions.calc_deltaS_metab(
            v_log_counts, target_v_log_counts)

        [ccc, fcc] = max_entropy_functions.conc_flux_control_coeff(
            nvar, A, S_mat, rxn_flux, RR)

        indices = [i for i in range(0, len(Keq_constant))]

        #minimal variables needed to run the optimization
        #variables=[state, v_log_counts, f_log_counts, mu0, S_mat, R_back_mat, P_mat, delta_increment_for_small_concs, Keq_constant]

        #Alternative: evaluate potential_step for every reaction with a process pool:
        #with Pool() as pool:
        #    async_result = pool.starmap(potential_step, zip(indices, repeat(variables)))
        #    pool.close()
        #    pool.join()
        async_result = pstep.dispatch(rxn_choices, S_mat, R_back_mat, P_mat,
                                      Keq_constant, state, f_log_counts,
                                      v_log_counts_path)
        temp_action_value = -np.inf
        for act in range(0, len(async_result)):

            #new_v_log_counts = async_result[act][0] #output format when using the process pool
            new_v_log_counts = async_result[act]
            new_log_metabolites = np.append(new_v_log_counts, f_log_counts)

            newE = max_entropy_functions.calc_new_enzyme_simple(state, act)
            trial_state_sample = state.copy()  #DO NOT MODIFY ORIGINAL STATE
            trial_state_sample[act] = newE
            new_delta_S_metab = max_entropy_functions.calc_deltaS_metab(
                new_v_log_counts, target_v_log_counts)

            KQ_f_new = max_entropy_functions.odds(
                new_log_metabolites, mu0, S_mat, R_back_mat, P_mat,
                delta_increment_for_small_concs, Keq_constant)
            KQ_r_new = max_entropy_functions.odds(
                new_log_metabolites, mu0, -S_mat, P_mat, R_back_mat,
                delta_increment_for_small_concs, Keq_inverse, -1)

            value_current_state = state_value(
                nn_model,
                torch.from_numpy(trial_state_sample).float().to(device))
            value_current_state = value_current_state.item()

            current_reward = reward_value(new_v_log_counts, v_log_counts, \
                                        KQ_f_new, KQ_r_new,\
                                        trial_state_sample, state)

            action_value = current_reward + (gamma) * value_current_state

            if (action_value > temp_action_value):
                #this action is the best found so far
                temp_action_value = action_value

                #set best output variables
                final_action = act
                final_reward = current_reward
                final_KQ_f = KQ_f_new
                final_KQ_r = KQ_r_new
                final_v_log_counts = new_v_log_counts
                final_state = trial_state_sample
                final_delta_s_metab = new_delta_S_metab
    return [final_action,
            final_reward,
            final_KQ_f,
            final_KQ_r,
            final_v_log_counts,
            final_state,
            final_delta_s_metab, used_random_step, 0.0, 0.0]
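
# Illustrative usage sketch, not part of the original module: a single epsilon-greedy
# policy step. The helper name is hypothetical; nn_model, state, and v_log_counts are
# assumed to come from the surrounding training loop.
def _policy_step_sketch(nn_model, state, v_log_counts, epsilon=0.1):
    (action, reward, KQ_f, KQ_r,
     new_v_log_counts, new_state, delta_s_metab,
     was_random, _, _) = policy_function(nn_model, state, v_log_counts, epsilon)
    #new_state already contains the chosen enzyme-activity change
    return new_state, new_v_log_counts, reward, action, was_random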