# Simulate the next time step of the environment
next_state, Reward, Done, Info = env.step(
    control_input, t,
    setpoint=[set_point1, set_point2],
    noise=False, economics='mixed', w_y1=1, w_y2=0)

# RL Feedback
if t == rl.eval_feedback and t > 150:
    rl.matrix_update(
        action_index, Reward, state,
        [env.y[t, 0] - set_point1, env.y[t, 1] - set_point2], 5)
    tot_reward = tot_reward + Reward

rlist.append(tot_reward)

# Autosave Q, T, and NT matrices
rl.autosave(episode, 100)

if episode % 10 == 0:
    print("Episode {} | Current Reward {}".format(episode, tot_reward))

env.plots(timestart=50, timestop=6000)

# plt.scatter(PID1.u[40:env.y.shape[0]], env.y[40:, 0])
# plt.show()
# plt.scatter(PID2.u[40:env.y.shape[0]], env.y[40:, 1])
# plt.show()
# Generate input tuple
control_input = np.array([[input_1, input_2]])

# Simulate the next time step of the environment
next_state, Reward, Done, Info = env.step(
    control_input, t,
    setpoint=[set_point1, set_point2],
    noise=False, economics='distillate')

# RL Feedback
if t == rl.eval_feedback:
    rl.matrix_update(
        action_index, Reward, state,
        env.y[t, :] - np.array([set_point1, set_point2]), 5)
    tot_reward = tot_reward + Reward

rlist.append(tot_reward)

# Autosave Q, T, and NT matrices
rl.autosave(iteration, 250)

env.plots(timestart=50, timestop=5950)

# plt.scatter(PID1.u[40:env.y.shape[0]], env.y[40:, 0])
# plt.show()
# plt.scatter(PID2.u[40:env.y.shape[0]], env.y[40:, 1])
# plt.show()