def simulation():
    # Model Initiation
    model = LinearSystem(nsim=100, model_type='SISO', x0=np.array([0.5]), u0=np.array([1]),
                         xs=np.array([5]), us=np.array([10]), step_size=0.2)
    # model = LinearSystem(nsim=100, model_type='MIMO', x0=np.array())

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.95, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.5,
                           epsilon=0.2, doe=1.2, eval_period=1)

    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want
    to define their own states and actions.  RL will automatically populate states and actions if user does
    not input their own.
    """

    states = np.zeros([27])
    states[0:12] = np.linspace(0, 2.5, 12)
    states[12:27] = np.linspace(3, 8, 15)
    rl.user_states(list(states))

    # actions = np.zeros([20])
    # actions[0:5] = np.linspace(290, 298, 5)
    actions = np.linspace(5, 15, 16)
    # actions[30:35] = np.linspace(302, 310, 5)
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    rlist = []

    for episode in range(1):
        # Reset the model after each episode
        model.reset(random_init=False)
        tot_reward = 0
        state = 0
        action_index = 0

        for t in range(1, model.Nsim + 1):
            """
            Disturbance
            """
            # if t % 30 == 0:
            #     model.x[t - 1] += np.random.uniform(-5, 3)

            """
            RL Evaluate
            """
            if t % rl.eval_period == 0:
                state, action = rl.ucb_action_selection(model.x[t - 1, 0])
                action, action_index = rl.action_selection(state, action, model.u[t - 1, 0],
                                                           no_decay=25, ep_greedy=False, time=t,
                                                           min_eps_rate=0.5)
                # Use interpolation to perform action
                action = rl.interpolation(model.x[t - 1, 0])
            else:
                action = model.u[t - 1, :][0]

            next_state, reward, done, info = model.step([action], t, obj_function="MPC")

            """
            Feedback evaluation
            """
            if t == rl.eval_feedback:
                rl.matrix_update(action_index, reward, state, model.x[t, 0], 5)

            tot_reward = tot_reward + reward

        rlist.append(tot_reward)
        rl.autosave(episode, 250)

        if episode % 100 == 0:
            print(model.cost_function(transient_period=120))

    return model, rl, rlist
def simulation():
    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.95, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.5,
                           epsilon=0.8, doe=0, eval_period=1)

    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want
    to define their own states and actions.  RL will automatically populate states and actions if user does
    not input their own.
    """

    states = []
    rl.x1 = np.linspace(1.5, 3.5, 16)
    rl.x2 = np.linspace(3.5, 5.5, 16)
    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])
    rl.user_states(list(states))

    actions = []
    rl.u1 = np.linspace(-0.4, 0.4, 5)
    rl.u2 = np.linspace(-0.4, 0.4, 5)
    for i in rl.u1:
        for j in rl.u2:
            actions.append([i, j])
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    sim_length = 90
    kp_ki = np.zeros([sim_length, 2])
    kp_ki[0, :] = np.array([1.4, 3.3])
    # Reuse the name `actions` to log the gain increment applied at each step
    actions = np.zeros([sim_length, 2])

    for t in range(1, sim_length):
        """
        RL Evaluate
        """
        state, action = rl.ucb_action_selection(kp_ki[t - 1, :])
        action, action_index = rl.action_selection(state, action, actions[t - 1, :], no_decay=25,
                                                   ep_greedy=False, time=t, min_eps_rate=0.4)
        actions[t, :] = action

        kp_ki[t, :], reward, x_trajectory = sim(kp_ki[t - 1], action)

        """
        Feedback evaluation
        """
        rl.matrix_update(action_index, reward, state, kp_ki[t, :], 5)

        # Bound the gains: reset them if they leave the admissible region [0, 8] x [0, 8]
        if (kp_ki[t, :] > np.array([8, 8])).any() or (kp_ki[t, :] < np.array([0, 0])).any():
            kp_ki[t, :] = np.array([1.5, 3.5])

        rl.autosave(t, 250)

    return kp_ki, actions, rl
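# --- Hypothetical sketch; `sim` is defined elsewhere in the repository and is not shown in this section. ---
# The PI-tuning loop above only relies on its interface: it takes the current [Kp, Ki] pair and the selected
# action (an increment on the gains), runs one closed-loop simulation, and returns the updated gains, a
# scalar reward, and the closed-loop state trajectory.  A minimal stand-in with that signature (placeholder
# dynamics and reward, for illustration only) could look like this:
def sim(kp_ki_prev, action):
    kp_ki_new = np.asarray(kp_ki_prev, dtype=float) + np.asarray(action, dtype=float)  # apply gain increments
    x_trajectory = np.zeros(90)                       # placeholder closed-loop response
    reward = -float(np.sum(np.abs(x_trajectory)))     # placeholder tracking-cost reward
    return kp_ki_new, reward, x_trajectory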
def simulation():
    # Model Initiation
    model = LinearSystem(nsim=100, model_type='SISO', x0=np.array([0.5]), u0=np.array([1]),
                         xs=np.array([1.5]), us=np.array([3]), step_size=0.2)

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.90, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.1,
                           epsilon=1, doe=0, eval_period=5)

    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want
    to define their own states and actions.  RL will automatically populate states and actions if user does
    not input their own.
    """

    # Set-point tracking errors
    states = np.zeros(39)
    states[0:5] = np.linspace(-4, -2, 5)
    states[34:39] = np.linspace(2, 4, 5)
    states[5:34] = np.linspace(-1.8, 1.8, 29)
    rl.user_states(list(states))

    actions = np.linspace(-4, 4, 33)
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    rlist = []

    for episode in range(1):
        # Reset the model after each episode
        model.reset(random_init=False)
        tot_reward = 0
        state = 0
        action_index = 0

        for t in range(1, model.Nsim + 1):
            """
            Disturbance
            """
            # if t % 21 == 0:
            #     model.xs = np.array([np.random.uniform(1, 3)])
            #     # model.xs = np.array([2.5])
            #     print(model.xs)

            if t % 25 == 0:
                model.xs = np.array([2])

            if t % 50 == 0:
                model.xs = np.array([1])

            if t % 75 == 0:
                model.xs = np.array([1.5])

            """
            RL Evaluate
            """
            tracking_error = (model.x[t - 1] - model.xs)[0]

            if t % rl.eval_period == 0:
                state, action = rl.ucb_action_selection(tracking_error)
                action, action_index = rl.action_selection(state, action, model.u[t - 1, 0], no_decay=25,
                                                           ep_greedy=False, time=t, min_eps_rate=0.25)
                # Interpolation action selection
                # action = rl.interpolation(tracking_error)
            else:
                action = 0

            inputs = model.u[t - 1] + action
            next_state, reward, done, info = model.step(inputs, t, obj_function="MPC")

            """
            Feedback evaluation
            """
            if t == rl.eval_feedback:
                feedback_tracking_error = (model.x[t, 0] - model.xs)[0]
                if abs(feedback_tracking_error) > 7.5:
                    break
                rl.matrix_update(action_index, reward, state, feedback_tracking_error, 5)

            tot_reward = tot_reward + reward

        rlist.append(tot_reward)
        rl.autosave(episode, 250)

        if episode % 100 == 0:
            print(model.cost_function(transient_period=120))

    return model, rl, rlist
def simulation():
    # Model Initiation
    model = LinearSystem(nsim=100, model_type='MIMO', x0=np.array([1.333, 4]), u0=np.array([3, 6]),
                         xs=np.array([3.555, 4.666]), us=np.array([5, 7]), step_size=0.2)

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.95, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.5,
                           epsilon=0.2, doe=1.2, eval_period=1)

    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want
    to define their own states and actions.  RL will automatically populate states and actions if user does
    not input their own.
    """

    states = []
    rl.x1 = np.linspace(0, 6, 25)
    rl.x2 = np.linspace(2, 6, 17)
    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])
    rl.user_states(list(states))

    actions = []
    rl.u1 = np.linspace(1, 7, 19)
    rl.u2 = np.linspace(4, 9, 16)
    for i in rl.u1:
        for j in rl.u2:
            actions.append([i, j])
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    rlist = []

    for episode in range(1):
        # Reset the model after each episode
        model.reset(random_init=False)
        tot_reward = 0
        state = 0
        action_index = 0

        for t in range(1, model.Nsim + 1):
            """
            Disturbance
            """
            # if t % 10 == 0:
            #     model.x[t - 1, :] += np.random.uniform(-2.1, 2.1, size=2)

            """
            RL Evaluate
            """
            if t % rl.eval_period == 0:
                state, action = rl.ucb_action_selection(model.x[t - 1, :])
                action, action_index = rl.action_selection(state, action, model.u[t - 1, :], no_decay=25,
                                                           ep_greedy=True, time=t, min_eps_rate=0.5)
            else:
                # Hold the previous input pair when no RL evaluation occurs (both inputs, since the model is MIMO)
                action = model.u[t - 1, :]

            # The MIMO model takes the full two-element input vector
            next_state, reward, done, info = model.step(list(action), t, obj_function="MPC")

            """
            Feedback evaluation
            """
            if t == rl.eval_feedback:
                rl.matrix_update(action_index, reward, state, model.x[t, :], 5)

            tot_reward = tot_reward + reward

        rlist.append(tot_reward)
        rl.autosave(episode, 250)

        if episode % 100 == 0:
            print(model.cost_function(transient_period=200))

    return model, rl, rlist
def simulation():
    # Model Initiation
    model = SIMOSystem(nsim=50, x0=np.array([2, 2 / 3]), u0=np.array([2]), xs=np.array([4, 4 / 3]),
                       us=np.array([4]), step_size=0.2, control=False, q_cost=1, r_cost=0.5, s_cost=0.3,
                       random_seed=1)

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.9, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.1,
                           epsilon=0.9, doe=0, eval_period=1)

    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want
    to define their own states and actions.  RL will automatically populate states and actions if user does
    not input their own.
    """

    states = []

    x1 = np.zeros(15)
    x1[0:2] = np.linspace(2, 3, 2)
    x1[2:13] = np.linspace(3.5, 4.5, 11)
    x1[13:15] = np.linspace(5, 5.5, 2)

    x2 = np.zeros(15)
    x2[0:2] = np.linspace(0, 2 / 3, 2)
    x2[2:13] = np.linspace(1, 2, 11)
    x2[5] = 1.33
    x2[13:15] = np.linspace(2.25, 2.5, 2)

    rl.x1 = x1
    rl.x2 = x2
    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])
    rl.user_states(list(states))

    actions = np.linspace(2, 6, 17)
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    rlist = []

    for episode in range(1):
        # Reset the model after each episode
        model.reset(random_init=False)
        tot_reward = 0
        state = 0
        action_index = 0

        for t in range(1, model.Nsim + 1):
            """
            Disturbance
            """
            # if t % 5 == 0:
            #     model.x[t - 1, :] += np.random.uniform(-2.1, 2.1, size=1)

            """
            RL Evaluate
            """
            if t % rl.eval_period == 0:
                state, action = rl.ucb_action_selection(model.x[t - 1, :])
                action, action_index = rl.action_selection(state, action, model.u[t - 1, :], no_decay=25,
                                                           ep_greedy=False, time=t, min_eps_rate=0.6)
            else:
                action = model.u[t - 1, :][0]

            next_state, reward, done, info = model.step([action], t, obj_function="MPC", delta_u='l1')

            """
            Feedback evaluation
            """
            if t == rl.eval_feedback:
                rl.matrix_update(action_index, reward, state, model.x[t, :], 5)

            tot_reward = tot_reward + reward

        rlist.append(tot_reward)
        rl.autosave(episode, 250)

        if episode % 100 == 0:
            print(model.cost_function(transient_period=200))

    return model, rl, rlist
import numpy as np
import tensorflow as tf  # TensorFlow 1.x graph-mode API

# Reinforcement Learning Initiation (the leading constructor arguments are cut off in this snippet;
# only the trailing keyword arguments are preserved)
rl = ReinforceLearning(learning_rate=0.1, epsilon=0.1, doe=0, eval_period=5)

states = np.zeros([75])
states[0:15] = np.arange(290, 310, 20 / 15)
states[15:60] = np.arange(311, 330, 19 / 45)
states[60:75] = np.arange(331, 350, 19 / 15)
rl.user_states(list(states))

actions = np.zeros([20])
actions[1:20] = np.arange(-10, 10, 20 / 19)
rl.user_actions(list(actions))

num_of_episodes = 1

tf.reset_default_graph()

# Feed-forward part of the single-layer Q-network used to choose actions
inputs = tf.placeholder(shape=[1, len(states)], dtype=tf.float32)
W = tf.Variable(tf.random_uniform([len(states), len(actions)], 0, 0.01))
Qout = tf.matmul(inputs, W)
predict = tf.argmax(Qout, 1)

# Loss between target and predicted Q-values, and the gradient-descent update op
nextQ = tf.placeholder(shape=[1, len(actions)], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(nextQ - Qout))
trainer = tf.train.GradientDescentOptimizer(learning_rate=rl.learning_rate)
updateModel = trainer.minimize(loss)
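# --- Hypothetical sketch (not part of the original snippet) ---
# The graph above only defines the Q-network and its update op; a session-based loop is still needed to
# train it.  The sketch below shows one common TensorFlow 1.x pattern, assuming a one-hot encoding of the
# discretized state and a generic `env_step(action_index)` environment call that is NOT part of this
# repository; the discount factor `gamma` is likewise an assumed placeholder.
gamma = 0.9  # assumed discount factor for the Q-learning target

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for episode in range(num_of_episodes):
        state_index = 0                                  # assumed starting state index
        for step in range(100):                          # assumed episode length
            # One-hot encode the current discretized state and pick the greedy action
            one_hot_state = np.identity(len(states))[state_index:state_index + 1]
            action_index, all_q = sess.run([predict, Qout], feed_dict={inputs: one_hot_state})

            # Apply the action through a hypothetical environment call
            next_state_index, reward = env_step(action_index[0])

            # Build the Q-learning target and take one gradient step
            one_hot_next = np.identity(len(states))[next_state_index:next_state_index + 1]
            q_next = sess.run(Qout, feed_dict={inputs: one_hot_next})
            target_q = all_q
            target_q[0, action_index[0]] = reward + gamma * np.max(q_next)
            sess.run(updateModel, feed_dict={inputs: one_hot_state, nextQ: target_q})

            state_index = next_state_index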