def __init__(self, TANK_PARAMS, TANK_DIST, MAIN_PARAMS):
    self.tanks = []
    for i, PARAMS in enumerate(TANK_PARAMS):
        tank = Tank(
            height=PARAMS["height"],
            radius=PARAMS["width"],
            max_level=PARAMS["max_level"],
            min_level=PARAMS["min_level"],
            pipe_radius=PARAMS["pipe_radius"],
            init_level=PARAMS["init_level"],
            dist=TANK_DIST[i],
        )
        self.tanks.append(tank)
    self.n_tanks = len(self.tanks)
    self.running = True
    self.terminated = [False] * self.n_tanks
    self.q_inn = [0] * (self.n_tanks + 1)
    self.show_rendering = MAIN_PARAMS["RENDER"]
    self.live_plot = MAIN_PARAMS["LIVE_REWARD_PLOT"]
    if self.show_rendering:
        self.window = Window(self.tanks)
    if self.live_plot:
        plt.ion()  # enable interactivity
        plt.figure(num="Rewards per episode")  # make a figure

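# Illustrative only: a minimal sketch of the argument structures the constructor
# above reads. Only the key names ("height", "width", "max_level", "min_level",
# "pipe_radius", "init_level", "RENDER", "LIVE_REWARD_PLOT") are taken from the
# code; every value below is a placeholder assumption, and the real settings
# live in the params file.
TANK_PARAMS = [
    {
        "height": 10,       # placeholder value
        "width": 3,         # passed to Tank as radius=
        "max_level": 0.9,
        "min_level": 0.1,
        "pipe_radius": 0.5,
        "init_level": 0.5,
    }
]
TANK_DIST = [None]  # one disturbance spec per tank; its real structure is defined elsewhere
MAIN_PARAMS = {"RENDER": False, "LIVE_REWARD_PLOT": False}
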
def __init__(self, TANK_PARAMS_LIST, TANK_DIST_LIST, MAIN_PARAMS):
    self.model = []
    for i, TANK_PARAMS in enumerate(TANK_PARAMS_LIST):
        tank = Tank(
            height=TANK_PARAMS["height"],
            radius=TANK_PARAMS["width"],
            max_level=TANK_PARAMS["max_level"],
            min_level=TANK_PARAMS["min_level"],
            pipe_radius=TANK_PARAMS["pipe_radius"],
            init_level=TANK_PARAMS["init_level"],
            dist=TANK_DIST_LIST[i],
        )
        self.model.append(tank)
    self.running = True
    self.episode = 0
    self.all_rewards = []
    self.terminated = False
    self.show_rendering = MAIN_PARAMS["RENDER"]
    self.live_plot = MAIN_PARAMS["LIVE_REWARD_PLOT"]
    if self.show_rendering:
        self.window = Window(self.model)
    if self.live_plot:
        plt.ion()  # enable interactivity
        plt.figure(num="Rewards per episode")  # make a figure

def __init__(self):
    self.model = Tank(TANK_HEIGHT, TANK_RADIUS)  # get model
    self.dist = InflowDist(DIST_PIPE_RADIUS, DIST_NOM_FLOW, DIST_VARIANCE_FLOW)
    self.add_dist = ADD_INFLOW
    self.action_delay = TBCC
    self.action_delay_counter = -OBSERVATIONS  # does not train on initial settings
    self.running = True
    self.episode = 0
    self.all_rewards = []
    self.terminated = False
    self.show_rendering = RENDER
    self.live_plot = LIVE_REWARD_PLOT
    if RENDER:
        self.window = Window(self.model)
    if LIVE_REWARD_PLOT:
        plt.ion()  # enable interactivity
        plt.figure(num="Rewards per episode")  # make a figure

def __init__(self, TANK_PARAMS, TANK_DIST, MAIN_PARAMS):
    self.tanks = []
    for i, PARAMS in enumerate(TANK_PARAMS):
        tank = Tank(
            height=PARAMS["height"],
            radius=PARAMS["width"],
            max_level=PARAMS["max_level"],
            min_level=PARAMS["min_level"],
            pipe_radius=PARAMS["pipe_radius"],
            init_level=PARAMS["init_level"],
            dist=TANK_DIST[i],
        )
        self.tanks.append(tank)
    self.n_tanks = len(self.tanks)
    self.running = True
    self.terminated = [False] * self.n_tanks
    self.q_inn = [0] * (self.n_tanks + 1)
    self.show_rendering = MAIN_PARAMS["RENDER"]
    if self.show_rendering:
        self.window = Window(self.tanks)

# Tank and Window are project-local classes; their imports are not shown in
# this snippet.


class Environment:
    "Parameters are set in the params.py file"

    def __init__(self, TANK_PARAMS_LIST, TANK_DIST_LIST, MAIN_PARAMS):
        self.tanks = []
        for i, PARAMS in enumerate(TANK_PARAMS_LIST):
            tank = Tank(
                height=PARAMS["height"],
                radius=PARAMS["width"],
                max_level=PARAMS["max_level"],
                min_level=PARAMS["min_level"],
                pipe_radius=PARAMS["pipe_radius"],
                init_level=PARAMS["init_level"],
                dist=TANK_DIST_LIST[i],
            )
            self.tanks.append(tank)
        self.running = True
        self.episode = 0
        self.terminated = False
        self.show_rendering = MAIN_PARAMS["RENDER"]
        if self.show_rendering:
            self.window = Window(self.tanks)

    def get_next_state(self, z, i, t, q_out):
        """
        Calculates the dynamics of the agent's action and gives back the next state
        """
        dldt, q_out = self.tanks[i].get_dhdt(z, t, q_out)
        self.tanks[i].change_level(dldt)
        # Check terminal state and clip the level to the allowed range
        if self.tanks[i].level < self.tanks[i].min:
            self.terminated = True
            self.tanks[i].level = self.tanks[i].min
        elif self.tanks[i].level > self.tanks[i].max:
            self.terminated = True
            self.tanks[i].level = self.tanks[i].max
        return self.tanks[i].level, q_out

    def render(self, action):
        "Draw the water level of the tank in pygame"
        if self.show_rendering:
            running = self.window.Draw(action)
            if not running:
                self.running = False

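# Illustrative only: a sketch of how a caller might step the variant above,
# where get_next_state is called once per tank and the returned q_out is fed
# to the next tank as its inflow. `env`, `z` (one valve opening per tank) and
# `t` are assumed to exist in the surrounding training code.
levels = []
q_out = 0
for i in range(len(env.tanks)):
    level, q_out = env.get_next_state(z[i], i, t, q_out)
    levels.append(level)
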
import matplotlib.pyplot as plt
from drawnow import drawnow

# Tank and Window are project-local classes; their imports are not shown in
# this snippet.


class Environment:
    "Parameters are set in the params.py file"

    def __init__(self, TANK_PARAMS_LIST, TANK_DIST_LIST, MAIN_PARAMS):
        self.model = []
        for i, TANK_PARAMS in enumerate(TANK_PARAMS_LIST):
            tank = Tank(
                height=TANK_PARAMS["height"],
                radius=TANK_PARAMS["width"],
                max_level=TANK_PARAMS["max_level"],
                min_level=TANK_PARAMS["min_level"],
                pipe_radius=TANK_PARAMS["pipe_radius"],
                init_level=TANK_PARAMS["init_level"],
                dist=TANK_DIST_LIST[i],
            )
            self.model.append(tank)
        self.running = True
        self.episode = 0
        self.all_rewards = []
        self.terminated = False
        self.show_rendering = MAIN_PARAMS["RENDER"]
        self.live_plot = MAIN_PARAMS["LIVE_REWARD_PLOT"]
        if self.show_rendering:
            self.window = Window(self.model)
        if self.live_plot:
            plt.ion()  # enable interactivity
            plt.figure(num="Rewards per episode")  # make a figure

    def get_next_state(self, z, i, t, q_out):
        """
        Calculates the dynamics of the agent's action and gives back the next state
        """
        dldt, q_out = self.model[i].get_dhdt(z, t, q_out)
        self.model[i].change_level(dldt)
        # Check terminal state and clip the level to the allowed range
        if self.model[i].level < self.model[i].min:
            self.terminated = True
            self.model[i].level = self.model[i].min
        elif self.model[i].level > self.model[i].max:
            self.terminated = True
            self.model[i].level = self.model[i].max
        return self.model[i].level, q_out

    def render(self, action):
        "Draw the water level of the tank in pygame"
        if self.show_rendering:
            running = self.window.Draw(action)
            if not running:
                self.running = False

    def get_reward(self, h):
        # Normalise the level by the tank height (self.model is a list in this
        # variant, so the first tank's height is used) and score the squared
        # deviation from the 50 % set-point.
        h = h / self.model[0].h
        reward = (h - 0.5) ** 2
        return reward
        # NOTE: the banded reward below is unreachable because of the return above.
        if 0.49 < h < 0.51:
            return 5
        if 0.45 < h < 0.55:
            return 4
        if 0.4 < h < 0.6:
            return 3
        if 0.3 < h < 0.7:
            return 2
        if 0.2 < h < 0.8:
            return 1
        else:
            return 0

    def plot_rewards(self):
        "drawnow plot of the reward"
        plt.plot(
            self.all_rewards,
            label="Exploration rate: {} %".format(self.epsilon * 100),
        )
        plt.legend()

    def plot(self, all_rewards, epsilon):
        "Live plot of the reward"
        self.all_rewards = all_rewards
        self.epsilon = round(epsilon, 4)
        try:
            drawnow(self.plot_rewards)
        except KeyboardInterrupt:
            print("Break")

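# Illustrative only: one way the live reward plot above might be driven from a
# training loop. `train_one_episode`, the episode count and the epsilon decay
# schedule are hypothetical placeholders, not part of the repository code;
# Environment.plot just stores the reward history and redraws it via drawnow
# with the current exploration rate in the legend.
all_rewards = []
epsilon = 1.0
for episode in range(500):                            # episode count assumed
    episode_reward = train_one_episode(env, epsilon)  # hypothetical helper
    all_rewards.append(episode_reward)
    epsilon = max(0.05, epsilon * 0.995)              # assumed decay schedule
    if env.live_plot:
        env.plot(all_rewards, epsilon)
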
import numpy as np

# Tank and Window are project-local classes; their imports are not shown in
# this snippet.


class Environment:
    "Parameters are set in the params.py file"

    def __init__(self, TANK_PARAMS, TANK_DIST, MAIN_PARAMS):
        self.tanks = []
        for i, PARAMS in enumerate(TANK_PARAMS):
            tank = Tank(
                height=PARAMS["height"],
                radius=PARAMS["width"],
                max_level=PARAMS["max_level"],
                min_level=PARAMS["min_level"],
                pipe_radius=PARAMS["pipe_radius"],
                init_level=PARAMS["init_level"],
                dist=TANK_DIST[i],
            )
            self.tanks.append(tank)
        self.n_tanks = len(self.tanks)
        self.running = True
        self.terminated = [False] * self.n_tanks
        self.q_inn = [0] * (self.n_tanks + 1)
        self.show_rendering = MAIN_PARAMS["RENDER"]
        if self.show_rendering:
            self.window = Window(self.tanks)

    def get_next_state(self, z, state, t):
        """
        Calculates the dynamics of the agent's action and gives back the next state
        """
        next_state = []
        prev_q_out = 0
        for i in range(self.n_tanks):
            # The outflow of tank i becomes the inflow of tank i + 1
            dldt, prev_q_out = self.tanks[i].get_dhdt(z[i], t, prev_q_out)
            self.q_inn[i + 1] = prev_q_out
            self.tanks[i].change_level(dldt)
            z_ = 0 if i == 0 else z[i - 1]
            # Check terminal state and clip the level to the allowed range
            if self.tanks[i].level < self.tanks[i].min:
                self.terminated[i] = True
                self.tanks[i].level = self.tanks[i].min
            elif self.tanks[i].level > self.tanks[i].max:
                self.terminated[i] = True
                self.tanks[i].level = self.tanks[i].max
            grad = (dldt + 0.1) / 0.2  # affine-scale dldt from [-0.1, 0.1] to [0, 1]
            above = 1 if self.tanks[i].level > 0.5 * self.tanks[i].h else 0
            next_state.append(
                np.array([self.tanks[i].level / self.tanks[i].h, grad, above, z_])
            )
        return self.terminated, next_state

    def reset(self):
        "Reset the environment to the initial tank level and disturbance"
        init_state = []
        self.terminated = [False] * self.n_tanks
        for i in range(self.n_tanks):
            self.tanks[i].reset()  # reset to initial tank level
            if self.tanks[i].add_dist:
                self.tanks[i].dist.reset()  # reset to nominal disturbance
            init_state.append(
                np.array([self.tanks[i].init_l / self.tanks[i].h, 0, 1, 0])
            )  # level plus gradient
        return [init_state], []

    def render(self, action):
        "Draw the water level of the tank in pygame"
        if self.show_rendering:
            running = self.window.Draw(action)
            if not running:
                self.running = False

import matplotlib.pyplot as plt
import numpy as np
from drawnow import drawnow

# Tank and Window are project-local classes; their imports are not shown in
# this snippet.


class Environment:
    "Parameters are set in the params.py file"

    def __init__(self, TANK_PARAMS, TANK_DIST, MAIN_PARAMS):
        self.tanks = []
        for i, PARAMS in enumerate(TANK_PARAMS):
            tank = Tank(
                height=PARAMS["height"],
                radius=PARAMS["width"],
                max_level=PARAMS["max_level"],
                min_level=PARAMS["min_level"],
                pipe_radius=PARAMS["pipe_radius"],
                init_level=PARAMS["init_level"],
                dist=TANK_DIST[i],
            )
            self.tanks.append(tank)
        self.n_tanks = len(self.tanks)
        self.running = True
        self.terminated = [False] * self.n_tanks
        self.q_inn = [0] * (self.n_tanks + 1)
        self.show_rendering = MAIN_PARAMS["RENDER"]
        self.live_plot = MAIN_PARAMS["LIVE_REWARD_PLOT"]
        if self.show_rendering:
            self.window = Window(self.tanks)
        if self.live_plot:
            plt.ion()  # enable interactivity
            plt.figure(num="Rewards per episode")  # make a figure

    def get_next_state(self, z, state, t):
        """
        Calculates the dynamics of the agent's action and gives back the next state
        """
        next_state = []
        prev_q_out = 0
        for i in range(self.n_tanks):
            # The outflow of tank i becomes the inflow of tank i + 1
            dldt, prev_q_out = self.tanks[i].get_dhdt(z[i], t, prev_q_out)
            self.q_inn[i + 1] = prev_q_out
            self.tanks[i].change_level(dldt)
            z_ = 0 if i == 0 else z[i - 1]
            # Check terminal state and clip the level to the allowed range
            if self.tanks[i].level < self.tanks[i].min:
                self.terminated[i] = True
                self.tanks[i].level = self.tanks[i].min
            elif self.tanks[i].level > self.tanks[i].max:
                self.terminated[i] = True
                self.tanks[i].level = self.tanks[i].max
            above = 1 if self.tanks[i].level > 0.5 * self.tanks[i].h else 0
            if len(state[i]) == 4:
                # Four-element state: include the scaled level gradient
                grad = (dldt + 0.1) / 0.2
                next_state.append(
                    np.array(
                        [self.tanks[i].level / self.tanks[i].h, grad, above, z_]
                    )
                )
            else:
                next_state.append(
                    np.array([self.tanks[i].level / self.tanks[i].h, above, z_])
                )
        # next_state = next_state.reshape(1,2)
        return self.terminated, next_state

    def reset(self):
        "Reset the environment to the initial tank level and disturbance"
        init_state = []
        self.terminated = [False] * self.n_tanks
        for i in range(self.n_tanks):
            self.tanks[i].reset()  # reset to initial tank level
            if self.tanks[i].add_dist:
                self.tanks[i].dist.reset()  # reset to nominal disturbance
            init_state.append(
                np.array([self.tanks[i].init_l / self.tanks[i].h, 0, 1, 0])
            )  # level plus gradient
        return [init_state], []

    def render(self, action):
        "Draw the water level of the tank in pygame"
        if self.show_rendering:
            running = self.window.Draw(action)
            if not running:
                self.running = False

    def plot_rewards(self):
        "drawnow plot of the reward"
        plt.plot(
            self.all_rewards,
            label="Exploration rate: {} %".format(self.epsilon * 100),
        )
        plt.legend()

    def plot(self, all_rewards, epsilon):
        "Live plot of the reward"
        self.all_rewards = all_rewards
        self.epsilon = round(epsilon, 4)
        try:
            drawnow(self.plot_rewards)
        except KeyboardInterrupt:
            print("Break")

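# Illustrative only: a rough episode loop for the multi-tank environment above.
# `agent`, its act() method and `max_time` are assumptions; only the
# Environment methods and the ([init_state], []) return shape of reset() come
# from the code above.
states, _ = env.reset()
state = states[-1]
for t in range(max_time):
    z = agent.act(state)                              # hypothetical policy call
    terminated, next_state = env.get_next_state(z, state, t)
    if env.show_rendering:
        env.render(z)
    state = next_state
    if any(terminated) or not env.running:
        break
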
import matplotlib.pyplot as plt
import numpy as np
from drawnow import drawnow

# Tank, InflowDist and Window are project-local classes, and the upper-case
# constants (TANK_HEIGHT, RENDER, MAX_TIME, ...) come from the params file;
# those imports are not shown in this snippet.


class Environment:
    def __init__(self):
        self.model = Tank(TANK_HEIGHT, TANK_RADIUS)  # get model
        self.dist = InflowDist(DIST_PIPE_RADIUS, DIST_NOM_FLOW, DIST_VARIANCE_FLOW)
        self.add_dist = ADD_INFLOW
        self.action_delay = TBCC
        self.action_delay_counter = -OBSERVATIONS  # does not train on initial settings
        self.running = True
        self.episode = 0
        self.all_rewards = []
        self.terminated = False
        self.show_rendering = RENDER
        self.live_plot = LIVE_REWARD_PLOT
        if RENDER:
            self.window = Window(self.model)
        if LIVE_REWARD_PLOT:
            plt.ion()  # enable interactivity
            plt.figure(num="Rewards per episode")  # make a figure

    def get_dhdt(self, action):
        "Rate of change of the tank level (dh/dt) for the chosen action"
        if ADD_INFLOW:
            q_inn = self.dist.get_flow()
            q_inn = DIST_MIN_FLOW if q_inn < DIST_MIN_FLOW else q_inn
            q_inn = DIST_MAX_FLOW if q_inn > DIST_MAX_FLOW else q_inn
        else:
            q_inn = 0
        f, A_pipe, g, l, delta_p, rho, r = self.model.get_params(action)
        term1 = q_inn / (np.pi * r ** 2)
        term2 = (f * A_pipe * np.sqrt(1 * g * l + delta_p / rho)) / (np.pi * r ** 2)
        return term1 - term2  # Eq: 1

    def get_next_state(self, action, state):
        "Model's response to an input change"
        dldt = self.get_dhdt(action)
        self.model.change_level(dldt)
        # Check terminal state and clip the level to the allowed range
        if self.model.l < self.model.min:
            self.model.l = self.model.min
            self.terminated = True
        elif self.model.l > self.model.max:
            self.model.l = self.model.max
            self.terminated = True
        next_state = state[1:] + [self.model.l]
        return self.terminated, next_state

    def reset(self):
        "Reset the environment to the initial tank level and disturbance"
        self.model.reset()  # reset to initial tank level
        self.dist.reset()  # reset to nominal disturbance
        self.terminated = False
        init_state = OBSERVATIONS * [self.model.init_l]
        return init_state, None, init_state, 0  # state,next_state,action,rewards,action_delay_counter

    def render(self, action, next_state):
        "Draw the water level of the tank in pygame"
        if RENDER:
            running = self.window.Draw(action, next_state)
            if not running:
                self.running = False

    def get_reward(self, state, terminated, t):
        if terminated:
            # Penalise as if the deviation persisted for the rest of the episode
            reward = -MAX_TIME * (state[-1] - SS_POSITION) ** 2
            # reward = -(MAX_TIME - t) * (state[-1] - SS_POSITION) ** 2
            return reward
        reward = -(state[-1] - SS_POSITION) ** 2  # squared error from the set-point
        return reward

    def plot_rewards(self):
        "drawnow plot of the reward"
        plt.plot(
            self.all_rewards,
            label="Exploration rate: {} %".format(self.epsilon * 100),
        )
        plt.legend()

    def plot(self, all_rewards, epsilon):
        "Live plot of the reward"
        self.all_rewards = all_rewards
        self.epsilon = round(epsilon, 4)
        try:
            drawnow(self.plot_rewards)
        except KeyboardInterrupt:
            print("Break")

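# Illustrative only: a rough episode loop for the single-tank environment
# above. `agent` and its act() method are assumptions; only the Environment
# methods, the MAX_TIME constant and the four-value return of reset() come
# from the code above.
state, _, next_state, reward = env.reset()
for t in range(MAX_TIME):
    action = agent.act(state)                         # hypothetical policy call
    terminated, next_state = env.get_next_state(action, state)
    reward = env.get_reward(next_state, terminated, t)
    env.render(action, next_state)
    state = next_state
    if terminated or not env.running:
        break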