class MPC:
    """MPC Optimizer. No Demand Charges and Two Stage actions implemented.

    The optimizer grows a directed graph (``self.g``) whose nodes are
    ``Node(temperatures, timestep)`` states and whose edges are joint
    per-zone actions. Every node carries three attributes:
    ``objective_cost`` (cheapest cost-to-go found so far), ``best_action``
    and ``best_successor`` (the edge/child that achieves that cost).
    """

    def __init__(self,
                 building,
                 zones,
                 start,
                 end,
                 window,
                 lambda_val,
                 non_controllable_data=None,
                 debug=False):
        """Initialize instance variables.

        :param building: (str) building name
        :param zones: [str] zone names
        :param start: (datetime timezone aware)
        :param end: (datetime timezone aware)
        :param window: (str) the interval in which to split the data.
        :param lambda_val: (float) lambda value for objective function
        :param non_controllable_data: forwarded unchanged to DataManager.
        :param debug: stored as ``self.debug``; not read inside this class.
        """
        self.DataManager = DataManager(building, zones, start, end, window,
                                       non_controllable_data)
        self.start = start
        self.unix_start = start.timestamp() * 1e9
        self.end = end
        self.unix_end = end.timestamp() * 1e9
        self.window = window  # timedelta string

        self.building = building
        self.zones = zones

        self.lambda_val = lambda_val
        self.debug = debug

        self.g = nx.DiGraph()  # [TODO:Changed to MultiDiGraph... FIX print]

    def safety_check(self, node):
        """Return True iff every zone temperature of ``node`` is within the
        do-not-exceed band (``t_low``..``t_high``, inclusive) for the node's
        timestep.
        """
        for iter_zone in self.zones:
            curr_temperature = node.temperatures[iter_zone]
            curr_safety = self.DataManager.do_not_exceed[iter_zone].iloc[
                node.timestep]
            if not (curr_safety["t_low"] <= curr_temperature <=
                    curr_safety["t_high"]):
                return False
        return True

    def timestep_to_datetime(self, timestep):
        """Return ``self.start`` advanced by ``timestep`` windows."""
        window_seconds = xsg.get_window_in_sec(self.window)
        return self.start + timestep * datetime.timedelta(
            seconds=window_seconds)

    def _edge_discomfort(self, root, child_node):
        """Return ``{zone: discomfort}`` for the edge ``root -> child_node``.

        Comfortband and occupancy are read at ``root.timestep``; the
        temperature used is the mean of the two endpoint temperatures.
        """
        discomfort = {}
        for iter_zone in self.zones:
            curr_comfortband = self.DataManager.comfortband[iter_zone].iloc[
                root.timestep]
            curr_occupancy = self.DataManager.occupancy[iter_zone].iloc[
                root.timestep]
            average_edge_temperature = (
                root.temperatures[iter_zone] +
                child_node.temperatures[iter_zone]) / 2.

            discomfort[iter_zone] = self.DataManager.get_discomfort(
                self.building, average_edge_temperature,
                curr_comfortband["t_low"], curr_comfortband["t_high"],
                curr_occupancy)
        return discomfort

    def _edge_consumption_cost(self, zone_actions):
        """Return ``{zone: price * consumption}`` for the given zone actions."""
        price = 1  # self.prices.iloc[root.timestep] TODO also add right unit conversion, and duration
        return {
            zone: price * self.DataManager.hvac_consumption[zone][zone_action]
            for zone, zone_action in zone_actions.items()
        }

    # the shortest path algorithm
    def shortest_path(self, root):
        """Create the graph using DFS and calculate the shortest path.

        :param root: node being examined right now and needs to be added to graph.
        :return: root Node if root added else return None.
        """
        if root is None:
            return None

        # Already expanded: reuse the stored result (requires Node to be
        # hashable/comparable so equal states map to the same graph node).
        if root in self.g:
            return root

        # stop if node is past predictive horizon
        if self.timestep_to_datetime(root.timestep) >= self.end:
            self.g.add_node(root,
                            objective_cost=0,
                            best_action=None,
                            best_successor=None)  # no cost as leaf node
            return root

        # check if valid node
        if not self.safety_check(root):
            return None

        # Start at infinite cost; it only drops once a feasible child is found.
        self.g.add_node(root,
                        objective_cost=np.inf,
                        best_action=None,
                        best_successor=None)

        # creating children, adding corresponding edge and updating root's objective cost
        action_choices = [
            xsg.NO_ACTION, xsg.HEATING_ACTION, xsg.COOLING_ACTION
        ]
        for action in itertools.product(action_choices,
                                        repeat=len(self.zones)):

            # TODO Compute temperatures properly
            zone_actions = dict(zip(self.zones, action))
            temperatures = {}
            for iter_zone, zone_action in zone_actions.items():
                # Placeholder dynamics: heating +1, cooling -1, otherwise 0.
                # Named constants are used instead of the literals 1 and 2.
                if zone_action == xsg.HEATING_ACTION:
                    delta = 1
                elif zone_action == xsg.COOLING_ACTION:
                    delta = -1
                else:
                    delta = 0
                temperatures[iter_zone] = root.temperatures[iter_zone] + delta

            # Create child node and call the shortest_path recursively on it
            child_node = Node(temperatures=temperatures,
                              timestep=root.timestep + 1)
            child_node = self.shortest_path(child_node)
            if child_node is None:
                continue

            # get discomfort and consumption across edge
            discomfort = self._edge_discomfort(root, child_node)
            consumption_cost = self._edge_consumption_cost(zone_actions)

            # add edge
            self.g.add_edge(root,
                            child_node,
                            action=zone_actions,
                            discomfort=discomfort,
                            consumption_cost=consumption_cost)

            # update root node to contain the best child.
            total_edge_cost = (
                (1 - self.lambda_val) * (sum(consumption_cost.values()))) + (
                    self.lambda_val * (sum(discomfort.values())))

            # ``G.nodes[n]`` replaces ``G.node[n]``, which newer networkx
            # releases no longer provide.
            objective_cost = self.g.nodes[child_node][
                "objective_cost"] + total_edge_cost

            if objective_cost < self.g.nodes[root]["objective_cost"]:
                self.g.nodes[root]["objective_cost"] = objective_cost
                self.g.nodes[root]["best_action"] = zone_actions
                self.g.nodes[root]["best_successor"] = child_node

        return root

    def reconstruct_path(self, root):
        """Util function that reconstructs the best action path.

        Parameters
        ----------
        root : node of ``self.g`` to start from

        Returns
        -------
        List of nodes obtained by following ``best_successor`` from ``root``
        until a node without a successor is reached.

        Raises
        ------
        Exception if ``root`` is not in the MPC graph.
        """
        graph = self.g

        if root not in graph:
            raise Exception("Root does not exist in MPC graph.")

        path = [root]
        successor = graph.nodes[root]['best_successor']
        while successor is not None:
            path.append(successor)
            successor = graph.nodes[successor]['best_successor']

        return path

    #     def g_plot(self, zone):
    #         try:
    #             os.remove('mpc_graph_' + zone + '.html')
    #         except OSError:
    #             pass
    #         fig = plotly_figure(self.advise_unit.g, path=self.path)
    #         py.plot(fig, filename='mpc_graph_' + zone + '.html', auto_open=False)

    def advise(self, starting_temperatures):
        """Call this function to get best action.

        :param starting_temperatures: dict {zone: float temperature}
        :return: action, err. ``err`` is None on success; ``action`` is None
            whenever ``err`` is set.
        """
        root = Node(starting_temperatures, 0)
        root = self.shortest_path(root)
        if root is None:
            return None, "Could not find feasible action."

        root_data = self.g.nodes[root]
        # Cost still infinite => no child was feasible, so there is no action
        # to recommend; report it instead of returning (None, None).
        if root_data["objective_cost"] == np.inf:
            return None, "Could not find feasible action."

        return root_data["best_action"], None
class BuildingEnv(gym.Env):
    """Gym environment stepping per-zone thermostats through a time frame.

    Observation layout (built in ``_create_obs``): for each zone the last
    and the current temperature, then the outdoor temperature, then a
    one-hot encoding of the current timestep. The action is a tuple with
    one ``Discrete(3)`` entry per zone.
    """

    def __init__(self, env_config):
        """Build the environment from a configuration dict.

        :param env_config: dict read for the keys "building", "zones",
            "start", "end", "window", "lambda_val",
            "indoor_starting_temperatures" ({zone: {"current": ..,
            "last": ..}}), "outdoor_starting_temperature" and "unit"
            (must be "F").
        """
        # These values were previously referenced as bare (undefined) names;
        # they all come from the config.
        building = env_config["building"]
        zones = env_config["zones"]
        start = env_config["start"]
        end = env_config["end"]
        window = env_config["window"]

        self.DataManager = DataManager(building, zones, start, end, window)

        self.start = start
        self.unix_start = start.timestamp() * 1e9
        self.end = end
        self.unix_end = end.timestamp() * 1e9
        self.window = window  # timedelta string

        self.building = building
        self.zones = zones

        self.lambda_val = env_config["lambda_val"]

        # assert self.zones == all zones in building. this is because of the thermal model needing other zone temperatures.

        self.curr_timestep = 0

        self.indoor_starting_temperatures = env_config[
            "indoor_starting_temperatures"]  # to get starting temperatures [last, current]
        self.outdoor_starting_temperature = env_config[
            "outdoor_starting_temperature"]

        self.tstats = {}
        for iter_zone in self.zones:
            zone_start = self.indoor_starting_temperatures[iter_zone]
            self.tstats[iter_zone] = Tstat(
                self.building,
                iter_zone,
                zone_start["current"],
                last_temperature=zone_start["last"])

        window_seconds = xsg.get_window_in_sec(self.window)
        horizon_seconds = (self.end - self.start).total_seconds()

        assert 60 * 60 % window_seconds == 0  # window divides an hour
        assert horizon_seconds % window_seconds == 0  # window divides the timeframe

        # the number of timesteps
        self.num_timesteps = int(horizon_seconds / window_seconds)

        self.unit = env_config["unit"]
        assert self.unit == "F"

        # all zones current and last temperature = 2*num_zones
        # building outside temperature -> make a class for how this behaves = 1
        # timestep -> do one hot encoding of week, day, hour, window \approx 4 + 7 + 24 + 60*60 / window
        low_bound = [32] * 2 * len(
            self.zones
        )  # we could use parametric temperature bounds... for now we will give negative infinite reward
        low_bound += [-100]  # for outside temperature we cannot guarantee much

        high_bound = [100] * 2 * len(self.zones)
        high_bound += [200]  # for outside temperature we cannot guarantee much

        low_bound += [0] * (
            self.num_timesteps + 1
        )  # total timesteps plus the final timestep which wont be executed
        high_bound += [1] * (
            self.num_timesteps + 1
        )  # total timesteps plus the final timestep which wont be executed

        self.observation_space = Box(low=np.array(low_bound),
                                     high=np.array(high_bound),
                                     dtype=np.float32)

        self.action_space = Tuple((Discrete(3), ) * len(self.zones))

        self.reset()

    def reset(self):
        """Restore timestep 0 and the starting temperatures.

        :return: the initial observation.
        """
        self.curr_timestep = 0

        for iter_zone in self.zones:
            zone_start = self.indoor_starting_temperatures[iter_zone]
            self.tstats[iter_zone].reset(
                zone_start["current"],
                last_temperature=zone_start["last"])
        self.outdoor_temperature = self.outdoor_starting_temperature

        return self.create_curr_obs()  # obs

    def step(self, action):
        """Advance one timestep using ``action`` (one entry per zone).

        :return: (obs, reward, done, info). The episode ends with reward 0
            once the final timestep is reached, or with ``-INF_REWARD`` when
            a zone temperature leaves its do-not-exceed band.
        """
        self.curr_timestep += 1

        # if we reach the end time.
        if self.curr_timestep == self.num_timesteps:
            return self.create_curr_obs(), 0, True, {}

        # find what new temperature would be. use thermal model with uncertainty. use reset if exceeding
        # do_not_exceed. can't force it to take a different action anymore.

        # update temperatures
        for iter_zone, zone_action in zip(self.zones, action):
            self.tstats[iter_zone].next_temperature(zone_action)
        self.outdoor_temperature += np.random.normal(
        )  # TODO we should make a thermostat for the outdoor temperature.

        # check that in safety temperature band
        for iter_zone in self.zones:
            curr_safety = self.DataManager.do_not_exceed[iter_zone].iloc[
                self.curr_timestep]
            if not (curr_safety["t_low"] <= self.tstats[iter_zone].temperature
                    <= curr_safety["t_high"]):
                return self.create_curr_obs(), -INF_REWARD, True, {
                }  # TODO do we want to add info?

        # get reward by calling discomfort and consumption model ...
        reward = self.get_reward(action)

        return self.create_curr_obs(), reward, False, {
        }  # obs, reward, done, info

    def get_reward(self, action):
        """Get the reward for the given action with the current observation parameters.

        The reward is the negative of
        ``(1 - lambda_val) * total consumption cost + lambda_val * total discomfort``.
        """
        # get discomfort across edge
        discomfort = {}
        for iter_zone in self.zones:
            # TODO Check this again since we are a timestep ahead and we want average comfortband and average occupancy over the edge.
            curr_comfortband = self.DataManager.comfortband[iter_zone].iloc[
                self.curr_timestep]
            curr_occupancy = self.DataManager.occupancy[iter_zone].iloc[
                self.curr_timestep]
            curr_tstat = self.tstats[iter_zone]
            average_edge_temperature = (curr_tstat.temperature +
                                        curr_tstat.last_temperature) / 2.

            discomfort[iter_zone] = self.DataManager.get_discomfort(
                self.building, average_edge_temperature,
                curr_comfortband["t_low"], curr_comfortband["t_high"],
                curr_occupancy)

        # Get consumption across edge
        price = 1  # self.prices.iloc[root.timestep] TODO also add right unit conversion, and duration
        consumption_cost = {
            iter_zone:
            price * self.DataManager.hvac_consumption[iter_zone][zone_action]
            for iter_zone, zone_action in zip(self.zones, action)
        }

        cost = ((1 - self.lambda_val) *
                (sum(consumption_cost.values()))) + (
                    self.lambda_val * (sum(discomfort.values())))
        return -cost

    def create_curr_obs(self):
        """Return the observation for the environment's current state."""
        return self._create_obs(self.tstats, self.outdoor_temperature,
                                self.curr_timestep)

    def _create_obs(self, tstats, outdoor_temperature, curr_timestep):
        """Assemble the observation vector.

        Layout: [last, current] temperature per zone (in ``self.zones``
        order), the outdoor temperature, then a one-hot slot for
        ``curr_timestep``.
        """
        # zeros_like keeps the shape and dtype of the space's low bound, so
        # the observation's dtype matches the declared space.
        obs = np.zeros_like(self.observation_space.low)
        idx = 0
        for iter_zone in self.zones:
            obs[idx] = tstats[iter_zone].last_temperature
            idx += 1
            obs[idx] = tstats[iter_zone].temperature
            idx += 1
        obs[idx] = outdoor_temperature
        idx += 1

        obs[idx + curr_timestep] = 1

        return obs