def create_env(building_uids, **kwargs):
    """Build a CityLearn environment with one heat pump and two storages per building.

    Args:
        building_uids: iterable of building identifiers; one ``Building`` is
            created per uid, in iteration order.
        **kwargs: required options, all looked up by key:
            ``max_action_val`` / ``min_action_val`` - wrapped in one-element
            arrays and passed (in that order) to ``Building.action_space``;
            ``target_cooling`` - cooling target temperature given to every
            heat pump and to ``auto_size``;
            ``start_time`` / ``end_time`` - the environment is created with
            ``simulation_period=(start_time - 1, end_time)``.

    Returns:
        tuple: ``(env, buildings, heat_pump, heat_tank, cooling_tank)`` where
        ``buildings`` is a list and the last three are dicts keyed by uid.

    Raises:
        KeyError: if any of the required keyword options is missing.
    """
    # Read every required option up front so that a missing key fails before
    # any data file is loaded or any device is constructed.
    max_action_val = kwargs["max_action_val"]
    min_action_val = kwargs["min_action_val"]
    target_cooling = kwargs["target_cooling"]
    simulation_period = (kwargs["start_time"] - 1, kwargs["end_time"])

    data_folder = Path("data/")
    demand_file = data_folder / "AustinResidential_TH.csv"
    weather_file = data_folder / 'Austin_Airp_TX-hour.csv'

    # Ref: Assessment of energy efficiency in electric storage water heaters
    # (2008 Energy and Buildings)
    loss_coeff = 0.19 / 24

    heat_pump, heat_tank, cooling_tank = {}, {}, {}
    buildings = []
    for uid in building_uids:
        # NOTE(review): the 9e12 power/capacity values look like placeholders
        # that auto_size() below is expected to replace - confirm.
        heat_pump[uid] = HeatPump(nominal_power=9e12, eta_tech=0.22,
                                  t_target_heating=45,
                                  t_target_cooling=target_cooling)
        heat_tank[uid] = EnergyStorage(capacity=9e12, loss_coeff=loss_coeff)
        cooling_tank[uid] = EnergyStorage(capacity=9e12, loss_coeff=loss_coeff)

        # The same heat pump instance serves as heating and cooling device.
        building = Building(uid,
                            heating_storage=heat_tank[uid],
                            cooling_storage=cooling_tank[uid],
                            heating_device=heat_pump[uid],
                            cooling_device=heat_pump[uid],
                            sub_building_uids=[uid])
        building.state_space(np.array([24.0, 40.0, 1.001]),
                             np.array([1.0, 17.0, -0.001]))
        building.action_space(np.array([max_action_val]),
                              np.array([min_action_val]))
        buildings.append(building)

    building_loader(demand_file, weather_file, buildings)
    auto_size(buildings, t_target_heating=45, t_target_cooling=target_cooling)

    env = CityLearn(demand_file, weather_file, buildings=buildings,
                    time_resolution=1, simulation_period=simulation_period)
    return env, buildings, heat_pump, heat_tank, cooling_tank
def building_loader(data_path, building_attributes, weather_file, solar_profile, building_ids, buildings_states_actions, save_memory = True):
    """Create the selected buildings, load their time series and derive their spaces.

    Args:
        data_path: directory object supporting ``/`` that holds one
            ``<uid>.csv`` simulation file per building.
        building_attributes: path of the JSON file mapping building uid to
            its attribute dictionary.
        weather_file: path of the weather CSV (measured values plus the
            6h/12h/24h prediction columns).
        solar_profile: path of the CSV providing the
            'Hourly Data: AC inverter power (W)' column, which is scaled by
            each building's installed solar capacity.
        building_ids: collection of uids to load; JSON entries whose uid is
            not in it are skipped.
        buildings_states_actions: per-uid dict with 'states' and 'actions'
            sub-dicts mapping a name to an enabled flag.
        save_memory: forwarded unchanged to every device, storage and
            ``Building`` constructor.

    Returns:
        tuple: ``(buildings, observation_spaces, action_spaces,
        observation_space_central_agent, action_space_central_agent)`` where
        ``buildings`` is a dict keyed by uid and the two lists follow the
        order in which buildings appear in the JSON file.

    Note:
        The weather and solar CSV files are parsed once, before the building
        loop, instead of once per building.
    """
    # sim_results key -> column of the per-building simulation CSV.
    building_columns = {
        'cooling_demand': 'Cooling Load [kWh]',
        'dhw_demand': 'DHW Heating [kWh]',
        'non_shiftable_load': 'Equipment Electric Power [kWh]',
        'month': 'Month',
        'day': 'Day Type',
        'hour': 'Hour',
        'daylight_savings_status': 'Daylight Savings Status',
        't_in': 'Indoor Temperature [C]',
        'avg_unmet_setpoint': 'Average Unmet Cooling Setpoint Difference [C]',
        'rh_in': 'Indoor Relative Humidity [%]',
    }
    # sim_results key -> column of the weather CSV (measurements, then forecasts).
    weather_columns = {
        't_out': 'Outdoor Drybulb Temperature [C]',
        'rh_out': 'Outdoor Relative Humidity [%]',
        'diffuse_solar_rad': 'Diffuse Solar Radiation [W/m2]',
        'direct_solar_rad': 'Direct Solar Radiation [W/m2]',
        't_out_pred_6h': '6h Prediction Outdoor Drybulb Temperature [C]',
        't_out_pred_12h': '12h Prediction Outdoor Drybulb Temperature [C]',
        't_out_pred_24h': '24h Prediction Outdoor Drybulb Temperature [C]',
        'rh_out_pred_6h': '6h Prediction Outdoor Relative Humidity [%]',
        'rh_out_pred_12h': '12h Prediction Outdoor Relative Humidity [%]',
        'rh_out_pred_24h': '24h Prediction Outdoor Relative Humidity [%]',
        'diffuse_solar_rad_pred_6h': '6h Prediction Diffuse Solar Radiation [W/m2]',
        'diffuse_solar_rad_pred_12h': '12h Prediction Diffuse Solar Radiation [W/m2]',
        'diffuse_solar_rad_pred_24h': '24h Prediction Diffuse Solar Radiation [W/m2]',
        'direct_solar_rad_pred_6h': '6h Prediction Direct Solar Radiation [W/m2]',
        'direct_solar_rad_pred_12h': '12h Prediction Direct Solar Radiation [W/m2]',
        'direct_solar_rad_pred_24h': '24h Prediction Direct Solar Radiation [W/m2]',
    }
    # States that are always added to the central agent once per building;
    # every other (non state-of-charge) state is added only the first time.
    per_building_states = ('t_in', 'avg_unmet_setpoint', 'rh_in', 'non_shiftable_load', 'solar_gen')
    soc_states = ('cooling_storage_soc', 'dhw_storage_soc')

    def _bounded_cop(cop):
        # Negative COPs and COPs above 20 are both replaced by 20.
        cop[cop < 0] = 20.0
        cop[cop > 20] = 20.0
        return cop.to_numpy()

    with open(building_attributes) as json_file:
        attributes_by_uid = json.load(json_file)
    with open(weather_file) as csv_file:
        weather_data = pd.read_csv(csv_file)
    with open(solar_profile) as csv_file:
        solar_data = pd.read_csv(csv_file)
    outdoor_temperature = weather_data['Outdoor Drybulb Temperature [C]']

    buildings, observation_spaces, action_spaces = {}, [], []
    s_low_central_agent, s_high_central_agent = [], []
    a_low_central_agent, a_high_central_agent = [], []
    appended_states = set()

    for uid, attributes in attributes_by_uid.items():
        if uid not in building_ids:
            continue

        heat_pump = HeatPump(nominal_power = attributes['Heat_Pump']['nominal_power'],
                             eta_tech = attributes['Heat_Pump']['technical_efficiency'],
                             t_target_heating = attributes['Heat_Pump']['t_target_heating'],
                             t_target_cooling = attributes['Heat_Pump']['t_target_cooling'],
                             save_memory = save_memory)
        electric_heater = ElectricHeater(nominal_power = attributes['Electric_Water_Heater']['nominal_power'],
                                         efficiency = attributes['Electric_Water_Heater']['efficiency'],
                                         save_memory = save_memory)
        chilled_water_tank = EnergyStorage(capacity = attributes['Chilled_Water_Tank']['capacity'],
                                           loss_coeff = attributes['Chilled_Water_Tank']['loss_coefficient'],
                                           save_memory = save_memory)
        dhw_tank = EnergyStorage(capacity = attributes['DHW_Tank']['capacity'],
                                 loss_coeff = attributes['DHW_Tank']['loss_coefficient'],
                                 save_memory = save_memory)
        building = Building(buildingId = uid,
                            dhw_storage = dhw_tank,
                            cooling_storage = chilled_water_tank,
                            dhw_heating_device = electric_heater,
                            cooling_device = heat_pump,
                            save_memory = save_memory)

        data_file = str(uid) + '.csv'
        with open(data_path / data_file) as csv_file:
            building_data = pd.read_csv(csv_file)

        # Every building receives its own list objects, including for the
        # shared weather series.
        for key, column in building_columns.items():
            building.sim_results[key] = list(building_data[column])
        for key, column in weather_columns.items():
            building.sim_results[key] = list(weather_data[column])

        # Reading the building attributes
        building.building_type = attributes['Building_Type']
        building.climate_zone = attributes['Climate_Zone']
        building.solar_power_capacity = attributes['Solar_Power_Installed(kW)']
        building.sim_results['solar_gen'] = list(attributes['Solar_Power_Installed(kW)']*solar_data['Hourly Data: AC inverter power (W)']/1000)

        # Finding the max and min possible values of all the states, which can
        # then be used by the RL agent to scale the states and train any
        # function approximators more effectively
        s_low, s_high = [], []
        for state_name, value in buildings_states_actions[uid]['states'].items():
            # Exact equality is kept on purpose: only flags equal to True enable a state.
            if value == True:  # noqa: E712
                if state_name == "net_electricity_consumption":
                    # Lower and upper bounds of net electricity consumption are
                    # rough estimates and may not be completely accurate.
                    # Scaling this state-variable using these bounds may result
                    # in normalized values above 1 or below 0.
                    _net_elec_cons_upper_bound = max(np.array(building.sim_results['non_shiftable_load']) - np.array(building.sim_results['solar_gen']) + np.array(building.sim_results['dhw_demand'])/.8 + np.array(building.sim_results['cooling_demand']) + building.dhw_storage.capacity/.8 + building.cooling_storage.capacity/2)
                    s_low.append(0.)
                    s_high.append(_net_elec_cons_upper_bound)
                    s_low_central_agent.append(0.)
                    s_high_central_agent.append(_net_elec_cons_upper_bound)

                elif state_name not in soc_states:
                    state_min = min(building.sim_results[state_name])
                    state_max = max(building.sim_results[state_name])
                    s_low.append(state_min)
                    s_high.append(state_max)

                    # Create boundaries of the observation space of a
                    # centralized agent (if a central agent is being used
                    # instead of decentralized ones). We include all the
                    # weather variables used as states, and use appended_states
                    # to make sure we don't include any repeated states (i.e.
                    # weather variables measured by different buildings)
                    if state_name in per_building_states:
                        s_low_central_agent.append(state_min)
                        s_high_central_agent.append(state_max)
                    elif state_name not in appended_states:
                        s_low_central_agent.append(state_min)
                        s_high_central_agent.append(state_max)
                        appended_states.add(state_name)
                else:
                    s_low.append(0.0)
                    s_high.append(1.0)
                    s_low_central_agent.append(0.0)
                    s_high_central_agent.append(1.0)

        # The energy storage (tank) capacity indicates how many times bigger
        # the tank is compared to the maximum hourly energy demand of the
        # building (cooling or DHW respectively), which sets a lower bound for
        # the action of 1/tank_capacity, as the energy storage device can't
        # provide the building with more energy than it will ever need for a
        # given hour. The heat pump is sized using approximately the maximum
        # hourly energy demand of the building (after accounting for the COP,
        # see function autosize). Therefore, we make the fair assumption that
        # the action also has an upper bound equal to 1/tank_capacity. These
        # boundaries should speed up the learning process of the agents and
        # make them more stable rather than if we just set them to -1 and 1.
        # I.e. if Chilled_Water_Tank.Capacity is 3 (3 times the max. hourly
        # demand of the building in the entire year), its actions will be
        # bounded between -1/3 and 1/3
        a_low, a_high = [], []
        for action_name, value in buildings_states_actions[uid]['actions'].items():
            if value == True:  # noqa: E712
                # Any enabled action other than 'cooling_storage' is bounded
                # using the DHW tank capacity.
                tank_name = 'Chilled_Water_Tank' if action_name == 'cooling_storage' else 'DHW_Tank'
                capacity = attributes[tank_name]['capacity']
                # Avoid division by 0
                if capacity > 0.000001:
                    action_low = max(-1.0/capacity, -1.0)
                    action_high = min(1.0/capacity, 1.0)
                else:
                    action_low, action_high = -1.0, 1.0
                a_low.append(action_low)
                a_high.append(action_high)
                a_low_central_agent.append(action_low)
                a_high_central_agent.append(action_high)

        building.set_state_space(np.array(s_high), np.array(s_low))
        building.set_action_space(np.array(a_high), np.array(a_low))

        observation_spaces.append(building.observation_space)
        action_spaces.append(building.action_space)

        buildings[uid] = building

    observation_space_central_agent = spaces.Box(low=np.float32(np.array(s_low_central_agent)), high=np.float32(np.array(s_high_central_agent)), dtype=np.float32)
    action_space_central_agent = spaces.Box(low=np.float32(np.array(a_low_central_agent)), high=np.float32(np.array(a_high_central_agent)), dtype=np.float32)

    for building in buildings.values():

        # If the DHW device is a HeatPump
        dhw_device = building.dhw_heating_device
        if isinstance(dhw_device, HeatPump):
            # Calculating COPs of the heat pumps for every hour
            dhw_device.cop_heating = _bounded_cop(
                dhw_device.eta_tech*(dhw_device.t_target_heating + 273.15)/(dhw_device.t_target_heating - outdoor_temperature))

        cooling_device = building.cooling_device
        cooling_device.cop_cooling = _bounded_cop(
            cooling_device.eta_tech*(cooling_device.t_target_cooling + 273.15)/(outdoor_temperature - cooling_device.t_target_cooling))

        building.reset()

    auto_size(buildings)

    return buildings, observation_spaces, action_spaces, observation_space_central_agent, action_space_central_agent
def building_loader(building_attributes, solar_profile, building_ids, buildings_states_actions):
    """Create the selected buildings from ``data//<uid>.csv`` and derive their spaces.

    Args:
        building_attributes: path of the JSON file mapping building uid to
            its attribute dictionary.
        solar_profile: path of the CSV providing the
            'Hourly Data: AC inverter power (W)' column, which is scaled by
            each building's installed solar capacity.
        building_ids: collection of uids to load; JSON entries whose uid is
            not in it are skipped.
        buildings_states_actions: per-uid dict with 'states' and 'actions'
            sub-dicts mapping a name to an enabled flag.

    Returns:
        tuple: ``(buildings, observation_spaces, action_spaces)``, three
        lists in the order in which buildings appear in the JSON file.

    Note:
        The solar profile CSV is parsed once, before the building loop,
        instead of once per building. ``sim_results`` entries are stored as
        pandas Series (not lists) in this loader.
    """
    # sim_results key -> column of the per-building CSV.
    building_columns = {
        'cooling_demand': 'Cooling Load [kWh]',
        'dhw_demand': 'DHW Heating [kWh]',
        'non_shiftable_load': 'Equipment Electric Power [kWh]',
        'day': 'Day Type',
        'hour': 'Hour',
        'daylight_savings_status': 'Daylight Savings Status',
        't_out': 'Outdoor Drybulb Temperature [C]',
        'rh_out': 'Outdoor Relative Humidity [%]',
        'diffuse_solar_rad': 'Diffuse Solar Radiation [W/m2]',
        'direct_solar_rad': 'Direct Solar Radiation [W/m2]',
        't_in': 'Indoor Temperature [C]',
        'avg_unmet_setpoint': 'Average Unmet Cooling Setpoint Difference [C]',
        'rh_in': 'Indoor Relative Humidity [%]',
    }
    soc_states = ('cooling_storage_soc', 'dhw_storage_soc')

    with open(building_attributes) as json_file:
        attributes_by_uid = json.load(json_file)
    with open(solar_profile) as csv_file:
        solar_data = pd.read_csv(csv_file)

    buildings, observation_spaces, action_spaces = [], [], []
    for uid, attributes in attributes_by_uid.items():
        if uid not in building_ids:
            continue

        heat_pump = HeatPump(
            nominal_power=attributes['Heat_Pump']['nominal_power'],
            eta_tech=attributes['Heat_Pump']['technical_efficiency'],
            t_target_heating=attributes['Heat_Pump']['t_target_heating'],
            t_target_cooling=attributes['Heat_Pump']['t_target_cooling'])
        electric_heater = ElectricHeater(
            nominal_power=attributes['Electric_Water_Heater']['nominal_power'],
            efficiency=attributes['Electric_Water_Heater']['efficiency'])
        chilled_water_tank = EnergyStorage(
            capacity=attributes['Chilled_Water_Tank']['capacity'],
            loss_coeff=attributes['Chilled_Water_Tank']['loss_coefficient'])
        dhw_tank = EnergyStorage(
            capacity=attributes['DHW_Tank']['capacity'],
            loss_coeff=attributes['DHW_Tank']['loss_coefficient'])
        building = Building(buildingId=uid,
                            dhw_storage=dhw_tank,
                            cooling_storage=chilled_water_tank,
                            dhw_heating_device=electric_heater,
                            cooling_device=heat_pump)

        with open('data//' + uid + '.csv') as csv_file:
            building_data = pd.read_csv(csv_file)
        for key, column in building_columns.items():
            building.sim_results[key] = building_data[column]

        # The arithmetic creates a new Series per building, so sharing the
        # parsed solar profile between buildings is safe.
        building.sim_results['solar_gen'] = attributes['Solar_Power_Installed(kW)'] * solar_data['Hourly Data: AC inverter power (W)'] / 1000

        # Finding the max and min possible values of all the states, which can
        # then be used by the RL agent to scale the states and train any
        # function approximators more effectively
        s_low, s_high = [], []
        for state_name, value in buildings_states_actions[uid]['states'].items():
            # Exact equality is kept on purpose: only flags equal to True enable a state.
            if value == True:  # noqa: E712
                if state_name not in soc_states:
                    s_low.append(building.sim_results[state_name].min())
                    s_high.append(building.sim_results[state_name].max())
                else:
                    # State-of-charge states are bounded to [0, 1].
                    s_low.append(0.0)
                    s_high.append(1.0)

        # Every enabled action is bounded to [0, 1] in this loader.
        a_low, a_high = [], []
        for value in buildings_states_actions[uid]['actions'].values():
            if value == True:  # noqa: E712
                a_low.append(0.0)
                a_high.append(1.0)

        building.set_state_space(np.array(s_high), np.array(s_low))
        building.set_action_space(np.array(a_high), np.array(a_low))

        observation_spaces.append(building.observation_space)
        action_spaces.append(building.action_space)
        buildings.append(building)

    auto_size(buildings)
    return buildings, observation_spaces, action_spaces
def building_loader(data_path, building_attributes, weather_file, solar_profile, building_ids, buildings_states_actions):
    """Create the selected buildings, load their time series and derive their spaces.

    Args:
        data_path: directory object supporting ``/`` that holds one
            ``<uid>.csv`` simulation file per building.
        building_attributes: path of the JSON file mapping building uid to
            its attribute dictionary.
        weather_file: path of the weather CSV (measured values plus the
            6h/12h/24h prediction columns).
        solar_profile: path of the CSV providing the
            'Hourly Data: AC inverter power (W)' column, which is scaled by
            each building's installed solar capacity.
        building_ids: collection of uids to load; JSON entries whose uid is
            not in it are skipped.
        buildings_states_actions: per-uid dict with 'states' and 'actions'
            sub-dicts mapping a name to an enabled flag.

    Returns:
        tuple: ``(buildings, observation_spaces, action_spaces)`` where
        ``buildings`` is a dict keyed by uid and the two lists follow the
        order in which buildings appear in the JSON file.

    Note:
        The weather and solar CSV files are parsed once, before the building
        loop, instead of once per building.
    """
    # sim_results key -> column of the per-building simulation CSV.
    building_columns = {
        'cooling_demand': 'Cooling Load [kWh]',
        'dhw_demand': 'DHW Heating [kWh]',
        'non_shiftable_load': 'Equipment Electric Power [kWh]',
        'month': 'Month',
        'day': 'Day Type',
        'hour': 'Hour',
        'daylight_savings_status': 'Daylight Savings Status',
        't_in': 'Indoor Temperature [C]',
        'avg_unmet_setpoint': 'Average Unmet Cooling Setpoint Difference [C]',
        'rh_in': 'Indoor Relative Humidity [%]',
    }
    # sim_results key -> column of the weather CSV (measurements, then forecasts).
    weather_columns = {
        't_out': 'Outdoor Drybulb Temperature [C]',
        'rh_out': 'Outdoor Relative Humidity [%]',
        'diffuse_solar_rad': 'Diffuse Solar Radiation [W/m2]',
        'direct_solar_rad': 'Direct Solar Radiation [W/m2]',
        't_out_pred_6h': '6h Prediction Outdoor Drybulb Temperature [C]',
        't_out_pred_12h': '12h Prediction Outdoor Drybulb Temperature [C]',
        't_out_pred_24h': '24h Prediction Outdoor Drybulb Temperature [C]',
        'rh_out_pred_6h': '6h Prediction Outdoor Relative Humidity [%]',
        'rh_out_pred_12h': '12h Prediction Outdoor Relative Humidity [%]',
        'rh_out_pred_24h': '24h Prediction Outdoor Relative Humidity [%]',
        'diffuse_solar_rad_pred_6h': '6h Prediction Diffuse Solar Radiation [W/m2]',
        'diffuse_solar_rad_pred_12h': '12h Prediction Diffuse Solar Radiation [W/m2]',
        'diffuse_solar_rad_pred_24h': '24h Prediction Diffuse Solar Radiation [W/m2]',
        'direct_solar_rad_pred_6h': '6h Prediction Direct Solar Radiation [W/m2]',
        'direct_solar_rad_pred_12h': '12h Prediction Direct Solar Radiation [W/m2]',
        'direct_solar_rad_pred_24h': '24h Prediction Direct Solar Radiation [W/m2]',
    }
    soc_states = ('cooling_storage_soc', 'dhw_storage_soc')

    def _bounded_cop(cop):
        # Negative COPs and COPs above 20 are both replaced by 20.
        cop[cop < 0] = 20.0
        cop[cop > 20] = 20.0
        return cop.to_numpy()

    with open(building_attributes) as json_file:
        attributes_by_uid = json.load(json_file)
    with open(weather_file) as csv_file:
        weather_data = pd.read_csv(csv_file)
    with open(solar_profile) as csv_file:
        solar_data = pd.read_csv(csv_file)
    outdoor_temperature = weather_data['Outdoor Drybulb Temperature [C]']

    buildings, observation_spaces, action_spaces = {}, [], []
    for uid, attributes in attributes_by_uid.items():
        if uid not in building_ids:
            continue

        heat_pump = HeatPump(nominal_power = attributes['Heat_Pump']['nominal_power'],
                             eta_tech = attributes['Heat_Pump']['technical_efficiency'],
                             t_target_heating = attributes['Heat_Pump']['t_target_heating'],
                             t_target_cooling = attributes['Heat_Pump']['t_target_cooling'])
        electric_heater = ElectricHeater(nominal_power = attributes['Electric_Water_Heater']['nominal_power'],
                                         efficiency = attributes['Electric_Water_Heater']['efficiency'])
        chilled_water_tank = EnergyStorage(capacity = attributes['Chilled_Water_Tank']['capacity'],
                                           loss_coeff = attributes['Chilled_Water_Tank']['loss_coefficient'])
        dhw_tank = EnergyStorage(capacity = attributes['DHW_Tank']['capacity'],
                                 loss_coeff = attributes['DHW_Tank']['loss_coefficient'])
        building = Building(buildingId = uid,
                            dhw_storage = dhw_tank,
                            cooling_storage = chilled_water_tank,
                            dhw_heating_device = electric_heater,
                            cooling_device = heat_pump)

        data_file = str(uid) + '.csv'
        with open(data_path / data_file) as csv_file:
            building_data = pd.read_csv(csv_file)

        # Every building receives its own list objects, including for the
        # shared weather series.
        for key, column in building_columns.items():
            building.sim_results[key] = list(building_data[column])
        for key, column in weather_columns.items():
            building.sim_results[key] = list(weather_data[column])

        # Reading the building attributes
        building.building_type = attributes['Building_Type']
        building.climate_zone = attributes['Climate_Zone']
        building.solar_power_capacity = attributes['Solar_Power_Installed(kW)']
        building.sim_results['solar_gen'] = list(attributes['Solar_Power_Installed(kW)']*solar_data['Hourly Data: AC inverter power (W)']/1000)

        # Finding the max and min possible values of all the states, which can
        # then be used by the RL agent to scale the states and train any
        # function approximators more effectively
        s_low, s_high = [], []
        for state_name, value in buildings_states_actions[uid]['states'].items():
            # Exact equality is kept on purpose: only flags equal to True enable a state.
            if value == True:  # noqa: E712
                if state_name not in soc_states:
                    s_low.append(min(building.sim_results[state_name]))
                    s_high.append(max(building.sim_results[state_name]))
                else:
                    # State-of-charge states are bounded to [0, 1].
                    s_low.append(0.0)
                    s_high.append(1.0)

        # Every enabled action is bounded to [0, 1] in this loader.
        a_low, a_high = [], []
        for value in buildings_states_actions[uid]['actions'].values():
            if value == True:  # noqa: E712
                a_low.append(0.0)
                a_high.append(1.0)

        building.set_state_space(np.array(s_high), np.array(s_low))
        building.set_action_space(np.array(a_high), np.array(a_low))

        observation_spaces.append(building.observation_space)
        action_spaces.append(building.action_space)

        buildings[uid] = building

    for building in buildings.values():

        # If the DHW device is a HeatPump
        dhw_device = building.dhw_heating_device
        if isinstance(dhw_device, HeatPump):
            # Calculating COPs of the heat pumps for every hour
            dhw_device.cop_heating = _bounded_cop(
                dhw_device.eta_tech*(dhw_device.t_target_heating + 273.15)/(dhw_device.t_target_heating - outdoor_temperature))

        cooling_device = building.cooling_device
        cooling_device.cop_cooling = _bounded_cop(
            cooling_device.eta_tech*(cooling_device.t_target_cooling + 273.15)/(outdoor_temperature - cooling_device.t_target_cooling))

    auto_size(buildings)

    return buildings, observation_spaces, action_spaces
# Script-level setup: create one building (uid 4) with an oversized heat pump
# and heating/cooling storages, then load its data and size the devices.
data_folder = Path("data/")
demand_file = data_folder / "AustinResidential_TH.csv"
weather_file = data_folder / 'Austin_Airp_TX-hour.csv'

# Full candidate set: [4, 5, 9, 16, 21, 26, 33, 36, 49, 59]
building_ids = [4]

# Ref: Assessment of energy efficiency in electric storage water heaters
# (2008 Energy and Buildings)
loss_factor = 0.19 / 24

# One device of each kind per building, keyed by building uid.
heat_pump = {
    uid: HeatPump(nominal_power=9e12, eta_tech=0.22,
                  t_target_heating=45, t_target_cooling=10)
    for uid in building_ids
}
heat_tank = {
    uid: EnergyStorage(capacity=9e12, loss_coeff=loss_factor)
    for uid in building_ids
}
cooling_tank = {
    uid: EnergyStorage(capacity=9e12, loss_coeff=loss_factor)
    for uid in building_ids
}

buildings = {}
for uid in building_ids:
    # The same heat pump instance serves as heating and cooling device.
    buildings[uid] = Building(uid,
                              heating_storage=heat_tank[uid],
                              cooling_storage=cooling_tank[uid],
                              heating_device=heat_pump[uid],
                              cooling_device=heat_pump[uid])
    buildings[uid].state_action_space(np.array([24.0, 40.0, 1.001]),
                                      np.array([1.0, 17.0, -0.001]),
                                      np.array([0.5]),
                                      np.array([-0.5]))

building_loader(demand_file, weather_file, buildings)
auto_size(buildings, t_target_heating=45, t_target_cooling=10)
def plan_on_replay_buffer(self, num_iterations=1, without_updates=False, rel_delta=True):
    """Repeatedly sweep the replay buffer, backing up Q-values for feasible actions.

    For every consecutive pair of buffered states and every discretised
    (charge level, action) combination that passes the feasibility checks,
    the target ``-(electric cooling demand)**2 + gamma * max_a' Q(s', a')``
    is compared with the current estimate and, unless ``without_updates`` is
    set, used to update ``self.Q_sa`` with the current step size.

    With ``without_updates=False`` the sweep is repeated until one of:
    ``self.got_stop_signal`` becomes true, the evaluation pass reports a
    maximum relative error below ``self.delta_term``, or (adaptive learning
    rate only) the step size drops below 0.001. After each updating sweep the
    method calls itself with ``without_updates=True`` to measure the error.

    Args:
        num_iterations: accepted for interface compatibility; not used.
        without_updates: when true, run exactly one sweep without touching
            ``self.Q_sa`` and return the measured error.
        rel_delta: accepted for interface compatibility; not used.

    Returns:
        The maximum of ``|target - q| / |q|`` over all evaluated pairs when
        ``without_updates`` is true (0.0 if none was evaluated); ``None``
        otherwise.

    Raises:
        AssertionError: if the heat pump's maximum cooling power at the
            previous state's outdoor temperature does not exceed that
            state's cooling demand.
    """
    import sys

    alpha = self.alpha
    self.max_action_val_seen_till_now = 0.0
    self.max_action_val_seen_pair = None
    self.min_action_val_seen_till_now = float('inf')
    self.min_action_val_seen_pair = None

    prev_delta_ratio = float('inf')
    alpha_ceil = 1.0
    if not without_updates:
        # Only a top-level planning run resets the stop flag and the
        # error-increase counter; the nested evaluation pass must not.
        self.got_stop_signal = False
        self.num_times_delta_inc = 0
    max_delta_ratio = 0.0
    idx = 0

    while True:
        # got_stop_signal is re-read at every level because it may be set
        # while planning is in progress.
        if self.got_stop_signal and not without_updates:
            break
        self.max_action_val_seen_till_now = 0.0
        idx += 1
        prev_state = {}
        delta = 0.0
        # Bootstrap targets come from a snapshot taken before this sweep.
        Q_sa_copy = copy.deepcopy(self.Q_sa)

        for state in self.replay_buffer:
            sys.stdout.flush()
            if self.got_stop_signal and not without_updates:
                break
            if not prev_state:
                prev_state = state
                continue

            # Our Tweak -> We can make updates for different states which we
            # haven't even seen!
            for charge_level in range(self.level_cnt):
                if self.got_stop_signal and not without_updates:
                    break
                level_charge_val = self.charge_disc.get_val(charge_level)
                # To account for losses when storing energy.
                charge_val = level_charge_val * (1 - self.loss_coefficient)

                for action in range(self.level_cnt):
                    if self.got_stop_signal and not without_updates:
                        break
                    action_val = self.action_disc.get_val(action)

                    # Cannot discharge more than is stored, nor charge beyond full.
                    if action_val < 0 and -1 * action_val > charge_val:
                        continue
                    if action_val > 0 and action_val > 1 - charge_val:
                        continue

                    cooling_pump = HeatPump(nominal_power=9e12, eta_tech=0.22,
                                            t_target_heating=45, t_target_cooling=1)
                    cooling_pump.set_cop(t_source_cooling=prev_state["t_out"])

                    # TODO: Not handling cases where without charge in ES, we
                    # can't satisfy our cooling demand.
                    assert (cooling_pump.get_max_cooling_power(
                        t_source_cooling=prev_state["t_out"]) > prev_state["cooling_demand"])
                    cooling_power_avail = cooling_pump.get_max_cooling_power(
                        t_source_cooling=prev_state["t_out"]
                    ) - prev_state["cooling_demand"]

                    # Don't accept charge values which require charging more
                    # than possible, and which discharge more than required.
                    # Not updating weights for these values will help in not
                    # using this for updates and focussing representational
                    # power in valid actions only.
                    # TODO: Trying removing and confirm the benefits.
                    #
                    # Note the difference in action_val in this and DDP. In
                    # DDP an action means drawing energy from pump to get us
                    # that much of charging and hence we use /efficiency and
                    # not * efficiency in that.
                    cooling_energy_to_storage = action_val * self.storage_capacity * self.efficiency
                    if action_val >= 0:
                        if cooling_energy_to_storage > cooling_power_avail:
                            continue
                    elif -1 * action_val * self.storage_capacity * self.efficiency > prev_state["cooling_demand"]:
                        continue

                    next_charge_val = charge_val + cooling_energy_to_storage / self.storage_capacity
                    cooling_energy_drawn_from_heat_pump = cooling_energy_to_storage + prev_state["cooling_demand"]
                    elec_demand_cooling = cooling_pump.get_electric_consumption_cooling(
                        cooling_supply=cooling_energy_drawn_from_heat_pump)

                    # Either one approximator per action, or a single one
                    # taking the action value as an extra feature.
                    if self.parameterize_actions:
                        q_function = self.Q_sa
                        features = [prev_state["hour_of_day"], prev_state["t_out"],
                                    level_charge_val, action_val]
                    else:
                        q_function = self.Q_sa[action]
                        features = [prev_state["hour_of_day"], prev_state["t_out"],
                                    level_charge_val]
                    q_val = q_function(features)

                    max_action, max_action_val = self.get_max_action(
                        [state["hour_of_day"], state["t_out"], next_charge_val],
                        action_val=None, Q_sa_copy=Q_sa_copy)

                    target = -1 * (elec_demand_cooling * elec_demand_cooling) + self.gamma * max_action_val
                    curr_delta = abs(target - q_val)
                    delta = max(delta, curr_delta)
                    delta_ratio = curr_delta / abs(q_val)
                    max_delta_ratio = max(max_delta_ratio, delta_ratio)

                    if not without_updates:
                        q_function.update(alpha, target, list(features))

            prev_state = state

        print(
            "Done Planning iteration {0}: {1} with buffer size {2}, delta={3}, max_action_value={4}:{5} min_action_val={6}:{7} "
            "delta_ratio={8}".format(
                "Without updates" if without_updates else "normal",
                idx, len(self.replay_buffer), delta,
                self.max_action_val_seen_till_now,
                self.max_action_val_seen_pair,
                self.min_action_val_seen_till_now,
                self.min_action_val_seen_pair, max_delta_ratio))

        if without_updates:
            return max_delta_ratio

        # Measure the error left after this sweep with a read-only pass.
        max_delta_ratio = self.plan_on_replay_buffer(num_iterations=1, without_updates=True)
        sys.stdout.flush()

        if max_delta_ratio < self.delta_term:
            print("Breaking as max delta ratio < 0.01")
            break

        if self.use_adaptive_learning_rate:
            if max_delta_ratio > prev_delta_ratio:
                # Tolerate up to three increases before halving the step size.
                self.num_times_delta_inc += 1
                if self.num_times_delta_inc <= 3:
                    prev_delta_ratio = max_delta_ratio
                    continue
                self.num_times_delta_inc = 0
                # Report the sweep-wide maximum; the per-pair delta_ratio may
                # never have been assigned in this call.
                print(
                    "Max Delta Ratio {0} > prev_delta_ratio {1}. Changing alpha {2} -> {3}"
                    .format(max_delta_ratio, prev_delta_ratio, alpha, alpha / 2))
                alpha /= 2
                if alpha < 0.001:
                    break
                # Later doublings may not exceed the last step size that was halved to.
                alpha_ceil = alpha
            else:
                prev_alpha = alpha
                alpha = min(2 * alpha, alpha_ceil)
                print(
                    "Max Delta Ratio {0} <= prev_delta_ratio {1}. Changing alpha {2} -> {3}"
                    .format(max_delta_ratio, prev_delta_ratio, prev_alpha, alpha))
            prev_delta_ratio = max_delta_ratio