"Building_1", "Building_2", "Building_3", "Building_4", "Building_5", "Building_6", "Building_7", "Building_8", "Building_9" ] objective_function = [ 'ramping', '1-load_factor', 'average_daily_peak', 'peak_demand', 'net_electricity_consumption' ] env = CityLearn(data_path, building_attributes, weather_file, solar_profile, building_ids, buildings_states_actions=building_state_actions, cost_function=objective_function) observations_spaces, actions_spaces = env.get_state_action_spaces() # Provides information on Building type, Climate Zone, Annual DHW demand, Annual Cooling Demand, Annual Electricity Demand, Solar Capacity, and correllations among buildings building_info = env.get_building_information() # RL CONTROLLER #Instantiating the control agent(s) agents = Agent(env, building_info, observations_spaces, actions_spaces) # Select many episodes for training. In the final run we will set this value to 1 (the buildings run for one year) episodes = 10 k, c = 0, 0 cost, cum_reward = {}, {} start = time.time() # The number of episodes can be replaces by a stopping criterion (i.e. convergence of the average reward)
def run(config):
    data_folder = Path(config.data_path)
    building_attributes = data_folder / 'building_attributes.json'
    solar_profile = data_folder / 'solar_generation_1kW.csv'
    building_state_actions = 'buildings_state_action_space.json'
    # building_ids = ["Building_" + str(i) for i in range(1, config.num_buildings + 1)]
    config.num_buildings = 6

    # Customized log directory that encodes the hyper-parameters of this run.
    hidden = config.hidden_dim
    lr = config.lr
    tau = config.tau
    gamma = config.gamma
    batch_size = config.batch_size
    buffer_length = config.buffer_length
    log_path = ("log_hidden{}_lr{}_tau{}_gamma{}_batch_size{}"
                "_buffer_length{}_TIME_PERIOD_1008_MAXACTION_25/").format(
                    hidden, lr, tau, gamma, batch_size, buffer_length)
    logger = SummaryWriter(log_dir=log_path)

    # TODO: make the building subset configurable instead of hard-coded.
    building_ids = ["Building_" + str(i) for i in [1, 2, 5, 6, 7, 8]]
    env = CityLearn(building_attributes,
                    solar_profile,
                    building_ids,
                    buildings_states_actions=building_state_actions,
                    cost_function=[
                        'ramping', '1-load_factor', 'peak_to_valley_ratio',
                        'peak_demand', 'net_electricity_consumption'
                    ])
    observations_spaces, actions_spaces = env.get_state_action_spaces()

    # Instantiating the control agent(s)
    if config.agent_alg == 'MADDPG':
        agents = MA_DDPG(observations_spaces,
                         actions_spaces,
                         hyper_params=vars(config))
    else:
        raise NotImplementedError

    k, c = 0, 0
    cost, cum_reward = {}, {}
    # TODO: store np arrays or tensors in the buffer?
    buffer = ReplayBuffer(max_steps=config.buffer_length,
                          num_agents=config.num_buildings,
                          obs_dims=[s.shape[0] for s in observations_spaces],
                          ac_dims=[a.shape[0] for a in actions_spaces])
    start = time.time()

    for e in range(config.n_episodes):
        cum_reward[e] = 0
        rewards = []
        state = env.reset()
        statecast = lambda x: [torch.FloatTensor(s) for s in x]
        done = False
        ss = 0
        while not done:
            if k % (40000 * 4) == 0:
                print('hour: ' + str(k) + ' of ' +
                      str(TIME_PERIOD * config.n_episodes))
            action = agents.select_action(statecast(state), explore=False)
            action = [a.detach().numpy() for a in action]
            # If batch norm is used, drop the leading batch dimension:
            # action = [np.squeeze(a, axis=0) for a in action]
            ss += 1
            next_state, reward, done, _ = env.step(action)
            reward = reward_function(reward)  # See comments in reward_function.py
            buffer.push(statecast(state), action, reward,
                        statecast(next_state), done)
            # Alternative update schedule:
            # if (len(buffer) >= config.batch_size and
            #         (e % config.steps_per_update) < config.n_rollout_threads):
            if len(buffer) >= config.batch_size:
                agents.to_train(device='gpu' if USE_CUDA else 'cpu')
                for a_i in range(agents.n_buildings):
                    sample = buffer.sample(config.batch_size, to_gpu=USE_CUDA)
                    agents.update(sample, a_i, logger=logger,
                                  global_step=e * TIME_PERIOD + ss)

            # Per-step logging of the environment costs.
            step = e * TIME_PERIOD + ss
            logger.add_scalar(tag='net electric consumption',
                              scalar_value=env.net_electric_consumption[-1],
                              global_step=step)
            logger.add_scalar(tag='env cost total',
                              scalar_value=env.cost()['total'],
                              global_step=step)
            logger.add_scalar(tag='1 load factor',
                              scalar_value=env.cost()['1-load_factor'],
                              global_step=step)
            logger.add_scalar(tag='peak to valley ratio',
                              scalar_value=env.cost()['peak_to_valley_ratio'],
                              global_step=step)
            logger.add_scalar(tag='peak demand',
                              scalar_value=env.cost()['peak_demand'],
                              global_step=step)
            logger.add_scalar(tag='net energy consumption',
                              scalar_value=env.cost()['net_electricity_consumption'],
                              global_step=step)
            # Net consumption the district would have had without using the
            # thermal storage systems.
            net_energy_consumption_wo_storage = (
                env.net_electric_consumption[-1] +
                env.electric_generation[-1] -
                env.electric_consumption_cooling_storage[-1] -
                env.electric_consumption_dhw_storage[-1])
            logger.add_scalar(tag='net energy consumption without storage',
                              scalar_value=net_energy_consumption_wo_storage,
                              global_step=step)
            for agent_id, r in enumerate(reward):
                logger.add_scalar(tag='agent {} reward'.format(agent_id),
                                  scalar_value=r,
                                  global_step=step)

            state = next_state
            cum_reward[e] += reward[0]
            k += 1
            cur_time = time.time()
            # print("average time: {}s/iteration at iteration {}".format(
            #     (cur_time - start) / (60.0 * k), k))

        cost[e] = env.cost()
        print(cost[e])  # print the cost summary every episode

        # Log the end-of-episode totals of the environment cost and reward.
        logger.add_scalar(tag='env cost total final',
                          scalar_value=env.cost()['total'],
                          global_step=e)
        logger.add_scalar(tag='1 load factor final',
                          scalar_value=env.cost()['1-load_factor'],
                          global_step=e)
        logger.add_scalar(tag='peak to valley ratio final',
                          scalar_value=env.cost()['peak_to_valley_ratio'],
                          global_step=e)
        logger.add_scalar(tag='peak demand final',
                          scalar_value=env.cost()['peak_demand'],
                          global_step=e)
        logger.add_scalar(tag='net energy consumption final',
                          scalar_value=env.cost()['net_electricity_consumption'],
                          global_step=e)
        net_energy_consumption_wo_storage = (
            env.net_electric_consumption[-1] +
            env.electric_generation[-1] -
            env.electric_consumption_cooling_storage[-1] -
            env.electric_consumption_dhw_storage[-1])
        logger.add_scalar(tag='net energy consumption without storage',
                          scalar_value=net_energy_consumption_wo_storage,
                          global_step=e)
        c += 1
        rewards.append(reward)

    end = time.time()
    print((end - start) / 60.0)
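# A hedged usage sketch: how run(config) might be invoked from the command
# line. The flag names mirror the config attributes read inside run(); the
# default values are illustrative assumptions, not taken from the original.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', default='data/')
    parser.add_argument('--agent_alg', default='MADDPG')
    parser.add_argument('--n_episodes', type=int, default=10)
    parser.add_argument('--hidden_dim', type=int, default=256)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--tau', type=float, default=0.01)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--buffer_length', type=int, default=100000)
    run(parser.parse_args())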
                      time.strftime("%Y%m%d-%H%M%S"))  # append the timestamp
os.makedirs(parent_dir, exist_ok=True)

# Create log dir
log_dir = parent_dir + "monitor"
os.makedirs(log_dir, exist_ok=True)

# Set the interval length (8760 hours = one simulated year) and the counts
interval = 8760
# NOTE: `sys.argv is not None` is always true; guard on the argument count
# instead so the default of 10 is actually reachable.
icount = int(sys.argv[1]) if len(sys.argv) > 1 else 10
log_interval = 1
check_interval = 1
save_interval = 1

# The noise objects for DDPG (total action dimension across all buildings;
# a sketch of constructing the noise object itself follows below)
_, actions_spaces = env.get_state_action_spaces()
n_actions = 0
for action in actions_spaces:
    n_actions += action.shape[-1]

# Make VecEnv + wrap in Monitor
env = Monitor(env, filename=log_dir)
callbackBest = SaveOnBestTrainingRewardCallback2_10(
    check_freq=check_interval * interval,
    log_dir=log_dir,
    save_freq=interval * save_interval)

# Add callbacks to the callback list
callbackList = []
useBestCallback = True
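# A minimal sketch of the noise construction referenced above, assuming the
# stable-baselines3 NormalActionNoise API; the 0.1 sigma is an illustrative
# assumption, not a value taken from the original script.
from stable_baselines3.common.noise import NormalActionNoise
import numpy as np

action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                 sigma=0.1 * np.ones(n_actions))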
                   verbose=0)
RBC_env = CityLearn(data_path,
                    building_attributes,
                    weather_file,
                    solar_profile,
                    building_ids,
                    buildings_states_actions=building_state_actions,
                    cost_function=objective_function,
                    simulation_period=(3624, 5832),
                    central_agent=False,
                    normalise=True,
                    verbose=0)

# Contains the lower and upper bounds of the states and actions, to be
# provided to the agent to normalize the variables between 0 and 1.
# They can be obtained using observations_spaces[i].low or .high.
observations_spaces, actions_spaces = env.get_state_action_spaces()
observations_spacesRBC, actions_spacesRBC = RBC_env.get_state_action_spaces()

# Provides information on building type, climate zone, annual DHW demand,
# annual cooling demand, annual electricity demand, solar capacity, and
# correlations among buildings.
building_info = env.get_building_information()

#%%
"""
#############################################
STEP 2: Determine the size of the Action and State Spaces and the Number of Agents

The observation space consists of various variables corresponding to the
building_state_action_json file. See
https://github.com/intelligent-environments-lab/CityLearn for more
information about the states. Each agent receives all observations of all
buildings (communication between buildings).
Up to two continuous actions are available, corresponding to whether to charge or discharge