Example #1
building_ids = [
    "Building_1", "Building_2", "Building_3", "Building_4", "Building_5",
    "Building_6", "Building_7", "Building_8", "Building_9"
]
objective_function = [
    'ramping', '1-load_factor', 'average_daily_peak', 'peak_demand',
    'net_electricity_consumption'
]

env = CityLearn(data_path,
                building_attributes,
                weather_file,
                solar_profile,
                building_ids,
                buildings_states_actions=building_state_actions,
                cost_function=objective_function)
observations_spaces, actions_spaces = env.get_state_action_spaces()

# Provides information on Building type, Climate Zone, Annual DHW demand, Annual Cooling Demand, Annual Electricity Demand, Solar Capacity, and correlations among buildings
building_info = env.get_building_information()
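# A minimal sketch of inspecting the returned structure, assuming building_info is a
# dictionary keyed by building id (the exact fields are not shown in this snippet):
for uid, info in building_info.items():
    print(uid, info)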

# RL CONTROLLER
# Instantiating the control agent(s)
agents = Agent(env, building_info, observations_spaces, actions_spaces)

# Select the number of episodes for training. In the final run this value will be set to 1 (the buildings run for one year)
episodes = 10

k, c = 0, 0
cost, cum_reward = {}, {}
start = time.time()
# The number of episodes can be replaced by a stopping criterion (e.g. convergence of the average reward)
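# A minimal sketch of such a stopping criterion; the window size and tolerance below
# are illustrative assumptions, not values taken from this example.
def reward_converged(episode_rewards, window=10, tol=1e-3):
    """Return True once the mean reward of the last two windows has stabilized."""
    if len(episode_rewards) < 2 * window:
        return False
    prev = sum(episode_rewards[-2 * window:-window]) / window
    last = sum(episode_rewards[-window:]) / window
    return abs(last - prev) < tol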
Example #2
def run(config):
    data_folder = Path(config.data_path)
    building_attributes = data_folder / 'building_attributes.json'
    solar_profile = data_folder / 'solar_generation_1kW.csv'
    building_state_actions = 'buildings_state_action_space.json'
    # building_ids = ["Building_" + str(i) for i in range(1, config.num_buildings + 1)]
    # Only six of the nine buildings are used below, so num_buildings is overridden here.
    config.num_buildings = 6

    # customized log directory
    hidden = config.hidden_dim
    lr = config.lr
    tau = config.tau
    gamma = config.gamma
    batch_size = config.batch_size
    buffer_length = config.buffer_length
    log_path = (f"log_hidden{hidden}_lr{lr}_tau{tau}_gamma{gamma}"
                f"_batch_size{batch_size}_buffer_length{buffer_length}"
                "_TIME_PERIOD_1008_MAXACTION_25/")

    logger = SummaryWriter(log_dir=log_path)
    # TODO fix here: the building IDs are hard-coded; they should be derived from the config
    building_ids = ["Building_" + str(i) for i in [1, 2, 5, 6, 7, 8]]
    env = CityLearn(building_attributes,
                    solar_profile,
                    building_ids,
                    buildings_states_actions=building_state_actions,
                    cost_function=[
                        'ramping', '1-load_factor', 'peak_to_valley_ratio',
                        'peak_demand', 'net_electricity_consumption'
                    ])
    observations_spaces, actions_spaces = env.get_state_action_spaces()

    # Instantiating the control agent(s)
    if config.agent_alg == 'MADDPG':
        agents = MA_DDPG(observations_spaces,
                         actions_spaces,
                         hyper_params=vars(config))
    else:
        raise NotImplementedError

    k, c = 0, 0
    cost, cum_reward = {}, {}
    buffer = ReplayBuffer(max_steps=config.buffer_length,
                          num_agents=config.num_buildings,
                          obs_dims=[s.shape[0] for s in observations_spaces],
                          ac_dims=[a.shape[0] for a in actions_spaces])
    # TODO: store np or tensor in buffer?
    start = time.time()
    for e in range(config.n_episodes):
        cum_reward[e] = 0
        rewards = []
        state = env.reset()
        statecast = lambda x: [torch.FloatTensor(s) for s in x]
        done = False
        ss = 0
        while not done:
            if k % (40000 * 4) == 0:
                print('hour: ' + str(k) + ' of ' +
                      str(TIME_PERIOD * config.n_episodes))
            action = agents.select_action(statecast(state), explore=False)
            action = [a.detach().numpy() for a in action]
            # drop the batch dimension added by the policy network (needed when batch norm is used)
            action = [np.squeeze(a, axis=0) for a in action]
            ss += 1
            #print("action is ", action)
            #print(action[0].shape)
            #raise NotImplementedError
            next_state, reward, done, _ = env.step(action)
            reward = reward_function(
                reward)  # See comments in reward_function.py
            #buffer_reward = [-r for r in reward]
            # agents.add_to_buffer()
            buffer.push(statecast(state), action, reward,
                        statecast(next_state), done)
            # if (len(buffer) >= config.batch_size and
            #         (e % config.steps_per_update) < config.n_rollout_threads):
            if len(buffer) >= config.batch_size:
                if USE_CUDA:
                    agents.to_train(device='gpu')
                else:
                    agents.to_train(device='cpu')
                for a_i in range(agents.n_buildings):
                    sample = buffer.sample(config.batch_size, to_gpu=USE_CUDA)
                    agents.update(sample,
                                  a_i,
                                  logger=logger,
                                  global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(tag='net electric consumption',
                              scalar_value=env.net_electric_consumption[-1],
                              global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(tag='env cost total',
                              scalar_value=env.cost()['total'],
                              global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(tag="1 load factor",
                              scalar_value=env.cost()['1-load_factor'],
                              global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(tag="peak to valley ratio",
                              scalar_value=env.cost()['peak_to_valley_ratio'],
                              global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(tag="peak demand",
                              scalar_value=env.cost()['peak_demand'],
                              global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(
                tag="net energy consumption",
                scalar_value=env.cost()['net_electricity_consumption'],
                global_step=e * TIME_PERIOD + ss)
            net_energy_consumption_wo_storage = (
                env.net_electric_consumption[-1] + env.electric_generation[-1] -
                env.electric_consumption_cooling_storage[-1] -
                env.electric_consumption_dhw_storage[-1])
            logger.add_scalar(tag="net energy consumption without storage",
                              scalar_value=net_energy_consumption_wo_storage,
                              global_step=e * TIME_PERIOD + ss)

            for agent_id, r in enumerate(reward):
                logger.add_scalar(tag="agent {} reward ".format(agent_id),
                                  scalar_value=r,
                                  global_step=e * TIME_PERIOD + ss)

            state = next_state
            cum_reward[e] += reward[0]
            k += 1
            cur_time = time.time()
            # print("average time : {}s/iteration at iteration {}".format((cur_time - start) / (60.0 * k), k))
        cost[e] = env.cost()
        if c % 1 == 0:  # always true; prints the episode cost every episode
            print(cost[e])
        # add env total cost and reward logger
        logger.add_scalar(tag='env cost total final',
                          scalar_value=env.cost()['total'],
                          global_step=e)
        logger.add_scalar(tag="1 load factor final",
                          scalar_value=env.cost()['1-load_factor'],
                          global_step=e)
        logger.add_scalar(tag="peak to valley ratio final",
                          scalar_value=env.cost()['peak_to_valley_ratio'],
                          global_step=e)
        logger.add_scalar(tag="peak demand final",
                          scalar_value=env.cost()['peak_demand'],
                          global_step=e)
        logger.add_scalar(
            tag="net energy consumption final",
            scalar_value=env.cost()['net_electricity_consumption'],
            global_step=e)
        net_energy_consumption_wo_storage = (
            env.net_electric_consumption[-1] + env.electric_generation[-1] -
            env.electric_consumption_cooling_storage[-1] -
            env.electric_consumption_dhw_storage[-1])
        logger.add_scalar(tag="net energy consumption without storage",
                          scalar_value=net_energy_consumption_wo_storage,
                          global_step=e)
        c += 1
        rewards.append(reward)

    end = time.time()
    print((end - start) / 60.0)
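# A minimal sketch of how run(config) could be invoked; the argument names mirror the
# attributes accessed above, but the default values are illustrative assumptions.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, default='data/')
    parser.add_argument('--hidden_dim', type=int, default=256)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--tau', type=float, default=1e-2)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--buffer_length', type=int, default=100000)
    parser.add_argument('--agent_alg', type=str, default='MADDPG')
    parser.add_argument('--n_episodes', type=int, default=10)
    config = parser.parse_args()
    run(config)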
Example #3
    time.strftime("%Y%m%d-%H%M%S"))  # appends the current date and time
os.makedirs(parent_dir, exist_ok=True)

# Create log dir
log_dir = parent_dir + "monitor"
os.makedirs(log_dir, exist_ok=True)

# Set the interval length and the number of intervals
interval = 8760
icount = int(sys.argv[1]) if len(sys.argv) > 1 else 10
log_interval = 1
check_interval = 1
save_interval = 1

# the noise objects for DDPG
_, actions_spaces = env.get_state_action_spaces()

n_actions = 0
for action in actions_spaces:
    n_actions += action.shape[-1]
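# A minimal sketch of building the action noise from n_actions; NormalActionNoise and
# its import path assume stable-baselines3, which is an assumption about this example.
import numpy as np
from stable_baselines3.common.noise import NormalActionNoise

action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                 sigma=0.1 * np.ones(n_actions))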

# Wrap the environment in a Monitor so training rewards are logged to log_dir
env = Monitor(env, filename=log_dir)
callbackBest = SaveOnBestTrainingRewardCallback2_10(
    check_freq=check_interval * interval,
    log_dir=log_dir,
    save_freq=interval * save_interval)

# Add callbacks to the callback list
callbackList = []
useBestCallback = True
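# A minimal sketch of how the callbacks could be collected and handed to a model's
# learn() call; the DDPG construction is an illustration assuming stable-baselines3,
# not code taken from this example.
if useBestCallback:
    callbackList.append(callbackBest)
# from stable_baselines3 import DDPG
# model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=0)
# model.learn(total_timesteps=interval * icount, callback=callbackList, log_interval=log_interval)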
Example #4
                verbose=0)
RBC_env = CityLearn(data_path,
                    building_attributes,
                    weather_file,
                    solar_profile,
                    building_ids,
                    buildings_states_actions=building_state_actions,
                    cost_function=objective_function,
                    simulation_period=(3624, 5832),
                    central_agent=False,
                    normalise=True,
                    verbose=0)

# The spaces contain the lower and upper bounds of the states and actions, which the agent uses to normalize the variables between 0 and 1.
# The bounds can be obtained using observations_spaces[i].low or .high
observations_spaces, actions_spaces = env.get_state_action_spaces()
observations_spacesRBC, actions_spacesRBC = RBC_env.get_state_action_spaces()
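# A minimal sketch of the normalization described above, assuming each entry of
# observations_spaces is a Box with .low and .high arrays (the numpy import is for the sketch):
import numpy as np

def normalize_state(state, space):
    """Scale a raw state vector into [0, 1] using the bounds of its observation space."""
    return (np.array(state) - space.low) / (space.high - space.low)

# e.g. normalized = [normalize_state(s, sp) for s, sp in zip(env.reset(), observations_spaces)]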

# Provides information on Building type, Climate Zone, Annual DHW demand, Annual Cooling Demand, Annual Electricity Demand, Solar Capacity, and correlations among buildings
building_info = env.get_building_information()

#%%
"""
#############################################
STEP 2: Determine the size of the Action and State Spaces and the Number of Agents

The observation space consists of various variables corresponding to the
buildings_state_action_space.json file. See https://github.com/intelligent-environments-lab/CityLearn
for more information about the states. Each agent receives all observations of all buildings
(communication between buildings). 
Up to two continuous actions are available, corresponding to whether to charge or discharge