예제 #1
0
             batch_size=1024,
             buffer_size=int(5e5),
             verbose=0,
             param_noise=param_noise,
             action_noise=action_noise,
             tensorboard_log=parent_dir + "tensorboard/",
             n_cpu_tf_sess=multiprocessing.cpu_count())

# Train the model; TensorBoard runs are tagged with today's date.
model.learn(total_timesteps=interval * icount,
            log_interval=interval,
            tb_log_name="DDPG_{}".format(time.strftime("%Y%m%d")),
            callback=callbackList)

# Roll out one episode with the trained policy, collecting the rewards.
obs = env.reset()
dones = False
counter = []
while not dones:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    counter.append(rewards)

# Read the final costs before the environment is closed.
final_costs = env.cost()
env.close()

print("\nFinal costs:")
pp.pprint(final_costs)

# Plot the reward graph
if useBestCallback:
    plot_results([log_dir], interval * icount, results_plotter.X_TIMESTEPS,
                 "DDPG CityLearn")
    plt.savefig(log_dir + "/rewards.pdf")
예제 #2
0
start = time.time()
# The number of episodes can be replaced by a stopping criterion
# (e.g. convergence of the average reward).
for e in range(episodes):
    cum_reward[e] = 0
    rewards = []
    state = env.reset()
    done = False
    while not done:
        # Progress message every 1000 simulated steps.
        if not k % 1000:
            print('hour: {} of {}'.format(k, 8760 * episodes))

        action = agents.select_action(state)
        next_state, reward, done, _ = env.step(action)
        agents.add_to_buffer(state, action, reward, next_state, done)
        state = next_state

        # Only the first entry of the reward is accumulated.
        cum_reward[e] += reward[0]
        rewards.append(reward)
        k += 1

    # Replay from the buffer once per episode, after the rollout has finished.
    agents.replay()

    cost[e] = env.cost()
    if not c % 1:
        print(cost[e])
    c += 1

# Total wall-clock time of the run, in seconds.
print(time.time() - start)
예제 #3
0
파일: main.py 프로젝트: jie-jay/CityLearn
# Provides information on building type, climate zone, annual DHW demand,
# annual cooling demand, annual electricity demand, solar capacity, and
# correlations among buildings.
building_info = env.get_building_information()

# Keyword arguments for the agent constructor; buildings 1 through 9 are used.
params_agent = dict(
    building_ids=["Building_{}".format(i) for i in range(1, 10)],
    buildings_states_actions='buildings_state_action_space.json',
    building_info=building_info,
    observation_spaces=observations_spaces,
    action_spaces=actions_spaces,
)

# Instantiating the control agent(s)
agents = Agent(**params_agent)

state = env.reset()
done = False

# The action for a step is chosen before the step is taken, so that the
# transition stored in the buffer carries both the current and the next
# coordination variables.
action, coordination_vars = agents.select_action(state)
while not done:
    next_state, reward, done, _ = env.step(action)
    action_next, coordination_vars_next = agents.select_action(next_state)
    agents.add_to_buffer(state, action, reward, next_state, done,
                         coordination_vars, coordination_vars_next)
    state, action, coordination_vars = (next_state, action_next,
                                        coordination_vars_next)

env.cost()
예제 #4
0
            # writer.add_scalar('loss/entropy_loss', alpha_loss, total_ep)
            # writer.add_scalar('entropy_temprature/alpha', alpha, total_ep)
        #if agents.time_step % 8759 == 0 :
            #print(env.net_electric_consumption[0])
            #sys.exit()

        state = next_state
        # cum_reward[e] += reward[0]
        # rewards.append(reward)
        k += 1
        episode_reward += reward

        total_ep += 1
    if args.log:
        # Tensorboard log citylearn cost function
        writer.add_scalar("Scores/ramping", env.cost()['ramping'], e)
        writer.add_scalar("Scores/1-load_factor", env.cost()['1-load_factor'], e)
        writer.add_scalar("Scores/average_daily_peak", env.cost()['average_daily_peak'], e)
        writer.add_scalar("Scores/peak_demand", env.cost()['peak_demand'], e)
        writer.add_scalar("Scores/net_electricity_consumption", env.cost()['net_electricity_consumption'], e)
        writer.add_scalar("Scores/total", env.cost()['total'], e)
        writer.add_scalar("Scores/episode_reward", episode_reward, e)

        # Append the total score/reward to the list
    score_list.append(env.cost()['total'])
    reward_list.append(episode_reward)

    # Save trained Actor and Critic network periodically as a checkpoint if it's the best model achieved
    # if e % args.checkpoint_interval == 0:
    if env.cost()['total'] < best_reward:
        best_reward = env.cost()['total']
예제 #5
0
# (TensorBoard tag, env.cost() key) pairs. The per-step series use the tag
# as-is; the per-episode series append " final" to it.
_COST_TAGS = (
    ('env cost total', 'total'),
    ("1 load factor", '1-load_factor'),
    ("peak to valley ratio", 'peak_to_valley_ratio'),
    ("peak demand", 'peak_demand'),
    ("net energy consumption", 'net_electricity_consumption'),
)


def _to_tensors(arrays):
    """Convert every element of *arrays* to a ``torch.FloatTensor``."""
    return [torch.FloatTensor(a) for a in arrays]


def _net_consumption_without_storage(env):
    """Combine the latest entries of the environment's electricity series.

    Returns ``net_electric_consumption + electric_generation
    - electric_consumption_cooling_storage - electric_consumption_dhw_storage``,
    each taken at index ``-1``.
    """
    return (env.net_electric_consumption[-1]
            + env.electric_generation[-1]
            - env.electric_consumption_cooling_storage[-1]
            - env.electric_consumption_dhw_storage[-1])


def run(config):
    """Train MADDPG agents on a CityLearn environment and log to TensorBoard.

    Args:
        config: Configuration object. The attributes read here are
            ``data_path``, ``hidden_dim``, ``lr``, ``tau``, ``gamma``,
            ``batch_size``, ``buffer_length``, ``agent_alg`` and
            ``n_episodes``; it must also support ``vars(config)``, which is
            forwarded to the agent as ``hyper_params``.
            ``config.num_buildings`` is overwritten with the number of
            buildings simulated here.

    Raises:
        NotImplementedError: If ``config.agent_alg`` is not ``'MADDPG'``.

    Returns:
        None. Per-episode costs are printed, scalars are written through a
        ``SummaryWriter``, and the elapsed time in minutes is printed at the
        end.
    """
    data_folder = Path(config.data_path)
    building_attributes = data_folder / 'building_attributes.json'
    solar_profile = data_folder / 'solar_generation_1kW.csv'
    building_state_actions = 'buildings_state_action_space.json'

    # TODO fix here: the simulated building subset is hard-coded.
    building_ids = ["Building_" + str(i) for i in [1, 2, 5, 6, 7, 8]]
    # Derived from the list above so the two can never drift apart.
    config.num_buildings = len(building_ids)

    # Customized log directory that encodes the main hyper-parameters.
    log_path = ("log_hidden{}_lr{}_tau{}_gamma{}_batch_size{}"
                "_buffer_length{}_TIME_PERIOD_1008_MAXACTION_25/").format(
                    config.hidden_dim, config.lr, config.tau, config.gamma,
                    config.batch_size, config.buffer_length)
    logger = SummaryWriter(log_dir=log_path)

    env = CityLearn(building_attributes,
                    solar_profile,
                    building_ids,
                    buildings_states_actions=building_state_actions,
                    cost_function=[
                        'ramping', '1-load_factor', 'peak_to_valley_ratio',
                        'peak_demand', 'net_electricity_consumption'
                    ])
    observations_spaces, actions_spaces = env.get_state_action_spaces()

    # Instantiating the control agent(s)
    if config.agent_alg == 'MADDPG':
        agents = MA_DDPG(observations_spaces,
                         actions_spaces,
                         hyper_params=vars(config))
    else:
        raise NotImplementedError

    train_device = 'gpu' if USE_CUDA else 'cpu'

    k = 0  # total number of environment steps across all episodes
    cost, cum_reward = {}, {}
    buffer = ReplayBuffer(max_steps=config.buffer_length,
                          num_agents=config.num_buildings,
                          obs_dims=[s.shape[0] for s in observations_spaces],
                          ac_dims=[a.shape[0] for a in actions_spaces])
    # TODO: store np or tensor in buffer?
    start = time.time()
    for e in range(config.n_episodes):
        cum_reward[e] = 0
        state = env.reset()
        done = False
        ss = 0  # step counter within the current episode
        while not done:
            if k % (40000 * 4) == 0:
                print('hour: ' + str(k) + ' of ' +
                      str(TIME_PERIOD * config.n_episodes))
            action = agents.select_action(_to_tensors(state), explore=False)
            action = [a.detach().numpy() for a in action]
            # if batch norm:
            action = [np.squeeze(a, axis=0) for a in action]
            ss += 1
            global_step = e * TIME_PERIOD + ss

            next_state, reward, done, _ = env.step(action)
            reward = reward_function(
                reward)  # See comments in reward_function.py
            buffer.push(_to_tensors(state), action, reward,
                        _to_tensors(next_state), done)

            # Update every agent once the buffer can supply a full batch.
            if len(buffer) >= config.batch_size:
                agents.to_train(device=train_device)
                for a_i in range(agents.n_buildings):
                    sample = buffer.sample(config.batch_size, to_gpu=USE_CUDA)
                    agents.update(sample,
                                  a_i,
                                  logger=logger,
                                  global_step=global_step)

            # Per-step logging. The cost function is evaluated once per step
            # and shared by all cost scalars.
            logger.add_scalar(tag='net electric consumption',
                              scalar_value=env.net_electric_consumption[-1],
                              global_step=global_step)
            step_cost = env.cost()
            for tag, key in _COST_TAGS:
                logger.add_scalar(tag=tag,
                                  scalar_value=step_cost[key],
                                  global_step=global_step)
            logger.add_scalar(
                tag="net energy consumption without storage",
                scalar_value=_net_consumption_without_storage(env),
                global_step=global_step)

            for agent_idx, r in enumerate(reward):
                logger.add_scalar(tag="agent {} reward ".format(agent_idx),
                                  scalar_value=r,
                                  global_step=global_step)

            state = next_state
            cum_reward[e] += reward[0]
            k += 1

        # Per-episode summary, indexed by episode number.
        episode_cost = env.cost()
        cost[e] = episode_cost
        print(cost[e])
        for tag, key in _COST_TAGS:
            logger.add_scalar(tag=tag + " final",
                              scalar_value=episode_cost[key],
                              global_step=e)
        # Uses the " final" suffix like the other episode-level scalars so it
        # no longer writes episode-indexed points into the per-step series.
        logger.add_scalar(
            tag="net energy consumption without storage final",
            scalar_value=_net_consumption_without_storage(env),
            global_step=e)

    end = time.time()
    print((end - start) / 60.0)
예제 #6
0
                torch.save(
                    agent.critic_local[building].state_dict(), parent_dir +
                    "chk/step_{}".format(iteration_step) + cn_filename)
            print("Saving a checkpoint to {}".format(
                parent_dir + "chk/step_{}".format(iteration_step)))

        # If any agent indicates that the episode is done,
        # then exit episode loop, to begin new episode
        if np.any(done):
            break

        iteration_step += 1

    timer = time.time() - start_timer

    # Evaluate the cost function once and reuse it for the console output
    # and every TensorBoard scalar.
    episode_cost = env.cost()
    print(episode_cost)
    for metric in ('ramping', '1-load_factor', 'average_daily_peak',
                   'peak_demand', 'net_electricity_consumption', 'total'):
        writer.add_scalar("Scores/" + metric, episode_cost[metric],
                          iteration_step)

    writer.flush()

    # Add episode score to Scores and
예제 #7
0
# Select many episodes for training. In the final run we will set this value to 1 (the buildings run for one year)
n_episodes = 30

k, c = 0, 0
cost, cum_reward = {}, {}

# The number of episodes can be replaced by a stopping criterion (e.g. convergence of the average reward)
start = time.time()
for e in range(n_episodes):
    # The deterministic policy is used from episode index 8 onward (e > 7).
    is_evaluating = (e > 7)
    rewards = []
    state = env.reset()
    done = False

    j = 0

    each_episode_time = time.time()
    while not done:
        action = agents.select_action(state, deterministic=is_evaluating)
        next_state, reward, done, _ = env.step(action)
        agents.add_to_buffer(state, action, reward, next_state, done)
        agents.update()

        state = next_state

    # Evaluate the cost once so the console and the CSV report the same value.
    episode_cost = env.cost()
    print('Loss -', episode_cost, 'Simulation time (min) -',
          (time.time() - start) / 60.0, 'episode time(min) -',
          (time.time() - each_episode_time) / 60.0)
    # One line is appended to the CSV log per episode.
    with open("./all_agent.csv", "a") as log:
        log.write(
            "Time:{0} ,Loss - {1},episode_time(min)-{2},Simulation_time (min)-{3} \n"
            .format(time.strftime("%Y-%m-%d %H:%M:%S"), episode_cost,
                    (time.time() - each_episode_time) / 60.0,
                    (time.time() - start) / 60.0))
            # critic_1_loss,critic_2_loss,policy_loss,alpha_loss,alpha= agents.update()
            critic_1_loss, critic_2_loss, policy_loss = agents.update()

        state = next_state
        # cum_reward[e] += reward[0]
        # rewards.append(reward)
        k += 1
        episode_reward = [x + y for x, y in zip(reward, episode_reward)]
        #episode_reward = map(sum,zip(reward,episode_reward))

        total_ep += 1
    #agents.dist_cons_buffer.reset()
    if args.log:

        # Tensorboard log citylearn cost function
        writer.add_scalar("Scores/ramping", env.cost()['ramping'], e)
        writer.add_scalar("Scores/1-load_factor",
                          env.cost()['1-load_factor'], e)
        writer.add_scalar("Scores/average_daily_peak",
                          env.cost()['average_daily_peak'], e)
        writer.add_scalar("Scores/peak_demand", env.cost()['peak_demand'], e)
        writer.add_scalar("Scores/net_electricity_consumption",
                          env.cost()['net_electricity_consumption'], e)
        writer.add_scalar("Scores/total", env.cost()['total'], e)
        writer.add_scalar("Scores/episode_reward", sum(episode_reward), e)

        # Append the total score/reward to the list
    score_list.append(env.cost()['total'])
    reward_list.append(sum(episode_reward))

    # Save trained Actor and Critic network periodically as a checkpoint if it's the best model achieved
예제 #9
0
        #    sys.exit()

    # Tensorboard log reward values: the total plus one scalar per entry of
    # episode_reward, tagged Building_1, Building_2, ...
    total_reward = sum(episode_reward)
    writer.add_scalar("Reward/Total", total_reward, total_numsteps)
    for building_no, building_reward in enumerate(episode_reward, start=1):
        writer.add_scalar("Reward/Building_{}".format(building_no),
                          building_reward, total_numsteps)

    # Tensorboard log citylearn cost function (evaluated once and reused)
    episode_cost = env.cost()
    for metric in ('ramping', '1-load_factor', 'average_daily_peak',
                   'peak_demand', 'net_electricity_consumption', 'total'):
        writer.add_scalar("Scores/" + metric, episode_cost[metric],
                          total_numsteps)

    print("Episode: {}, total numsteps: {}, total cost: {}, reward: {}".format(
        i_episode, total_numsteps, round(episode_cost['total'], 5),
        round(total_reward, 2)))
예제 #10
0
# Hyper-parameters passed to the agent constructor below.
tau = 0.005
gamma = 0.99
lr = 0.0003
hid = [256, 256]

n_episodes = 12

# Instantiating the control agent(s)
agents = RL_Agents_Coord(
    building_id,
    building_state_actions,
    building_info,
    observations_spaces,
    actions_spaces,
    discount=gamma,
    batch_size=bs,
    replay_buffer_capacity=1e5,
    regression_buffer_capacity=12 * 8760,
    tau=tau,
    lr=lr,
    hidden_dim=hid,
    start_training=8760 * 3,
    exploration_period=8760 * 3 + 1,
    start_regression=8760,
    information_sharing=True,
    pca_compression=.95,
    action_scaling_coef=0.5,
    reward_scaling=5.,
    update_per_step=1,
    iterations_as=2,
)

# The number of episodes can be replaced by a stopping criterion
# (e.g. convergence of the average reward).
start = time.time()
for e in range(n_episodes):
    # The deterministic policy is used from episode index 8 onward (e > 7).
    is_evaluating = e > 7
    rewards = []
    state = env.reset()
    done = False

    j = 0
    # The action is chosen one step ahead so each stored transition carries
    # both the current and the next coordination variables.
    action, coordination_vars = agents.select_action(
        state, deterministic=is_evaluating)
    while not done:
        next_state, reward, done, _ = env.step(action)
        action_next, coordination_vars_next = agents.select_action(
            next_state, deterministic=is_evaluating)
        agents.add_to_buffer(state, action, reward, next_state, done,
                             coordination_vars, coordination_vars_next)

        state, coordination_vars, action = (next_state,
                                            coordination_vars_next,
                                            action_next)

    print('Loss -', env.cost(), 'Simulation time (min) -',
          (time.time() - start) / 60.0)
예제 #11
0
# One episode
while not dones:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    counter.append(rewards)

    # Log the building reward once every `interval` steps. The previous
    # truthiness test on the remainder logged on every step EXCEPT those.
    if iteration_step % interval == 0:
        writer.add_scalar("Reward/Buildings", rewards, iteration_step)

    iteration_step += 1

# Costs: evaluated once, before the environment is closed, and reused for
# both TensorBoard and the console output.
final_costs = env.cost()
writer.add_scalars("Scores", final_costs, iteration_step)

env.close()

print("\nFinal rewards:")
pp.pprint(final_costs)

# Plot the reward graph
# plot_results([log_dir], interval*icount, results_plotter.X_TIMESTEPS, "SAC CityLearn")
# plt.savefig(log_dir+"/rewards.pdf")
예제 #12
0
                         information_sharing=True,
                         pca_compression=.95,
                         action_scaling_coef=0.5,
                         reward_scaling=5.,
                         update_per_step=1,
                         iterations_as=2)

# The number of episodes can be replaced by a stopping criterion
# (e.g. convergence of the average reward).
start = time.time()
for e in range(n_episodes):
    # The deterministic policy is used from episode index 8 onward (e > 7).
    is_evaluating = e > 7
    rewards = []
    state = env.reset()
    done = False

    j = 0
    # Select the first action before entering the loop; inside the loop the
    # next action is always chosen before the transition is stored.
    action, coordination_vars = agents.select_action(
        state, deterministic=is_evaluating)
    while not done:
        next_state, reward, done, _ = env.step(action)
        action_next, coordination_vars_next = agents.select_action(
            next_state, deterministic=is_evaluating)
        agents.add_to_buffer(state, action, reward, next_state, done,
                             coordination_vars, coordination_vars_next)

        state, coordination_vars, action = (next_state,
                                            coordination_vars_next,
                                            action_next)

    elapsed_minutes = (time.time() - start) / 60.0
    print('Loss -', env.cost(), 'Simulation time (min) -', elapsed_minutes)
예제 #13
0
        episode_peak_reward += r_peak
        episode_day_reward += r_day
        episode_night_reward += r_night
        episode_smooth_reward += r_smooth

        state = next_state

    # Tensorboard log reward values (total plus the individual components)
    for tag, value in (('Reward/Total', episode_reward),
                       ('Reward/Peak', episode_peak_reward),
                       ('Reward/Day_Charging', episode_day_reward),
                       ('Reward/Night_Charging', episode_night_reward),
                       ('Reward/Smooth_Actions', episode_smooth_reward)):
        writer.add_scalar(tag, value, total_numsteps)

    # Tensorboard log citylearn cost function (evaluated once and reused)
    episode_cost = env.cost()
    for metric in ('ramping', '1-load_factor', 'average_daily_peak',
                   'peak_demand', 'net_electricity_consumption', 'total'):
        writer.add_scalar("Scores/" + metric, episode_cost[metric],
                          total_numsteps)

    # Log how much storage is utilised by calculating abs sum of actions (CHECK IF WORKS WITH MULTIPLE BUILDINGS!!!)
    # Only the most recent 8759 tracked actions are summed.
    episode_actions = np.array(agent.action_tracker[-8759:])
    cooling = sum(abs(episode_actions[:, 0]))
    writer.add_scalar("Action/Cooling", cooling, total_numsteps)
    # The second action column is logged as DHW only when act_size[0] is 2.
    if agent.act_size[0] == 2:
        dhw = sum(abs(episode_actions[:, 1]))
        writer.add_scalar("Action/DHW", dhw, total_numsteps)
    writer.add_histogram("Action/Tracker", np.array(agent.action_tracker), total_numsteps)