def setup_experiment(sess, agent_config, env_config, paths, seed=None):
    """
    Sets up an agent and environment from config dictionaries

    args
        sess (tf.Session)
        agent_config (dict)
        env_config (dict)
        paths (dict)
        seed (int)

    returns
        agent (energy_py Agent)
        env (energy_py environment)
    """
    env = energy_py.make_env(**env_config)
    save_args(env_config, path=paths['env_args'])

    if seed:
        logger.info('random seed is {}'.format(seed))
        env.seed(seed)

    agent_config['env'] = env
    agent_config['sess'] = sess
    agent_config['act_path'] = paths['tb_act']
    agent_config['learn_path'] = paths['tb_learn']

    agent = energy_py.make_agent(**agent_config)
    save_args(agent_config, path=paths['agent_args'])

    if hasattr(agent, 'acting_writer'):
        agent.acting_writer.add_graph(sess.graph)

    return agent, env
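#  save_args is called above but not defined in this excerpt.  A minimal
#  sketch of what it might do - dump each config key/value to a text
#  file.  The real implementation may differ.
def save_args(config, path):
    with open(path, 'w') as fi:
        for name, value in config.items():
            fi.write('{}: {}\n'.format(name, value))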
def test_lower_setpoint():
    """ tests reducing the setpoint - increasing cooling generation """
    #  release time is long so that nothing is released automatically -
    #  lowering the setpoint at step 11 releases everything stored
    env = energy_py.make_env('flex', capacity=2.0, release_time=10)
    obs = env.reset()

    done = False
    step = 0
    while not done:
        act = np.array(0)
        if step >= 5 and step < 10:
            act = np.array(1)
        if step == 11:
            act = np.array(2)
        next_obs, r, done, i = env.step(act)
        step += 1

    out = pd.DataFrame().from_dict(i)

    #  check that we discharge everything that we stored
    np.testing.assert_equal(
        out.loc[:, 'stored'].sum(), out.loc[:, 'discharged'].sum())

    #  check we only discharge when we reduce the setpoint
    np.testing.assert_equal(0, out.loc[:10, 'discharged'].sum())
    np.testing.assert_equal(
        out.loc[:, 'stored'].sum(), out.loc[11:11, 'discharged'].sum())
def test_load_pickle_memory():
    env = energy_py.make_env('cartpole-v0')
    mem = energy_py.make_memory(memory_id='array', env=env)

    state = env.observation_space.sample()
    action = env.action_space.sample()
    reward = 1
    next_state = env.observation_space.sample()
    done = False

    experience = state, action, reward, next_state, done
    mem.remember(*experience)
    mem.save('./results/test_mem.pkl')

    new_mem = energy_py.make_memory(load_path='./results/test_mem.pkl')
    saved_exp = new_mem[0]

    for exp, saved in zip(experience, saved_exp):
        exp, saved = np.array(exp), np.array(saved)
        np.testing.assert_equal(exp, saved)
def test_release_when_full():
    """ tests that storage is discharged when it fills up """
    #  small capacity and a long release time, so the only discharge
    #  happens when the storage is full
    capacity = 0.5
    env = energy_py.make_env('flex', capacity=capacity, release_time=100)
    obs = env.reset()

    done = False
    while not done:
        #  always store
        act = np.array(1)
        next_obs, r, done, i = env.step(act)

    out = pd.DataFrame().from_dict(i)

    #  calculate when we should have discharged
    cumulative_demand = []
    for idx, row in out.iterrows():
        cumulative_demand.append(row.loc['site_demand'])

        if sum(cumulative_demand) >= capacity:
            np.testing.assert_almost_equal(
                row.loc['discharged'], sum(cumulative_demand))
            cumulative_demand = []
def test_raise_sp():
    """ tests raising the setpoint - reducing cooling generation """
    env = energy_py.make_env('flex', capacity=2.0, release_time=4)
    obs = env.reset()

    done = False
    step = 0
    while not done:
        act = np.array(0)
        if step >= 3 and step < 7:
            act = np.array(1)
        next_obs, r, done, i = env.step(act)
        step += 1

    out = pd.DataFrame().from_dict(i)
    expt = out.iloc[:12, :]

    #  check that we charge and discharge equal amounts of energy
    np.testing.assert_equal(expt['stored'].sum(), expt['discharged'].sum())

    #  check the timing of the discharge
    #  this assumes the capacity is large enough - it could fail if the
    #  example dataset changes.  TODO use a dedicated test dataset
    np.testing.assert_array_equal(
        expt.loc[3:7, 'stored'], expt.loc[7:11, 'discharged'])
    np.testing.assert_array_equal(
        expt.loc[3, 'stored'], expt.loc[7, 'discharged'])
def test_no_op():
    #  copy to avoid mutating the shared default_config
    config = dict(default_config)
    config['initial_charge'] = 0.5
    config['capacity'] = 4.0

    env = energy_py.make_env(**config)
    obs = env.reset()

    next_obs, rew, d, i = env.step(0)
    charge = env.get_state_variable('C_charge_level [MWh]')

    #  an initial charge of 0.5 on a 4.0 MWh battery is 2.0 MWh
    expected_charge = 2.0
    assert charge == expected_charge
def setup_agent(sess, double_q=False):
    """
    Sets up an agent & fills memory with random experience

    args
        sess (tf.Session)
        double_q (bool)

    returns
        agent (energy_py DQN agent)
        batch - a single batch sampled from the agent memory
        env (energy_py 2048 environment)
    """
    env = energy_py.make_env('2048', observation_dims='2D')

    #  use a high learning rate to get visible weight changes
    agent = energy_py.make_agent(
        agent_id='dqn',
        sess=sess,
        env=env,
        total_steps=10,
        discount=0.9,
        memory_type='deque',
        learning_rate=1.0,
        double_q=double_q,
        update_target_net=100,
        network='conv',
        filters=(8, 16),
        kernels=(2, 2),
        strides=(1, 1)
    )

    for step in range(48):
        obs = env.observation_space.sample()
        action = env.action_space.sample()
        reward = random.random() * 10
        next_obs = env.observation_space.sample()
        done = random.choice([True, False])
        agent.remember(obs, action, reward, next_obs, done)

    batch = agent.memory.get_batch(agent.batch_size)

    return agent, batch, env
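#  A hypothetical example of using the setup_agent fixture - not from
#  the original test suite.  Assumes tensorflow is imported as tf, as
#  in the other snippets.
def test_setup_agent():
    with tf.Session() as sess:
        agent, batch, env = setup_agent(sess, double_q=True)
        #  one learning step should run without error on the
        #  randomly filled memory
        agent.learn()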
def test_no_op():
    env = energy_py.make_env('flex')
    obs = env.reset()

    done = False
    while not done:
        act = np.array(0)
        next_obs, r, done, i = env.step(act)

    info = pd.DataFrame().from_dict(i)
    check_energy_balance(info)

    #  taking no flex action should earn no reward
    np.testing.assert_equal(info['reward'].sum(), 0)
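#  check_energy_balance is used above but not defined in this excerpt.
#  A minimal sketch of what it could check, assuming the info dataframe
#  has the 'stored' and 'discharged' columns used in the other tests:
def check_energy_balance(info):
    #  over a full episode, energy stored should equal energy discharged
    np.testing.assert_almost_equal(
        info['stored'].sum(), info['discharged'].sum())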
def test_decrease_setpoint():
    """ tests precooling - release time and supply capacity set high """
    env = energy_py.make_env(
        'flex',
        capacity=4.0,
        supply_capacity=100,  # large to ignore the effect
        release_time=100,     # large to ignore the effect
        supply_power=0.05,
        episode_length=10,
        episode_sample='random'
    )
    obs = env.reset()
    env.seed(42)

    done = False
    step = 0
    while not done:
        act = np.array(0)
        if (step >= 2) and (step <= 5):
            act = np.array(2)
        next_obs, r, done, i = env.step(act)
        step += 1

    info = pd.DataFrame().from_dict(i)

    #  while precooling, site consumption is fixed at supply_power
    np.testing.assert_array_equal(
        info.loc[:, 'site_consumption'].values[2:5],
        np.full(5-2, env.supply_power)
    )

    #  stored supply is the energy supplied above demand, converted
    #  from MW to MWh over 5 minute (1/12 hour) steps
    np.testing.assert_almost_equal(
        env.supply_power * 4 / 12
        - np.sum(info.loc[:, 'site_demand'].values[2:6]) / 12,
        info.loc[:, 'stored_supply'][5]
    )
def test_increase_setpoint():
    env = energy_py.make_env(
        'flex',
        capacity=4.0,
        supply_capacity=0.5,
        release_time=3,
        supply_power=0.05,
        episode_length=10,
        episode_sample='random'
    )
    obs = env.reset()
    env.seed(42)

    done = False
    step = 0
    while not done:
        act = np.array(0)
        if (step >= 2) and (step <= 4):
            act = np.array(1)
        next_obs, r, done, i = env.step(act)
        step += 1

    info = pd.DataFrame().from_dict(i)

    #  while the setpoint is raised, site consumption drops to zero
    np.testing.assert_array_equal(
        info.loc[:, 'site_consumption'].values[2:5],
        np.zeros(5-2)
    )

    #  the demand shifted during the setpoint raise is added onto
    #  consumption over the following release_time steps
    np.testing.assert_array_almost_equal(
        info.loc[:, 'site_demand'].values[5:5+3]
        + info.loc[:, 'site_demand'].values[2:5],
        info.loc[:, 'site_consumption'].values[5:5+3]
    )
def test_no_op():
    env = energy_py.make_env('flex')
    obs = env.reset()

    done = False
    while not done:
        act = np.array(0)
        next_obs, r, done, i = env.step(act)

    out = pd.DataFrame().from_dict(i)

    #  TODO this cost calculation should live in the flex env
    out['base_costs'] = out['site_demand'] * out['electricity_price']
    out['opt_costs'] = out['site_electricity_consumption'] * out['electricity_price']
    out['delta'] = out['base_costs'] - out['opt_costs']

    #  taking no action means no difference from the baseline costs
    np.testing.assert_equal(out['delta'].sum(), 0)
import energy_py

default_config = {
    'env_id': 'battery',
    'dataset': 'example',
    'initial_charge': 0,
    'round_trip_eff': 0.9,
    'episode_sample': 'full'
}

env = energy_py.make_env(**default_config)
obs = env.reset()


def test_charge():
    next_obs, rew, d, i = env.step(1.0)
    charge = env.get_state_variable('C_charge_level [MWh]')

    #  charging at 1 MW for one 5 minute (1/12 hour) step
    #  at 90% round trip efficiency
    expected_charge = 0.9 * 1.0 / 12
    assert charge == expected_charge


def test_discharge():
    #  copy to avoid mutating the shared default_config
    config = dict(default_config)
    config['initial_charge'] = 1.0
    config['capacity'] = 4.0

    env = energy_py.make_env(**config)
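    #  the original excerpt ends here - the rest of test_discharge is a
    #  hedged sketch.  It assumes a negative action discharges, that
    #  initial_charge is a fraction of capacity (as in test_no_op above)
    #  and that efficiency is only applied on charging
    obs = env.reset()
    next_obs, rew, d, i = env.step(-1.0)
    charge = env.get_state_variable('C_charge_level [MWh]')

    #  discharging at 1 MW for one 5 minute (1/12 hour) step
    #  from a full 4.0 MWh battery
    expected_charge = 4.0 - 1.0 / 12
    assert charge == expected_charge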
import tensorflow as tf

import energy_py

with tf.Session() as sess:
    env = energy_py.make_env(
        env_id='battery', episode_length=288, dataset='example')

    agent = energy_py.make_agent(
        sess=sess, agent_id='dqn', env=env, total_steps=1000000)

    observation = env.reset()

    done = False
    while not done:
        action = agent.act(observation)
        next_observation, reward, done, info = env.step(action)
        #  store the experience so the agent has something to learn from
        agent.remember(observation, action, reward, next_observation, done)
        training_info = agent.learn()
        observation = next_observation
""" test - check that the step gives the correct one, test by running for n steps and comparing with the indexing method """ import energy_py import numpy as np env = energy_py.make_env('flex') def get_state_actions(state): actions = env.action_space.discretize(3) return [ np.concatenate([state.reshape(-1), action.reshape(-1)]) for action in actions ] step = 4 state = env.observation_space.data.iloc[step, :] state_actions = get_state_actions(state) actions = env.action_space.discrete_spaces # def get_viable_transitions(step, state, actions):
def experiment(agent_config, env_config, total_steps, paths, seed=None):
    """
    Runs an experiment.  Episodes are run until total_steps is reached.

    args
        agent_config (dict)
        env_config (dict)
        total_steps (int)
        paths (dict)
        seed (int)

    Agent and environment are created from the config dictionaries.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        #  optionally set random seeds
        if seed:
            logger.info('random seed is {}'.format(seed))
            seed = int(seed)
            random.seed(seed)
            tf.set_random_seed(seed)
            np.random.seed(seed)

        env = energy_py.make_env(**env_config)
        save_args(env_config, path=paths['env_args'])

        #  add stuff into the agent config dict
        agent_config['env'] = env
        agent_config['env_repr'] = repr(env)
        agent_config['sess'] = sess
        agent_config['act_path'] = paths['tb_act']
        agent_config['learn_path'] = paths['tb_learn']

        #  init agent and save args
        agent = energy_py.make_agent(**agent_config)
        if hasattr(agent, 'acting_writer'):
            agent.acting_writer.add_graph(sess.graph)
        save_args(agent_config, path=paths['agent_args'])

        #  runner helps to manage our experiment
        runner = Runner(sess, paths, total_steps)

        #  outer while loop runs through multiple episodes
        step, episode = 0, 0
        while step < int(total_steps):
            episode += 1
            done = False
            observation = env.reset()

            #  inner while loop runs through a single episode
            while not done:
                step += 1

                #  select an action
                action = agent.act(observation)
                #  take one step through the environment
                next_observation, reward, done, info = env.step(action)
                #  store the experience
                agent.remember(observation, action, reward,
                               next_observation, done)
                runner.record_step(reward)
                #  moving to the next time step
                observation = next_observation

                #  fill the memory up halfway before we learn
                #  TODO the agent should decide what to do internally here
                if step > int(agent.memory.size * 0.5):
                    train_info = agent.learn()

            runner.record_episode(env_info=info)

            save_env_info(
                env,
                info,
                len(runner.episode_rewards),
                paths['env_histories']
            )
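#  a hypothetical invocation of experiment() - not from the library.
#  The paths keys match those used inside the function; the directory
#  layout and config values are assumptions
if __name__ == '__main__':
    paths = {
        'env_args': './results/env_args.txt',
        'agent_args': './results/agent_args.txt',
        'tb_act': './results/tb/act',
        'tb_learn': './results/tb/learn',
        'env_histories': './results/env_histories'
    }

    experiment(
        agent_config={'agent_id': 'dqn'},
        env_config={'env_id': 'battery', 'dataset': 'example'},
        total_steps=10000,
        paths=paths,
        seed=42
    )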
def _act(self, observation):
    """
    Agent selects action randomly

    returns
        action (np.array)
    """
    return self.action_space.sample()


if __name__ == '__main__':
    import energy_py

    env = energy_py.make_env(
        'Flex-v1',
        flex_size=1,
        max_flex_time=4,
        relax_time=0,
        dataset='tempus'
    )

    a = energy_py.make_agent('naive_flex', env=env, hours=(6, 10, 15, 19))

    o = env.reset()
    done = False
    while not done:
        action = a.act(o)
        o, r, done, i = env.step(action)
        print(action)