def test_load_pickle_memory():
    env = energypy.make_env('cartpole-v0')
    mem = energypy.make_memory(memory_id='array', env=env)

    state = env.observation_space.sample()
    action = env.action_space.sample()
    reward = 1
    next_state = env.observation_space.sample()
    done = False

    experience = state, action, reward, next_state, done
    mem.remember(*experience)
    mem.save('./results/test_mem.pkl')

    new_mem = energypy.make_memory(load_path='./results/test_mem.pkl')
    saved_exp = new_mem[0]

    for exp, saved in zip(experience, saved_exp):
        exp, saved = np.array(exp), np.array(saved)
        np.testing.assert_equal(exp, saved)
def test_increase_setpoint():
    env = energypy.make_env(
        'flex',
        capacity=4.0,
        supply_capacity=0.5,
        release_time=3,
        supply_power=0.05,
        episode_length=10,
        episode_sample='random'
    )
    obs = env.reset()
    env.seed(42)

    done = False
    step = 0
    while not done:
        act = np.array(0)
        if (step >= 2) and (step <= 4):
            act = np.array(1)
        print(step, act)
        next_obs, r, done, i = env.step(act)
        step += 1

    info = pd.DataFrame().from_dict(i)
    sub = info.loc[:, ['site_demand', 'site_consumption', 'setpoint']]

    np.testing.assert_array_equal(
        info.loc[:, 'site_consumption'].values[2:5],
        np.zeros(5 - 2)
    )

    np.testing.assert_array_almost_equal(
        info.loc[:, 'site_demand'].values[5:5 + 3]
        + info.loc[:, 'site_demand'].values[2:5],
        info.loc[:, 'site_consumption'].values[5:5 + 3]
    )
def test_charge(action, initial_charge, expected_charge):
    #  initial_charge comes from the parametrized test case and overrides
    #  the shared default config
    config = dict(default_config, initial_charge=initial_charge)
    env = energypy.make_env(**config)
    obs = env.reset()

    rew, next_obs, d, i = env.step(action)

    charge = env.get_state_variable('Charge [MWh]')
    assert charge == expected_charge
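#  A possible pytest parametrization for test_charge above - a hedged sketch
#  only, the cases and expected charges are assumptions rather than the
#  project's actual fixtures (the `/ 12` used in the other battery tests
#  suggests 5 minute steps, so one step charging at 1.0 MW with a 0.9 round
#  trip efficiency would store 0.9 * 1.0 / 12 MWh):
#
#  @pytest.mark.parametrize(
#      'action, initial_charge, expected_charge',
#      [
#          (1.0, 0, 0.9 * 1.0 / 12),
#          (0.0, 0, 0),
#      ]
#  )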
def test_discharge():
    #  copy the shared defaults so this test does not mutate them for other tests
    config = dict(default_config)
    config['initial_charge'] = 1.0
    config['capacity'] = 4.0

    env = energypy.make_env(**config)
    obs = env.reset()

    rew, next_obs, d, i = env.step(-1.0)

    charge = env.get_state_variable('C_charge_level [MWh]')
    expected_charge = 4.0 - 1.0 / 12
    assert charge == expected_charge
def test_no_op():
    config = dict(default_config)
    config['initial_charge'] = 0.5
    config['capacity'] = 4.0

    env = energypy.make_env(**config)
    obs = env.reset()

    rew, next_obs, d, i = env.step(0)

    charge = env.get_state_variable('Charge [MWh]')
    expected_charge = 2.0
    assert charge == expected_charge
def test_discharge():
    config = dict(default_config)
    config['initial_charge'] = 1.0
    config['capacity'] = 4.0

    env = energypy.make_env(**config)
    obs = env.reset()

    rew, next_obs, d, i = env.step(-1.0)

    charge = env.get_state_variable('C_charge_level [MWh]')
    expected_charge = 4.0 - 1.0 / 12
    assert charge == expected_charge

    losses = i['Loss [MW]'][-1]
    expected_losses = 1.0 * (1 - 0.9)
    np.testing.assert_allclose(losses, expected_losses)
def full(env):
    env = energypy.make_env(
        env, episode_sample='full'
    )

    done = False
    env.reset()
    while not done:
        action = env.action_space.sample()
        s, r, done, i = env.step(action)

    i = pd.DataFrame().from_dict(i)
    assert i.shape[0] == env.state_space.data.shape[0]
def test_no_op():
    env = energypy.make_env('flex')
    obs = env.reset()

    done = False
    step = 0
    while not done:
        act = np.array(0)
        next_obs, r, done, i = env.step(act)
        step += 1

    info = pd.DataFrame().from_dict(i)
    check_energy_balance(info)

    np.testing.assert_equal(info['reward'].sum(), 0)
def random(env):
    env = energypy.make_env(
        env, episode_sample='random', episode_length=24
    )

    done = False
    env.reset()
    while not done:
        action = env.action_space.sample()
        s, r, done, i = env.step(action)

    i = pd.DataFrame().from_dict(i)
    assert i.shape[0] == 24
def setup_experiment(
        sess,
        agent_config,
        env_config,
        paths,
        seed=None
):
    """
    Initialize an experiment

    args
        sess (tf.Session)
        agent_config (dict)
        env_config (dict)
        paths (dict)
        seed (int)
    """
    env = energypy.make_env(**env_config)
    save_args(env_config, path=paths['env_args'])

    if seed:
        logger.info('random seed is {}'.format(seed))
        env.seed(seed)

    agent_config['env'] = env
    agent_config['sess'] = sess
    agent_config['act_path'] = paths['tb_act']
    agent_config['learn_path'] = paths['tb_learn']

    agent_memory = agent_config.pop('load_memory', None)
    if agent_memory:
        agent_config['load_memory_path'] = paths['memory']

    agent = energypy.make_agent(**agent_config)
    save_args(agent_config, path=paths['agent_args'])

    if hasattr(agent, 'acting_writer'):
        agent.acting_writer.add_graph(sess.graph)

    # TODO copy the dataset into the run folder as well

    #  assumes total_steps is supplied via agent_config
    total_steps = agent_config.get('total_steps', 0)
    logger.info('setup experiment of {} steps'.format(total_steps))

    return agent, env
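#  Hedged usage sketch for setup_experiment - the agent/env config values and
#  file paths below are illustrative assumptions, not the project's actual
#  experiment entry point (the paths dict simply mirrors the keys the
#  function reads above):
#
#  with tf.Session() as sess:
#      agent, env = setup_experiment(
#          sess,
#          agent_config={'agent_id': 'dqn', 'total_steps': 10000},
#          env_config={'env_id': 'battery', 'episode_length': 24},
#          paths={
#              'env_args': './results/env_args.txt',
#              'agent_args': './results/agent_args.txt',
#              'tb_act': './results/tb_act',
#              'tb_learn': './results/tb_learn',
#              'memory': './results/memory.pkl',
#          },
#          seed=42,
#      )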
def experiment(njobs, num_rollouts, max_len):
    done = False
    env = energypy.make_env(
        'battery', episode_length=20, episode_sample='fixed'
    )
    agent = Agent(env, njobs, num_rollouts, max_len)
    _ = env.action_space.discretize(20)

    s = env.reset()
    #  count steps with `step` rather than `len`, so the builtin len() used
    #  below is not shadowed
    step = 0
    while not done:
        action = agent.get_action(deepcopy(env))
        s, r, done, info = env.step(action)
        print(step, action, np.sum(info['reward']), info['reward'][-5:])
        step += 1

    print(np.sum(info['reward']))
    print(len(info['reward']))
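#  Hedged example of invoking the experiment above - the argument values are
#  illustrative only:
#
#  if __name__ == '__main__':
#      experiment(njobs=4, num_rollouts=10, max_len=20)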
def test_decrease_setpoint():
    """ tests the precooling - but sets release time and capacity high """
    env = energypy.make_env(
        'flex',
        capacity=4.0,
        supply_capacity=100,  # large to ignore the effect
        release_time=100,  # large to ignore the effect
        supply_power=0.05,
        episode_length=10,
        episode_sample='random')

    obs = env.reset()
    env.seed(42)

    done = False
    step = 0
    while not done:
        act = np.array(0)
        if (step >= 2) and (step <= 5):
            act = np.array(2)
        print(step, act)
        next_obs, r, done, i = env.step(act)
        step += 1

    info = pd.DataFrame().from_dict(i)
    sub = info.loc[:, [
        'site_demand', 'site_consumption', 'setpoint', 'stored_supply'
    ]]
    print(sub.head(15))

    np.testing.assert_array_equal(
        info.loc[:, 'site_consumption'].values[2:5],
        np.full(5 - 2, env.supply_power)
    )

    np.testing.assert_almost_equal(
        env.supply_power * 4 / 12
        - np.sum(info.loc[:, 'site_demand'].values[2:6]) / 12,
        info.loc[:, 'stored_supply'][5]
    )
def test_env_lengths(env, sample_strat, episode_length):
    env = energypy.make_env(
        env_id=env,
        sample_strat=sample_strat,
        episode_length=episode_length
    )

    done = False
    env.reset()
    while not done:
        action = env.action_space.sample()
        s, r, done, i = env.step(action)

    for key, data in i.items():
        if sample_strat == 'full':
            assert len(data) == env.state_space.num_samples
        else:
            assert len(data) == episode_length

    next_states = np.array(i['next_state']).reshape(-1, *env.state_space.shape)
def setup_agent(sess, double_q=False):
    """
    Sets up an agent & fills memory

    args
        sess (tf.Session)

    returns
        agent (energypy DQN agent)
        batch (sampled from the agent's memory)
        env (energypy 2048 environment)
    """
    env = energypy.make_env('2048', observation_dims='2D')

    #  use high learning rate to get weight changes
    agent = energypy.make_agent(
        agent_id='dqn',
        sess=sess,
        env=env,
        total_steps=10,
        discount=0.9,
        memory_type='deque',
        learning_rate=1.0,
        double_q=double_q,
        update_target_net=100,
        network='conv',
        filters=(8, 16),
        kernels=(2, 2),
        strides=(1, 1)
    )

    #  fill the memory with random experience
    for step in range(48):
        obs = env.observation_space.sample()
        action = env.action_space.sample()
        reward = random.random() * 10
        next_obs = env.observation_space.sample()
        done = random.choice([True, False])
        agent.remember(obs, action, reward, next_obs, done)

    batch = agent.memory.get_batch(agent.batch_size)

    return agent, batch, env
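#  Hedged sketch of using setup_agent in a test - assumes a TensorFlow 1.x
#  style session context; the returned batch is sampled from the 48 random
#  experiences remembered above:
#
#  def test_setup_agent():
#      with tf.Session() as sess:
#          agent, batch, env = setup_agent(sess, double_q=True)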
def setup_run(cfg, run, sess):
    run_cfg = make_run_config(cfg, run)
    run_logger = make_new_logger('run_setup', run_cfg['run_dir'])
    runner = Runner(sess, run_cfg)
    dump_config(run_cfg, run_logger)

    env_config = run_cfg['env']
    env = make_env(**env_config)

    if hasattr(env.observation_space, 'info') and hasattr(
            env.state_space, 'info'):
        run_logger.debug(json.dumps({'state_info': env.state_space.info}))
        run_logger.debug(
            json.dumps({'observation_info': env.observation_space.info}))

    agent_config = run_cfg['agent']
    agent_config['env'] = env
    agent_config['sess'] = sess
    agent_config['tensorboard_dir'] = run_cfg['tensorboard_dir']
    agent = make_agent(**agent_config)

    return run_cfg, agent, env, runner
import numpy as np

import energypy


default_config = {
    'env_id': 'battery',
    'dataset': 'example',
    'initial_charge': 0,
    'round_trip_eff': 0.9,
    'episode_sample': 'full'
}

env = energypy.make_env(**default_config)
obs = env.reset()


def test_charge():
    rew, next_obs, d, i = env.step(1.0)

    charge = env.get_state_variable('C_charge_level [MWh]')
    expected_charge = 0.9 * 1.0 / 12
    assert charge == expected_charge


def test_discharge():
    config = dict(default_config)
    config['initial_charge'] = 1.0
    config['capacity'] = 4.0

    env = energypy.make_env(**config)
        else:
            assert len(data) == episode_length

    next_states = np.array(i['next_state']).reshape(-1, *env.state_space.shape)
    # assert next_states[-1] == np.zeros((1, *env.state_space.shape))


if __name__ == '__main__':
    env = 'battery'
    sample_strat = 'full'
    episode_length = 4

    env = energypy.make_env(
        env_id=env,
        sample_strat=sample_strat,
        episode_length=episode_length
    )

    done = False
    env.reset()
    while not done:
        action = env.action_space.sample()
        s, r, done, i = env.step(action)

    for key, data in i.items():
        if sample_strat == 'full':
            assert len(data) == env.state_space.num_samples
        else:
            assert len(data) == episode_length