def setup_experiment(sess, agent_config, env_config, paths, seed=None):
    """
    Sets up an agent and environment for an experiment

    args
        sess (tf.Session)
        agent_config (dict)
        env_config (dict)
        paths (dict)
        seed (int)

    returns
        agent (energy_py agent)
        env (energy_py environment)
    """
    env = energy_py.make_env(**env_config)
    save_args(env_config, path=paths['env_args'])

    if seed:
        logger.info('random seed is {}'.format(seed))
        env.seed(seed)

    agent_config['env'] = env
    agent_config['sess'] = sess
    agent_config['act_path'] = paths['tb_act']
    agent_config['learn_path'] = paths['tb_learn']

    agent = energy_py.make_agent(**agent_config)
    save_args(agent_config, path=paths['agent_args'])

    if hasattr(agent, 'acting_writer'):
        agent.acting_writer.add_graph(sess.graph)

    return agent, env
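#  a minimal sketch of how setup_experiment might be called - the agent and
#  env config values and the result file locations are illustrative
#  assumptions; only the paths keys ('env_args', 'agent_args', 'tb_act',
#  'tb_learn') are taken from the function body
import tensorflow as tf

with tf.Session() as sess:
    agent, env = setup_experiment(
        sess,
        agent_config={'agent_id': 'dqn', 'total_steps': 10000},
        env_config={'env_id': 'battery', 'dataset': 'example'},
        paths={
            'env_args': './results/env_args.txt',
            'agent_args': './results/agent_args.txt',
            'tb_act': './results/tb/act',
            'tb_learn': './results/tb/learn'
        },
        seed=42
    )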
def setup_agent(sess, double_q=False):
    """
    Sets up an agent & fills memory with random experience

    args
        sess (tf.Session)
        double_q (bool)

    returns
        agent (energy_py DQN agent)
        batch - a batch of experience sampled from the agent's memory
        env (energy_py 2048 environment)
    """
    env = energy_py.make_env(
        '2048',
        observation_dims='2D'
    )

    #  use a high learning rate to get weight changes
    agent = energy_py.make_agent(
        agent_id='dqn',
        sess=sess,
        env=env,
        total_steps=10,
        discount=0.9,
        memory_type='deque',
        learning_rate=1.0,
        double_q=double_q,
        update_target_net=100,
        network='conv',
        filters=(8, 16),
        kernels=(2, 2),
        strides=(1, 1)
    )

    #  fill the memory with random experience
    for step in range(48):
        obs = env.observation_space.sample()
        action = env.action_space.sample()
        reward = random.random() * 10
        next_obs = env.observation_space.sample()
        done = random.choice([True, False])
        agent.remember(obs, action, reward, next_obs, done)

    batch = agent.memory.get_batch(agent.batch_size)

    return agent, batch, env
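#  a minimal sketch of using setup_agent inside a test - the print is
#  illustrative and assumes setup_agent is importable in the current module
import tensorflow as tf

tf.reset_default_graph()

with tf.Session() as sess:
    agent, batch, env = setup_agent(sess, double_q=True)

    #  batch holds experience sampled from the randomly filled memory
    print(agent.batch_size)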
import tensorflow as tf

import energy_py

with tf.Session() as sess:
    env = energy_py.make_env(
        env_id='battery',
        episode_length=288,
        dataset='example'
    )

    agent = energy_py.make_agent(
        sess=sess,
        agent_id='dqn',
        env=env,
        total_steps=1000000
    )

    observation = env.reset()
    done = False

    while not done:
        action = agent.act(observation)
        next_observation, reward, done, info = env.step(action)
        training_info = agent.learn()
        observation = next_observation
def experiment(agent_config, env_config, total_steps, paths, seed=None):
    """
    Runs an experiment. Episodes are run until total_steps is reached.

    Agent and environment are created from the config dictionaries.

    args
        agent_config (dict)
        env_config (dict)
        total_steps (int)
        paths (dict)
        seed (int)
    """
    tf.reset_default_graph()

    with tf.Session() as sess:
        #  optionally set random seeds
        logger.info('random seed is {}'.format(seed))
        if seed:
            seed = int(seed)
            random.seed(seed)
            tf.set_random_seed(seed)
            np.random.seed(seed)

        env = energy_py.make_env(**env_config)
        save_args(env_config, path=paths['env_args'])

        #  add stuff into the agent config dict
        agent_config['env'] = env
        agent_config['env_repr'] = repr(env)
        agent_config['sess'] = sess
        agent_config['act_path'] = paths['tb_act']
        agent_config['learn_path'] = paths['tb_learn']

        #  init agent and save args
        agent = energy_py.make_agent(**agent_config)

        if hasattr(agent, 'acting_writer'):
            agent.acting_writer.add_graph(sess.graph)

        save_args(agent_config, path=paths['agent_args'])

        #  runner helps to manage our experiment
        runner = Runner(sess, paths, total_steps)

        #  outer while loop runs through multiple episodes
        step, episode = 0, 0
        while step < int(total_steps):
            episode += 1
            done = False
            observation = env.reset()

            #  inner while loop runs through a single episode
            while not done:
                step += 1

                #  select an action
                action = agent.act(observation)

                #  take one step through the environment
                next_observation, reward, done, info = env.step(action)

                #  store the experience
                agent.remember(observation, action, reward,
                               next_observation, done)
                runner.record_step(reward)

                #  moving to the next time step
                observation = next_observation

                #  fill the memory up halfway before we learn
                #  TODO the agent should decide what to do internally here
                if step > int(agent.memory.size * 0.5):
                    train_info = agent.learn()

            runner.record_episode(env_info=info)

            save_env_info(
                env,
                info,
                len(runner.episode_rewards),
                paths['env_histories']
            )
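#  a sketch of how experiment() might be called - the config values and
#  result directories are illustrative assumptions; the paths keys
#  ('env_args', 'agent_args', 'tb_act', 'tb_learn', 'env_histories')
#  are the ones read inside the function
paths = {
    'env_args': './results/env_args.txt',
    'agent_args': './results/agent_args.txt',
    'tb_act': './results/tb/act',
    'tb_learn': './results/tb/learn',
    'env_histories': './results/env_histories'
}

experiment(
    agent_config={'agent_id': 'dqn'},
    env_config={'env_id': 'battery', 'dataset': 'example'},
    total_steps=1000000,
    paths=paths,
    seed=42
)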
def _act(self, observation):
    """
    Agent selects action randomly

    returns
        action (np.array)
    """
    return self.action_space.sample()


if __name__ == '__main__':
    import energy_py

    env = energy_py.make_env(
        'Flex-v1',
        flex_size=1,
        max_flex_time=4,
        relax_time=0,
        dataset='tempus'
    )

    a = energy_py.make_agent(
        'naive_flex',
        env=env,
        hours=(6, 10, 15, 19)
    )

    o = env.reset()
    done = False

    while not done:
        action = a.act(o)
        o, r, done, i = env.step(action)
        print(action)