Example 1
def setup_experiment(sess, agent_config, env_config, paths, seed=None):
    """

    args
        sess (tf.Session)
        agent_config (dict)
        env_config (dict)
        paths (dict)
        seed (int)
    """

    env = energy_py.make_env(**env_config)
    save_args(env_config, path=paths['env_args'])

    if seed:
        logger.info('random seed is {}'.format(seed))
        env.seed(seed)

    agent_config['env'] = env
    agent_config['sess'] = sess
    agent_config['act_path'] = paths['tb_act']
    agent_config['learn_path'] = paths['tb_learn']

    agent = energy_py.make_agent(**agent_config)
    save_args(agent_config, path=paths['agent_args'])

    if hasattr(agent, 'acting_writer'):
        agent.acting_writer.add_graph(sess.graph)

    return agent, env
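
A minimal sketch of how setup_experiment might be called; the config dictionaries mirror the arguments used in Example 3, while the paths values below are hypothetical placeholders for the keys the function reads:

import tensorflow as tf

env_config = {'env_id': 'battery', 'dataset': 'example', 'episode_length': 288}
agent_config = {'agent_id': 'dqn', 'total_steps': 1000000}

#  hypothetical output locations for the saved args and the TensorBoard writers
paths = {
    'env_args': './results/env_args.txt',
    'agent_args': './results/agent_args.txt',
    'tb_act': './results/tensorboard/act',
    'tb_learn': './results/tensorboard/learn',
}

with tf.Session() as sess:
    agent, env = setup_experiment(sess, agent_config, env_config, paths, seed=42)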
Example 2
import random

import energy_py


def setup_agent(sess, double_q=False):
    """
    Sets up an agent & fills memory

    args
        sess (tf.Session)

    returns
        agent (energy_py DQN agent)
        env (energy_py Battery environment)
    """

    env = energy_py.make_env(
        '2048',
        observation_dims='2D'
    )

    #  use high learning rate to get weight changes
    agent = energy_py.make_agent(
        agent_id='dqn',
        sess=sess,
        env=env,
        total_steps=10,
        discount=0.9,
        memory_type='deque',
        learning_rate=1.0,
        double_q=double_q,
        update_target_net=100,

        network='conv',
        filters=(8, 16),
        kernels=(2, 2),
        strides=(1, 1),
    )

    for step in range(48):
        obs = env.observation_space.sample()
        action = env.action_space.sample()
        reward = random.random() * 10
        next_obs = env.observation_space.sample()
        done = random.choice([True, False])
        agent.remember(obs, action, reward, next_obs, done)

    batch = agent.memory.get_batch(agent.batch_size)

    return agent, batch, env
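
A hypothetical call to the helper above, which only needs an open TensorFlow session:

import tensorflow as tf

with tf.Session() as sess:
    agent, batch, env = setup_agent(sess, double_q=True)
    #  batch is a random sample drawn from the experience pushed into memory above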
Example 3
import tensorflow as tf
import energy_py

with tf.Session() as sess:
    env = energy_py.make_env(env_id='battery',
                             episode_length=288,
                             dataset='example')

    agent = energy_py.make_agent(sess=sess,
                                 agent_id='dqn',
                                 env=env,
                                 total_steps=1000000)

    observation = env.reset()

    done = False
    while not done:
        action = agent.act(observation)
        next_observation, reward, done, info = env.step(action)
        training_info = agent.learn()
        observation = next_observation
Example 4
def experiment(agent_config, env_config, total_steps, paths, seed=None):
    """
    Run an experiment.  Episodes are run until total_steps is reached.

    args
        agent_config (dict)
        env_config (dict)
        total_steps (int)
        paths (dict)
        seed (int)

    Agent and environment are created from config dictionaries.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:

        #  optionally set random seeds
        logger.info('random seed is {}'.format(seed))
        if seed:
            seed = int(seed)
            random.seed(seed)
            tf.set_random_seed(seed)
            np.random.seed(seed)

        env = energy_py.make_env(**env_config)
        save_args(env_config, path=paths['env_args'])

        #  add stuff into the agent config dict
        agent_config['env'] = env
        agent_config['env_repr'] = repr(env)
        agent_config['sess'] = sess
        agent_config['act_path'] = paths['tb_act']
        agent_config['learn_path'] = paths['tb_learn']

        #  init agent and save args
        agent = energy_py.make_agent(**agent_config)
        if hasattr(agent, 'acting_writer'):
            agent.acting_writer.add_graph(sess.graph)
        save_args(agent_config, path=paths['agent_args'])

        #  runner helps to manage our experiment
        runner = Runner(sess, paths, total_steps)

        #  outer while loop runs through multiple episodes
        step, episode = 0, 0
        while step < int(total_steps):
            episode += 1
            done = False
            observation = env.reset()

            #  inner while loop runs through a single episode
            while not done:
                step += 1
                #  select an action
                action = agent.act(observation)
                #  take one step through the environment
                next_observation, reward, done, info = env.step(action)
                #  store the experience
                agent.remember(observation, action, reward, next_observation,
                               done)
                runner.record_step(reward)
                #  moving to the next time step
                observation = next_observation

                #  fill the memory up halfway before we learn
                #  TODO the agent should decide what to do internally here
                if step > int(agent.memory.size * 0.5):
                    train_info = agent.learn()

            runner.record_episode(env_info=info)

            save_env_info(env, info, len(runner.episode_rewards),
                          paths['env_histories'])
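
A hypothetical invocation of experiment; the config dictionaries again mirror Example 3, and paths is assumed to have the same shape as the sketch under Example 1 plus an 'env_histories' entry for save_env_info:

experiment(
    agent_config={'agent_id': 'dqn', 'total_steps': 10000},
    env_config={'env_id': 'battery', 'dataset': 'example'},
    total_steps=10000,
    paths=paths,
    seed=42,
)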
Example 5
    def _act(self, observation):
        """
        Agent selects action randomly

        returns
             action (np.array)
        """
        return self.action_space.sample()


if __name__ == '__main__':
    import energy_py

    env = energy_py.make_env(
        'Flex-v1',
        flex_size=1,
        max_flex_time=4,
        relax_time=0,
        dataset='tempus')

    a = energy_py.make_agent('naive_flex', env=env, hours=(6, 10, 15, 19))

    o = env.reset()
    done = False
    while not done:
        action = a.act(o)

        o, r, done, i = env.step(action)

        print(action)