Example #1
    def test_create_dict(self):
        env_dict = create_env_dict(self.env)

        self.assertIn("obs", env_dict)
        self.assertIn("act0", env_dict)
        self.assertIn("act1", env_dict)
        self.assertIn("act2", env_dict)
Example #2
    def __init__(self, max_length, seed_number, env):
        env_dict = create_env_dict(env)

        # Override the observation length in the replay memory.
        env_dict['obs'] = {"dtype": numpy.float32, "shape": (17,)}
        env_dict['next_obs'] = {"dtype": numpy.float32, "shape": (17,)}
        self.before_add = create_before_add_func(env)
        self.storage = ReplayBuffer(max_length, env_dict)
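
    # A hedged sketch of how such a wrapper is typically used downstream; the
    # add/sample method names below are assumptions, but the cpprb calls mirror
    # the other examples in this file.
    def add(self, obs, act, next_obs, rew, done):
        # before_add reshapes the raw transition into the env_dict keys.
        self.storage.add(**self.before_add(obs, act, next_obs, rew, done))

    def sample(self, batch_size):
        # Returns a dict of numpy arrays keyed by the env_dict entries.
        return self.storage.sample(batch_size)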
Example #3
    def test_add(self):
        env_dict = create_env_dict(self.env)
        before_add_func = create_before_add_func(self.env)

        rb = ReplayBuffer(256, env_dict)

        obs = self.env.reset()

        for i in range(100):
            act = self.env.action_space.sample()
            next_obs, rew, done, _ = self.env.step(act)

            rb.add(**before_add_func(obs, act, next_obs, rew, done))

            if done:
                obs = self.env.reset()
            else:
                obs = next_obs
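
        # A hedged continuation sketch: draw a batch back out.
        # ReplayBuffer.sample(batch_size) returns a dict of numpy arrays keyed
        # by the env_dict entries, each with a leading batch dimension.
        sample = rb.sample(32)
        for key in sample:
            self.assertEqual(len(sample[key]), 32)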
Example #4
    def __init__(self, max_length, seed_number, env):
        env_dict = create_env_dict(env)
        self.before_add = create_before_add_func(env)
        self.storage = ReplayBuffer(max_length, env_dict)
Example #5

# %% run_cell
if __name__ == "__main__":
    ENV_ID = 'pong'
    THREADS = 4
    mp.set_start_method('spawn')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # device = 'cpu'

    params = data.params[ENV_ID]
    test_env = createEnv(params.env)

    shape = test_env.observation_space.shape
    actions = test_env.action_space.n
    env_dict = create_env_dict(test_env)

    net = model.DDQN(shape, actions).to(device)
    net.share_memory()
    tgt_net = ptan.agent.TargetNet(net)

    selector = ptan.actions.ArgmaxActionSelector()
    agent = ptan.agent.DQNAgent(net,
                                selector,
                                device=device,
                                preprocessor=preprocess)

    optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)

    done_training = mp.Event()
    done_training.clear()
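
    # The snippet stops before a replay buffer is created; a hedged sketch of
    # that next step, mirroring the other examples in this file and assuming
    # the cpprb imports used elsewhere (ReplayBuffer, create_before_add_func).
    # BUF_SIZE is an assumed capacity, and this single-process ReplayBuffer
    # would not be shared across worker processes (cpprb's multiprocessing-aware
    # buffers would be needed for that).
    BUF_SIZE = 100_000
    before_add = create_before_add_func(test_env)
    buffer = ReplayBuffer(BUF_SIZE, env_dict)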
# create_buffer_with_helper_func.py
#
# Create a `ReplayBuffer` for a `gym.Env` whose spaces are not simple (e.g. Tuple spaces), using the helper functions.


import gym
from cpprb import ReplayBuffer, create_env_dict, create_before_add_func

env = gym.make("Blackjack-v0")
# https://github.com/openai/gym/blob/master/gym/envs/toy_text/blackjack.py
# BlackjackEnv
#   observation_space: Tuple(Discrete(32),Discrete(11),Discrete(2))
#   action_space     : Discrete(2)


env_dict = create_env_dict(env)
# >>> env_dict
#{'act': {'add_shape': array([-1,  1]), 'dtype': numpy.int32, 'shape': 1},
# 'done': {'add_shape': array([-1,  1]), 'dtype': numpy.float32, 'shape': 1},
# 'next_obs0': {'add_shape': array([-1,  1]), 'dtype': numpy.int32, 'shape': 1},
# 'next_obs1': {'add_shape': array([-1,  1]), 'dtype': numpy.int32, 'shape': 1},
# 'next_obs2': {'add_shape': array([-1,  1]), 'dtype': numpy.int32, 'shape': 1},
# 'obs0': {'add_shape': array([-1,  1]), 'dtype': numpy.int32, 'shape': 1},
# 'obs1': {'add_shape': array([-1,  1]), 'dtype': numpy.int32, 'shape': 1},
# 'obs2': {'add_shape': array([-1,  1]), 'dtype': numpy.int32, 'shape': 1},
# 'rew': {'add_shape': array([-1,  1]), 'dtype': numpy.float32, 'shape': 1}}



rb = ReplayBuffer(256, env_dict)
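

# A minimal sketch of filling the buffer, assuming the old gym step API used
# elsewhere in this file (a single `done` flag rather than
# `terminated`/`truncated`).
before_add = create_before_add_func(env)

obs = env.reset()
for _ in range(100):
    act = env.action_space.sample()
    next_obs, rew, done, _ = env.step(act)

    # before_add splits the Tuple observation into obs0/obs1/obs2
    # (and next_obs0/next_obs1/next_obs2) to match env_dict.
    rb.add(**before_add(obs, act, next_obs, rew, done))

    obs = env.reset() if done else next_obs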