# Example 1 (score: 0)
    def __init__(self, args, env):
        """Set up the evolutionary population, the DDPG agent, and trackers."""
        self.args = args
        self.env = env
        self.evolver = utils_ne.SSNE(self.args)
        self.best_r = 0
        self.best_state = []

        # Build the actor population.
        self.pop = [ddpg.Actor(args) for _ in range(args.pop_size)]

        # Population actors are evaluated only — switch them to eval mode.
        for net in self.pop:
            net.eval()

        # Gradient-based RL agent plus its replay buffer and exploration noise.
        self.rl_agent = ddpg.DDPG(args)
        self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)
        self.ounoise = ddpg.OUNoise(args.action_dim)

        # Bookkeeping counters.
        self.num_games = 0
        self.num_frames = 0
        self.gen_frames = None
# Example 2 (score: 0)
    def __init__(self, args, env):
        """Set up the neuroevolution population and the DDPG agent.

        Args:
            args: hyper-parameter namespace (pop_size, action_dim,
                buffer_size, ...).
            env: environment handle (accepted for interface compatibility;
                not stored by this variant).
        """
        self.args = args
        self.evolver = utils_ne.SSNE(self.args)

        # Evolutionary population of actors, held in eval mode (inference only).
        self.pop = [ddpg.Actor(args) for _ in range(args.pop_size)]
        for actor in self.pop:
            actor.eval()

        # Gradient-based RL agent and its exploration noise process.
        self.rl_agent = ddpg.DDPG(args)
        self.ounoise = ddpg.OUNoise(args.action_dim)

        # BUG FIX: list.append() returns None, so the original
        # `self.workers = self.pop.append(self.rl_agent.actor)` always left
        # self.workers = None.  Append first, then reference the full list
        # (population actors + the RL actor) as the worker set.
        self.pop.append(self.rl_agent.actor)
        self.workers = self.pop

        # Trackers.
        self.num_games = 0
        self.num_frames = 0
        self.gen_frames = 0
        self.len_replay = 0
# Example 3 (score: 0)
        # Debug probe: if the shared queue reports items, print its size,
        # pause briefly, then pop one item without blocking.
        # NOTE(review): for multiprocessing.Queue, empty()/qsize() are
        # approximate (and qsize() can raise NotImplementedError on macOS);
        # get_nowait() may still raise queue.Empty despite the check — confirm
        # this is debug-only code.
        if not self.replay_memory.empty():
            print(self.replay_memory.qsize())
            time.sleep(1)
            print(self.replay_memory.get_nowait())


if __name__ == "__main__":
    # Entry point: configure the environment, build a one-actor population,
    # start a learner thread fed by a shared multiprocessing queue, then
    # spawn worker processes (continuation truncated below).
    args = Parameters()
    env = utils.NormalizedActions(gym.make(env_tag))
    # Derive network dimensions from the environment's spaces.
    args.action_dim = env.action_space.shape[0]
    args.state_dim = env.observation_space.shape[0]

    # Population of a single actor, switched to eval mode (inference only).
    pop = []
    for _ in range(1):
        pop.append(ddpg.Actor(args))
    for actor in pop:
        actor.eval()

    # 'spawn' start method — presumably required for sharing torch tensors
    # across processes; confirm against the workers' CUDA usage.
    mp.set_start_method('spawn')

    # Queue shared between workers (producers) and the learner (consumer).
    replay_memory = mp.Queue()

    learner = LearnerThread(replay_memory)
    learner.start()
    # dict_all_returns = mp.Manager().dict()
    processes = []

    time_start = time.time()
    # NOTE(review): the loop variable `pop` shadows the `pop` list being
    # iterated — enumerate() holds its own iterator so this runs, but it is
    # fragile; consider renaming the loop variable.
    for key, pop in enumerate(pop):
        pop.share_memory()