Exemple #1
0
 def setUp(self):
     """Create the queue under test plus a reference ``collections.deque``.

     Both containers are built with ``maxlen=self.maxlen``. They are seeded
     with ``self.init_seq`` only when that attribute is truthy; otherwise
     they are constructed without an initial iterable.
     """
     # Zero or one positional argument, shared by both constructors.
     seed = (self.init_seq,) if self.init_seq else ()
     self.y_queue = RandomAccessQueue(*seed, maxlen=self.maxlen)
     self.t_queue = collections.deque(*seed, maxlen=self.maxlen)
Exemple #2
0
    def stop_current_episode(self):
        """Finish the episode in progress and file it into the tiered memories.

        An episode longer than one step is pushed onto the "good" heap as
        ``(return, episode_count, steps)``. When a heap is already at its
        capacity, ``heapq.heappushpop`` evicts its smallest entry, which then
        cascades good -> normal -> bad. An entry evicted from the "bad" heap
        is discarded and its length is subtracted from ``all_step_count``.
        Afterwards the three flat sampling queues are rebuilt from the heaps.

        The running return ``current_episode_R`` is reset to ``0.0`` whether
        or not an episode was in progress.
        """

        def push_bounded(heap, capacity, entry):
            """Push ``entry``; return the evicted minimum once ``heap`` is full."""
            if len(heap) < capacity:
                heapq.heappush(heap, entry)
                return None
            return heapq.heappushpop(heap, entry)

        def flatten(heap):
            """Concatenate the step lists (index 2) of every heap entry."""
            queue = RandomAccessQueue()
            for entry in heap:
                queue.extend(entry[2])
            return queue

        if self.current_episode:
            demoted = None
            # Episodes of a single step are dropped without being stored.
            if len(self.current_episode) > 1:
                finished = (copy.copy(self.current_episode_R),
                            copy.copy(self.episode_count),
                            self.current_episode)
                demoted = push_bounded(self.good_episodic_memory,
                                       self.good_episodic_memory_capacity,
                                       finished)

            self.current_episode = []
            self.episode_count += 1

            demoted_again = None
            if demoted is not None:
                demoted_again = push_bounded(
                    self.normal_episodic_memory,
                    self.normal_episodic_memory_capacity,
                    demoted)

            if demoted_again is not None:
                discarded = push_bounded(self.bad_episodic_memory,
                                         self.bad_episodic_memory_capacity,
                                         demoted_again)
                if discarded is not None:
                    # This episode leaves the buffer entirely.
                    self.all_step_count -= len(discarded[2])

            # Rebuild the flat queues so they mirror the heaps.
            self.good_memory = flatten(self.good_episodic_memory)
            self.normal_memory = flatten(self.normal_episodic_memory)
            self.bad_memory = flatten(self.bad_episodic_memory)

        assert not self.current_episode

        self.current_episode_R = 0.0
Exemple #3
0
 def load(self, filename):
     """Restore ``self.memory`` from a pickle file.

     Args:
         filename: Path of the pickle file to read.

     NOTE(review): ``pickle.load`` can execute arbitrary code; only load
     files from a trusted source.
     """
     with open(filename, 'rb') as f:
         restored = pickle.load(f)
     self.memory = restored

     if not isinstance(restored, collections.deque):
         return

     # Load v0.2: the buffer used to be stored as a plain deque, so wrap
     # it while keeping its length bound.
     self.memory = RandomAccessQueue(restored, maxlen=restored.maxlen)
 def __init__(
     self,
     capacity=None,
     alpha=0.6,
     beta0=0.4,
     betasteps=2e5,
     eps=1e-8,
     normalize_by_max=True,
     default_priority_func=None,
     uniform_ratio=0,
     wait_priority_after_sampling=True,
     return_sample_weights=True,
     error_min=None,
     error_max=None,
 ):
     """Set up the episodic storage and the priority-weighting state.

     Args:
         capacity: ``maxlen`` of the flat transition queue; also the
             initial value of ``capacity_left``.
         alpha, beta0, betasteps, eps, normalize_by_max, error_min,
             error_max: Forwarded unchanged to
             ``PriorityWeightError.__init__``.
         default_priority_func: Stored as ``self.default_priority_func``.
         uniform_ratio: Stored as ``self.uniform_ratio``.
         wait_priority_after_sampling: Forwarded to the episode-level
             ``PrioritizedBuffer``.
         return_sample_weights: Stored as ``self.return_sample_weights``.
     """
     # Plain configuration values.
     self.default_priority_func = default_priority_func
     self.uniform_ratio = uniform_ratio
     self.return_sample_weights = return_sample_weights
     self.capacity_left = capacity

     # Storage: the episode being recorded, the episode-level prioritized
     # buffer (created with capacity=None), and the flat transition queue.
     self.current_episode = []
     self.episodic_memory = PrioritizedBuffer(
         capacity=None,
         wait_priority_after_sampling=wait_priority_after_sampling,
     )
     self.memory = RandomAccessQueue(maxlen=capacity)

     PriorityWeightError.__init__(
         self,
         alpha,
         beta0,
         betasteps,
         eps,
         normalize_by_max,
         error_min=error_min,
         error_max=error_max,
     )
 def __init__(self, capacity=None, num_steps=1):
     """Create an empty buffer with per-key windows of recent transitions.

     Args:
         capacity: ``maxlen`` of the transition queue (``None`` is passed
             through as-is).
         num_steps: Length of each recent-transition window; must be
             positive.
     """
     self.capacity = capacity
     assert num_steps > 0
     self.num_steps = num_steps
     self.memory = RandomAccessQueue(maxlen=capacity)

     def new_window():
         """Return an empty deque holding at most ``num_steps`` items."""
         return collections.deque(maxlen=num_steps)

     # One window is created lazily for every key that is looked up.
     self.last_n_transitions = collections.defaultdict(new_window)
Exemple #6
0
    def stop_current_episode(self):
        """Rebuild the per-action queues and clear the episode in progress.

        For every action key in ``self.action_base_experience`` a fresh
        ``RandomAccessQueue`` is stored in ``self.action_memory`` and filled
        with that action's recorded experience.
        """
        for action in self.action_base_experience:
            rebuilt = RandomAccessQueue()
            self.action_memory[action] = rebuilt
            rebuilt.extend(self.action_base_experience[action])

        # Only a non-empty episode is replaced; a falsy value is left as is.
        if self.current_episode:
            self.current_episode = []
Exemple #7
0
    def load(self, filename):
        """Restore the flat and episodic memories from a pickle file.

        A pickled tuple is unpacked as ``(memory, episodic_memory)``. Any
        other payload is treated as the v0.2 format: it is wrapped in a
        ``RandomAccessQueue`` and the episodes are reconstructed from it.

        Args:
            filename: Path of the pickle file to read.

        NOTE(review): ``pickle.load`` can execute arbitrary code; only load
        files from a trusted source.
        """
        with open(filename, 'rb') as f:
            payload = pickle.load(f)

        if isinstance(payload, tuple):
            self.memory, self.episodic_memory = payload
            return

        # Load v0.2
        # FIXME: The code works with EpisodicReplayBuffer
        # but not with PrioritizedEpisodicReplayBuffer
        self.memory = RandomAccessQueue(payload)
        self.episodic_memory = RandomAccessQueue()

        # Recover episodic_memory with best effort: an episode is closed at
        # each terminal transition, so trailing transitions that never reach
        # a terminal state are not added to episodic_memory.
        pending = []
        for transition in self.memory:
            pending.append(transition)
            if transition['is_state_terminal']:
                self.episodic_memory.append(pending)
                pending = []
Exemple #8
0
    def __init__(self, capacity=None):
        """Create an empty buffer with "good", "normal" and "bad" tiers.

        Each tier has a list of stored episodes, a fixed episode capacity
        (20 / 50 / 10) and a flat ``RandomAccessQueue``.

        Args:
            capacity: Stored as ``self.capacity``; this constructor does not
                use it for anything else.
        """
        self.capacity = capacity

        # Global counters.
        self.all_step_count = 0
        self.episode_count = 0

        # Episode currently being recorded and its accumulated return.
        self.current_episode = []
        self.current_episode_R = 0.0

        # Per-tier episode limits.
        self.good_episodic_memory_capacity = 20
        self.normal_episodic_memory_capacity = 50
        self.bad_episodic_memory_capacity = 10

        # Per-tier episode lists.
        self.good_episodic_memory = []
        self.normal_episodic_memory = []
        self.bad_episodic_memory = []

        # Per-tier flat queues.
        self.good_memory = RandomAccessQueue()
        self.normal_memory = RandomAccessQueue()
        self.bad_memory = RandomAccessQueue()
Exemple #9
0
    def load(self, filename):
        """Restore the three episode tiers and the counters from a pickle file.

        The file must contain a 5-tuple ``(good, normal, bad,
        all_step_count, episode_count)``. The flat queues are rebuilt from
        the episode entries and the in-progress episode is reset. Any
        non-tuple payload is reported on stdout and otherwise ignored.

        Args:
            filename: Path of the pickle file to read.

        NOTE(review): ``pickle.load`` can execute arbitrary code; only load
        files from a trusted source.
        """
        with open(filename, 'rb') as f:
            payload = pickle.load(f)

        if not isinstance(payload, tuple):
            print("bad replay file")
            return

        (self.good_episodic_memory,
         self.normal_episodic_memory,
         self.bad_episodic_memory,
         self.all_step_count,
         self.episode_count) = payload

        def flatten(episodes):
            """Concatenate the step lists (index 2) of every episode entry."""
            queue = RandomAccessQueue()
            for entry in episodes:
                queue.extend(entry[2])
            return queue

        self.good_memory = flatten(self.good_episodic_memory)
        self.normal_memory = flatten(self.normal_episodic_memory)
        self.bad_memory = flatten(self.bad_episodic_memory)

        # Start from a clean in-progress episode.
        self.current_episode = []
        self.current_episode_R = 0.0
    def __init__(self, capacity=2000, lookup_k=5, n_action=None,
                 key_size=256, xp=np):
        """Create an empty key-indexed memory backed by ``knn.ArgsortKnn``.

        Args:
            capacity: ``maxlen`` of the transition queue and the capacity
                handed to the k-NN index.
            lookup_k: Stored as ``self.lookup_k``.
            n_action: Stored as ``self.num_action``. It must be truthy: the
                default ``None`` fails the assertion below.
            key_size: Width of the key array and dimension of the k-NN index.
            xp: Array module used to allocate ``tmp_emb_arr`` and passed on
                to the k-NN index.
        """
        self.xp = xp
        self.capacity = capacity
        self.lookup_k = lookup_k
        self.key_size = key_size
        self.num_action = n_action
        self.memory = RandomAccessQueue(maxlen=capacity)
        assert self.num_action

        # Starts with zero rows, each row being key_size wide.
        empty_shape = (0, key_size)
        self.tmp_emb_arr = xp.empty(empty_shape, dtype='float32')

        self.knn = knn.ArgsortKnn(capacity=capacity, dimension=key_size,
                                  xp=xp)
Exemple #11
0
 def __init__(self, capacity=None):
     """Create an empty episodic buffer.

     Args:
         capacity: Stored as ``self.capacity``. Both queues are created
             without a ``maxlen``, so this constructor does not apply it.
     """
     self.capacity = capacity
     # Episode currently being recorded.
     self.current_episode = []
     # Flat queue and episode-level queue, both unbounded here.
     self.memory = RandomAccessQueue()
     self.episodic_memory = RandomAccessQueue()
Exemple #12
0
 def __init__(self, capacity=None):
     """Create an empty buffer.

     Args:
         capacity: Passed as ``maxlen`` to the underlying
             ``RandomAccessQueue``.
     """
     storage = RandomAccessQueue(maxlen=capacity)
     self.memory = storage
Exemple #13
0
 def __init__(self, capacity=None):
     """Create an empty episodic buffer with keyed in-progress episodes.

     Args:
         capacity: Stored as ``self.capacity``. Both queues are created
             without a ``maxlen``, so this constructor does not apply it.
     """
     self.capacity = capacity
     # A list is created lazily for every key that is looked up.
     self.current_episode = collections.defaultdict(list)
     # Flat queue and episode-level queue, both unbounded here.
     self.memory = RandomAccessQueue()
     self.episodic_memory = RandomAccessQueue()