def setUp(self):
    """Create the queue under test and a reference deque side by side.

    Reads ``self.init_seq`` and ``self.maxlen``. When ``init_seq`` is
    truthy both containers are seeded from it; otherwise both are built
    without any positional argument.
    """
    # A falsy init_seq means "construct without a seed sequence at all".
    seed_args = (self.init_seq,) if self.init_seq else ()
    self.y_queue = RandomAccessQueue(*seed_args, maxlen=self.maxlen)
    self.t_queue = collections.deque(*seed_args, maxlen=self.maxlen)
def stop_current_episode(self):
    """Close the episode in progress and file it into the tiered heaps.

    An in-progress episode with more than one transition is pushed onto
    ``good_episodic_memory`` as the tuple
    ``(current_episode_R, episode_count, current_episode)``. When a heap
    is already at its capacity, ``heapq.heappushpop`` is used instead of
    a plain push, and the entry it returns (the smallest after the push,
    possibly the new one) cascades good -> normal -> bad. An entry
    returned by the bad heap is discarded and its transition count is
    subtracted from ``all_step_count``. The flat ``good_memory``,
    ``normal_memory`` and ``bad_memory`` queues are then rebuilt from the
    heaps.

    ``current_episode_R`` is reset to 0.0 on every call.

    NOTE(review): this method was recovered from whitespace-collapsed
    source; the nesting used here (in particular, rebuilding the flat
    queues only when an episode was in progress) should be confirmed.
    """

    def _push_bounded(heap, limit, entry):
        # Push ``entry``; once ``heap`` has reached ``limit`` return the
        # entry popped to make room, otherwise return None.
        if len(heap) < limit:
            heapq.heappush(heap, entry)
            return None
        return heapq.heappushpop(heap, entry)

    def _flatten(heap):
        # Concatenate the transition lists (index 2) of every heap entry.
        flat = RandomAccessQueue()
        for entry in heap:
            flat.extend(entry[2])
        return flat

    if self.current_episode:
        demoted = None
        if len(self.current_episode) > 1:
            record = (
                copy.copy(self.current_episode_R),
                copy.copy(self.episode_count),
                self.current_episode,
            )
            demoted = _push_bounded(
                self.good_episodic_memory,
                self.good_episodic_memory_capacity,
                record,
            )

        # The episode is closed even when it was too short to be stored.
        self.current_episode = []
        self.episode_count += 1

        demoted_again = None
        if demoted is not None:
            demoted_again = _push_bounded(
                self.normal_episodic_memory,
                self.normal_episodic_memory_capacity,
                demoted,
            )

        if demoted_again is not None:
            dropped = _push_bounded(
                self.bad_episodic_memory,
                self.bad_episodic_memory_capacity,
                demoted_again,
            )
            if dropped is not None:
                # This entry leaves the buffer entirely.
                self.all_step_count -= len(dropped[2])

        self.good_memory = _flatten(self.good_episodic_memory)
        self.normal_memory = _flatten(self.normal_episodic_memory)
        self.bad_memory = _flatten(self.bad_episodic_memory)

    assert not self.current_episode
    self.current_episode_R = 0.0
def load(self, filename):
    """Restore ``self.memory`` from a pickle file.

    Args:
        filename: Path of the pickle file to read.

    A pickled ``collections.deque`` (the v0.2 on-disk format) is wrapped
    in a ``RandomAccessQueue`` that keeps the deque's ``maxlen``; any
    other object is stored as loaded.
    """
    # NOTE: pickle can execute arbitrary code while loading; only use
    # this on files from a trusted source.
    with open(filename, 'rb') as stream:
        restored = pickle.load(stream)
    self.memory = restored

    if not isinstance(restored, collections.deque):
        return

    # Load v0.2
    self.memory = RandomAccessQueue(restored, maxlen=restored.maxlen)
def __init__(
        self,
        capacity=None,
        alpha=0.6,
        beta0=0.4,
        betasteps=2e5,
        eps=1e-8,
        normalize_by_max=True,
        default_priority_func=None,
        uniform_ratio=0,
        wait_priority_after_sampling=True,
        return_sample_weights=True,
        error_min=None,
        error_max=None,
):
    """Set up empty storage and the priority-weighting state.

    Args:
        capacity: ``maxlen`` of the flat transition queue; also the
            initial value of ``capacity_left``.
        alpha, beta0, betasteps, eps, normalize_by_max, error_min,
            error_max: Forwarded unchanged to
            ``PriorityWeightError.__init__``.
        default_priority_func: Stored on the instance as is.
        uniform_ratio: Stored on the instance as is.
        wait_priority_after_sampling: Forwarded to the
            ``PrioritizedBuffer`` that holds episodes.
        return_sample_weights: Stored on the instance as is.
    """
    # Plain configuration values.
    self.default_priority_func = default_priority_func
    self.uniform_ratio = uniform_ratio
    self.return_sample_weights = return_sample_weights
    self.capacity_left = capacity

    # Storage: the episode buffer itself is created with capacity=None.
    self.current_episode = []
    self.episodic_memory = PrioritizedBuffer(
        capacity=None,
        wait_priority_after_sampling=wait_priority_after_sampling,
    )
    self.memory = RandomAccessQueue(maxlen=capacity)

    PriorityWeightError.__init__(
        self,
        alpha,
        beta0,
        betasteps,
        eps,
        normalize_by_max,
        error_min=error_min,
        error_max=error_max,
    )
def __init__(self, capacity=None, num_steps=1):
    """Create an empty buffer with per-key windows of recent transitions.

    Args:
        capacity: ``maxlen`` of the underlying ``RandomAccessQueue``.
        num_steps: Length bound of each ``last_n_transitions`` deque;
            asserted to be positive.
    """

    def _new_window():
        # Each missing key gets its own deque bounded to num_steps items.
        return collections.deque([], maxlen=num_steps)

    self.capacity = capacity
    assert num_steps > 0
    self.num_steps = num_steps
    self.memory = RandomAccessQueue(maxlen=capacity)
    self.last_n_transitions = collections.defaultdict(_new_window)
def stop_current_episode(self):
    """Rebuild the per-action queues and clear the episode in progress.

    For every key of ``self.action_base_experience`` a fresh
    ``RandomAccessQueue`` is stored in ``self.action_memory`` and filled
    with that key's experiences. A non-empty ``self.current_episode`` is
    then replaced by a new empty list; an empty one is left untouched.
    """
    for action, experiences in self.action_base_experience.items():
        fresh = RandomAccessQueue()
        self.action_memory[action] = fresh
        fresh.extend(experiences)

    if self.current_episode:
        self.current_episode = []
def load(self, filename):
    """Restore ``memory`` and ``episodic_memory`` from a pickle file.

    Args:
        filename: Path of the pickle file to read.

    A pickled tuple is unpacked directly into ``(memory,
    episodic_memory)``. Anything else is treated as the v0.2 format: a
    flat sequence of transitions from which episodes are rebuilt by
    cutting after every transition whose ``'is_state_terminal'`` entry is
    truthy. Transitions after the last terminal one are not added to
    ``episodic_memory``.
    """
    with open(filename, 'rb') as stream:
        restored = pickle.load(stream)

    if isinstance(restored, tuple):
        self.memory, self.episodic_memory = restored
        return

    # Load v0.2
    # FIXME: The code works with EpisodicReplayBuffer
    # but not with PrioritizedEpisodicReplayBuffer
    self.memory = RandomAccessQueue(restored)
    self.episodic_memory = RandomAccessQueue()

    # Recover episodic_memory with best effort.
    pending = []
    for transition in self.memory:
        pending.append(transition)
        if not transition['is_state_terminal']:
            continue
        self.episodic_memory.append(pending)
        pending = []
def __init__(self, capacity=None, good_episodic_memory_capacity=20,
             normal_episodic_memory_capacity=50,
             bad_episodic_memory_capacity=10):
    """Create an empty buffer with three tiers of episode storage.

    Each tier has a list of episode entries (``*_episodic_memory``), a
    capacity for that list, and a flat ``RandomAccessQueue``
    (``*_memory``).

    Args:
        capacity: Stored on ``self.capacity``; not otherwise used here.
        good_episodic_memory_capacity: Capacity of the "good" tier.
            Defaults to 20, the value that used to be hard-coded.
        normal_episodic_memory_capacity: Capacity of the "normal" tier.
            Defaults to 50, the value that used to be hard-coded.
        bad_episodic_memory_capacity: Capacity of the "bad" tier.
            Defaults to 10, the value that used to be hard-coded.
    """
    self.capacity = capacity

    # State of the episode currently being recorded.
    self.current_episode = []
    self.current_episode_R = 0.0

    self.good_episodic_memory = []
    self.good_episodic_memory_capacity = good_episodic_memory_capacity
    self.good_memory = RandomAccessQueue()

    self.normal_episodic_memory = []
    self.normal_episodic_memory_capacity = normal_episodic_memory_capacity
    self.normal_memory = RandomAccessQueue()

    self.bad_episodic_memory = []
    self.bad_episodic_memory_capacity = bad_episodic_memory_capacity
    self.bad_memory = RandomAccessQueue()

    # Running counters.
    self.all_step_count = 0
    self.episode_count = 0
def load(self, filename):
    """Restore the three episode tiers and the counters from a pickle.

    Args:
        filename: Path of the pickle file to read.

    The file must contain a 5-tuple ``(good_episodic_memory,
    normal_episodic_memory, bad_episodic_memory, all_step_count,
    episode_count)``. The flat ``*_memory`` queues are rebuilt from the
    loaded tiers and the in-progress episode state is reset. If the
    pickled object is not a tuple, ``"bad replay file"`` is printed and
    the instance is left unchanged.
    """

    def _flatten(tier):
        # Concatenate the transition lists (index 2) of every entry.
        flat = RandomAccessQueue()
        for entry in tier:
            flat.extend(entry[2])
        return flat

    with open(filename, 'rb') as stream:
        restored = pickle.load(stream)

    if not isinstance(restored, tuple):
        print("bad replay file")
        return

    good, normal, bad, step_total, episode_total = restored
    self.good_episodic_memory = good
    self.normal_episodic_memory = normal
    self.bad_episodic_memory = bad
    self.all_step_count = step_total
    self.episode_count = episode_total

    self.good_memory = _flatten(self.good_episodic_memory)
    self.normal_memory = _flatten(self.normal_episodic_memory)
    self.bad_memory = _flatten(self.bad_episodic_memory)

    self.current_episode = []
    self.current_episode_R = 0.0
def __init__(self, capacity=2000, lookup_k=5, n_action=None,
             key_size=256, xp=np):
    """Create an empty memory together with its nearest-neighbour index.

    Args:
        capacity: ``maxlen`` of the transition queue and capacity of the
            ``knn.ArgsortKnn`` index.
        lookup_k: Stored on ``self.lookup_k``.
        n_action: Stored on ``self.num_action``; must be truthy.
        key_size: Width of the key buffer and ``dimension`` of the index.
        xp: Array module used to allocate the buffer and passed to the
            index (defaults to ``np``).
    """
    self.capacity = capacity
    self.memory = RandomAccessQueue(maxlen=capacity)
    self.lookup_k = lookup_k
    self.xp = xp
    self.num_action = n_action
    self.key_size = key_size

    # n_action defaults to None, so the caller has to supply a value.
    assert self.num_action

    # Start with a (0, key_size) float32 buffer.
    self.tmp_emb_arr = xp.empty((0, key_size), dtype='float32')
    self.knn = knn.ArgsortKnn(
        capacity=capacity,
        dimension=key_size,
        xp=xp,
    )
def __init__(self, capacity=None):
    """Create an empty episodic buffer.

    Args:
        capacity: Stored on ``self.capacity``. Both queues are created
            here without a ``maxlen``.
    """
    self.capacity = capacity
    # Transitions of the episode currently being recorded.
    self.current_episode = []
    self.memory = RandomAccessQueue()
    self.episodic_memory = RandomAccessQueue()
def __init__(self, capacity=None):
    """Create an empty buffer backed by a ``RandomAccessQueue``.

    Args:
        capacity: Passed to the queue as its ``maxlen``.
    """
    storage = RandomAccessQueue(maxlen=capacity)
    self.memory = storage
def __init__(self, capacity=None):
    """Create an empty episodic buffer with keyed in-progress episodes.

    Args:
        capacity: Stored on ``self.capacity``. Both queues are created
            here without a ``maxlen``.
    """
    self.capacity = capacity
    # Missing keys start out as an empty list of transitions.
    self.current_episode = collections.defaultdict(list)
    self.memory = RandomAccessQueue()
    self.episodic_memory = RandomAccessQueue()