def symmetryData(data):
    """Build a new History containing a copy of every episode in *data*.

    Parameters
    ----------
    data : History
        Source history; its episodes are appended to the result unchanged.

    Returns
    -------
    History
        Fresh history with the same state/action dimensions and the same
        episodes as *data*.

    NOTE(review): the mirrored (-state, -action) augmentation that this
    function's name suggests was present but is disabled; currently only a
    plain copy is produced. Confirm whether the augmentation should be
    restored before relying on the name.
    """
    # Renamed local: the original bound `symmetryData` inside the function,
    # shadowing the function's own name.
    mirrored = History(data.stateDim, data.actionDim)
    for episode in data:
        mirrored.appendEpisode(episode)
    return mirrored
def evaluateEpisodes(self, count, reset=True, exploration=False, visualize=True): self.setup() # tell agent that evaluation is taking place self.agent.evaluation = True currNumEpisodes = len(self.agent.history) # disable all explorers and store them for later if not exploration: explorers = [] for a in self.adapters_: if isinstance(a, Explorer) and a.active: explorers.append(a) a.active = False # run experiment for evaluation and store history self.runEpisodes(count, reset) self.numEpisodes -= count # copy the latest episodes to a new history history = History(self.agent.history.stateDim, self.agent.history.actionDim) history.episodes_ = self.agent.history.episodes_[currNumEpisodes:-1] # remove the evaluation histories from the agent self.agent.history.episodes_ = self.agent.history.episodes_[:currNumEpisodes] + [self.agent.history.episodes_[-1]] # enable exploration again if disabled before if not exploration: for a in explorers: a.active = True # tell agent that evaluation is over self.agent.evaluation = False if visualize: plt.ion() plt.clf() self.visual_x.append(self.numEpisodes) self.visual_y.append(mean([sum(e.rewards) for e in history])) plt.plot(self.visual_x, self.visual_y, 'o-', color='black') plt.gcf().canvas.draw() return history
def reduceData(data, buffer=4):
    """Filter a dataset down to its "interesting" samples.

    Keeps, for each episode, only the samples around a change in reward
    (plus the first and last sample of the episode), padded by *buffer*
    extra samples on each side of every such index.

    Parameters
    ----------
    data : History
        Episodic dataset whose episodes expose numpy arrays ``states``,
        ``actions`` and ``rewards``.
    buffer : int
        Number of additional samples to retain on each side of a reward
        change.

    Returns
    -------
    History
        New history with one (reduced) episode per input episode, samples
        in their original temporal order.
    """
    reducedHistory = History(data.stateDim, data.actionDim)
    for episode in data:
        # indices where the reward changes from one step to the next;
        # always keep the first and last sample of the episode as well
        rdiff = episode.rewards[1:] - episode.rewards[:-1]
        transitions = set([0, len(episode.rewards) - 1])
        transitions.update(where(rdiff != 0)[0])
        # expand each transition index by the surrounding buffer window
        keep = set()
        for t in transitions:
            for i in range(max(0, t - buffer), min(t + 2 + buffer, len(episode.rewards))):
                keep.add(i)
        # iterate in sorted order so retained samples stay temporally
        # ordered -- plain set iteration order is arbitrary
        for b in sorted(keep):
            reducedHistory.append(episode.states[b, :], episode.actions[b, :], episode.rewards[b])
        reducedHistory.newEpisode()
    return reducedHistory