def samplePredecessorState(self, state):
    """ Return a state drawn from *state*'s predecessor distribution

    Every known state is weighted by the relative frequency with which it
    has been observed to transition into *state*, i.e.
    invStateTransitions[(state, pred)] / stateCounter[pred]. These masses
    are conditioned on the predecessor and therefore do not sum to 1 over
    all predecessors in general, so the random draw is scaled by their
    total: a predecessor is sampled with probability proportional to its
    mass.

    Returns None if no known state has a positive mass.

    Raises ModelNotInitialized if the counter of *state* is zero.
    """
    if self.stateCounter[state] == 0:
        raise ModelNotInitialized()

    def computeProbabilityFct(predState):
        if self.stateCounter[predState] == 0:
            return 0.0
        return float(self.invStateTransitions[(state, predState)]) \
            / self.stateCounter[predState]

    probabilityMassFunction = [(predState, computeProbabilityFct(predState))
                               for predState in self.states]
    # States with zero mass can never be drawn, so drop them up front.
    candidates = [(predState, probabilityMass)
                  for predState, probabilityMass in probabilityMassFunction
                  if probabilityMass > 0.0]
    if not candidates:
        return None

    totalMass = sum(probabilityMass for _, probabilityMass in candidates)
    randValue = random.random() * totalMass
    accumulator = 0.0
    for predState, probabilityMass in candidates:
        accumulator += probabilityMass
        if accumulator > randValue:
            return predState
    # Floating point rounding may leave the accumulated mass marginally below
    # the drawn value; the draw then belongs to the last candidate.
    return candidates[-1][0]
def sampleState(self):
    """ Return a state drawn randomly

    Draws a single sample from the state density provided by the example
    set and wraps it into a State.

    Raises ModelNotInitialized if the example set provides no state density.
    """
    stateDensity = self.exampleSet.getStateDensity()
    if stateDensity is None:
        raise ModelNotInitialized()

    # TODO: Does it make sense to sample based on the data set?
    return State(stateDensity.resample(1).T[0])
def sampleSuccessorState(self, state):
    """ Return a state drawn from the state's successor distribution

    The model is retrained first if required. The successor is obtained by
    adding the offset predicted by the successor state model to *state*.

    Raises ModelNotInitialized if no successor state model is available.
    """
    if self._retrainingRequired():
        self._updateModel()

    if self.succStateModel is None:
        raise ModelNotInitialized()

    return State(state + self.succStateModel.predict(state),
                 state.dimensions)
def getExpectedReward(self, state):
    """ Returns the expected reward for the given state *state*

    The model is retrained first if required. The reward is looked up in
    the reward model under the example set's nearest neighbor of *state*
    and returned as a float.

    Raises ModelNotInitialized if there is no reward model or the example
    set contains no states.
    """
    if self._retrainingRequired():
        self._updateModel()

    # Identity tests are required here: comparing an array with "!= None"
    # is evaluated element-wise and has no well-defined truth value.
    if self.rewardModel is None or self.exampleSet.states is None:
        raise ModelNotInitialized()

    nearestNeighbor = self.exampleSet.getNearestNeighbor(state)
    return float(self.rewardModel[nearestNeighbor])
def getSuccessorDistribution(self, state):
    """ Return the successor distribution for the given state.

    Returns an iterator that yields pairs of successor states and their
    probabilities of being the successor of the given state. The model is
    retrained first if required. Since the model is deterministic, exactly
    one pair with probability 1.0 is yielded.

    Raises ModelNotInitialized (on first iteration) if no successor state
    model is available.
    """
    if self._retrainingRequired():
        self._updateModel()

    if self.succStateModel is None:
        raise ModelNotInitialized()

    # This is a deterministic model!
    succState = State(state + self.succStateModel.predict(state),
                      state.dimensions)
    yield (succState, 1.0)
def getExplorationValue(self, state):
    """ Return the exploratory value of the given state *state*

    The exploratory value of a state under this model is the negated
    euclidean distance from the state to its nearest neighbor in the
    example set. The model is retrained first if required.

    Raises ModelNotInitialized if the example set contains no states.
    """
    if self._retrainingRequired():
        self._updateModel()

    # Identity test is required here: comparing an array with "!= None" is
    # evaluated element-wise and has no well-defined truth value.
    if self.exampleSet.states is None:
        raise ModelNotInitialized()

    nearestNeighbor = self.exampleSet.getNearestNeighbor(state)
    dist = numpy.linalg.norm(nearestNeighbor - state)
    return -dist
def getSuccessorDistribution(self, state):
    """ Return the successor distribution for the given *state*.

    Returns an iterator that yields pairs of states and their probabilities
    of being the successor of the given *state*.

    The (at most self.k) nearest stored states of *state* are queried from
    a kd-tree, which is rebuilt lazily when self.rebuildSucc is set. For
    each neighbor, the offset between the neighbor and its recorded
    successor is applied to *state*; the pair's probability is the
    neighbor's gaussian weight divided by the sum of the weights.

    Raises ModelNotInitialized (on first iteration) if no states are stored.
    """
    if self.states is None:
        raise ModelNotInitialized()

    k = min(self.states.shape[0], self.k)

    if self.rebuildSucc:
        self.succKDTree = ann.kdtree(self.states)
        self.rebuildSucc = False

    indices, distances = self.succKDTree.knn(state, k)
    neighborIndices = indices[0]
    neighborDistances = distances[0]

    denominator = numpy.sum(numpy.exp(-neighborDistances / (self.b_Sa**2)))
    # If the distances become too large, then all values can become zero
    # In this situation, we simply return the closest state and probability 1.
    if denominator == 0 or numpy.isnan(denominator):
        import warnings
        warnings.warn(
            "Too large distances, returning only closest example")
        # Slice instead of assigning into the query result: assigning a
        # one-element list to an array row would broadcast it over all k
        # slots and yield the closest example k times.
        neighborIndices = neighborIndices[:1]
        neighborDistances = [0.0]
        denominator = numpy.exp(0.0 / (self.b_Sa**2))

    for index, distance in zip(neighborIndices, neighborDistances):
        neighbor = State(
            self.states[index],
            state.dimensions)  # TODO: not use state.dimensions
        succState, reward = self.successorSamples[neighbor]
        delta = succState - neighbor
        predictedSuccState = State(state + delta, state.dimensions)

        probability = gaussian(distance, self.b_Sa) / denominator
        if not 0 <= probability <= 1:
            import warnings
            import sys
            warnings.warn("Invalid distances in KNN Model!")
            # Parenthesised single-argument form prints identically under
            # the print statement and the print function.
            print(distances)
            # NOTE(review): terminates the whole process with exit status 0
            sys.exit(0)

        yield predictedSuccState, probability
def getNearestNeighbors(self, state, k, b):
    """ Determines *k* most similar states to the given *state*

    Returns an iterator over (weight, neighbor), where weight is the
    gaussian weighted influence of the neighbor onto *state*. The weight is
    computed via exp(-dist/b**2)/sum_over_neighbors(exp(-dist_1/b**2)).
    Note that the weights sum to 1.

    *k* is capped at the number of stored state samples. If the model has a
    "kdTree" attribute, it is used for the neighbor query; otherwise an
    exhaustive search is performed, which supports only k == 1.

    Raises ModelNotInitialized (on first iteration) if no state samples are
    stored, and AssertionError if the exhaustive search would have to
    return anything but exactly one neighbor.
    """
    if self.states is None:
        raise ModelNotInitialized("No state samples available")

    k = min(k, self.states.shape[1])

    if hasattr(self, "kdTree"):  # if we can use approximate nearest neighbor
        indices, distances = self.kdTree.knn(state, k=k)
        neighborIndices = indices[0]

        # Compute weights based on distance
        weights = numpy.exp(-distances[0] / (b**2))
        denominator = numpy.sum(weights)

        # If the distances become too large, then all values can become zero
        # In this situation, we simply return the closest state and
        # probability 1.
        if denominator == 0 or numpy.isnan(denominator):
            import warnings
            warnings.warn(
                "Too large distances, returning only closest example")
            # Slice instead of assigning into the query result: assigning a
            # one-element list to an array row would broadcast it over all
            # k slots instead of shortening the row.
            neighborIndices = neighborIndices[:1]
            weights = [1.0]
        else:
            # Normalize weights
            weights = weights / denominator

        for index, weight in zip(neighborIndices, weights):
            yield weight, State(self.states.T[index], state.dimensions)
        return

    # Exhaustive search over all samples; only a single neighbor is supported
    if k != 1:
        raise AssertionError(
            "Exhaustive neighbor search supports only k == 1")

    minDist = numpy.inf
    closestSample = None
    for index in range(self.states.shape[1]):
        sampleState = self.states.T[index]
        dist = numpy.linalg.norm(state - sampleState)
        if dist < minDist:
            minDist = dist
            closestSample = sampleState

    yield 1.0, State(closestSample, state.dimensions)
def samplePredecessorState(self, state):
    """ Return a state drawn from *state*'s predecessor distribution

    Returns a possible predecessor state of *state* drawn from the pairs
    yielded by getPredecessorDistribution according to their probabilities.

    If the yielded probabilities accumulate to less than the drawn random
    value (e.g. because of floating point rounding), the last yielded
    predecessor is returned.

    Raises ModelNotInitialized if no successor states are stored, and
    AssertionError if the predecessor distribution yields nothing at all.
    """
    if self.succStates is None:
        raise ModelNotInitialized()

    randVal = random.uniform(0, 1)
    cumProb = 0.0
    lastPredState = None
    foundCandidate = False
    for predState, predProb in self.getPredecessorDistribution(state):
        cumProb += predProb
        if cumProb >= randVal:
            return predState
        lastPredState = predState
        foundCandidate = True

    if not foundCandidate:
        # Raised explicitly so that the check survives "python -O"
        raise AssertionError("No predecessor state has been found!")

    return lastPredState
def drawTransitions(self, samples):
    """ Returns a random iterator over the transitions

    Returns a random iterator over the transitions that yields *samples*
    number of (state, successor state, reward) triples from the dataset,
    each taken from the same column of the stored arrays. If more samples
    are requested than contained in the data set, then data is reused
    (in a freshly permuted order for every pass).

    Nothing is yielded if *samples* is not positive.

    Raises ModelNotInitialized (on first iteration) if no states are stored
    or the data set is empty while samples are requested.
    """
    if self.states is None:
        raise ModelNotInitialized()

    if samples <= 0:
        return

    numTransitions = self.states.shape[1]
    if numTransitions == 0:
        # Without this check the loop below would spin forever
        raise ModelNotInitialized()

    counter = 0
    while True:
        for i in numpy.random.permutation(numTransitions):
            yield (self.states[:, i], self.succStates[:, i],
                   self.rewards[:, i])
            counter += 1
            if counter >= samples:
                return
def getSuccessorDistribution(self, state):
    """ Return the successor distribution for the given *state*.

    Returns an iterator that yields pairs (successor state, probability)
    for all known states with a non-zero probability of following *state*.
    The probability is the number of observed transitions from *state* to
    the successor divided by the counter of *state*.

    Raises ModelNotInitialized (on first iteration) if the counter of
    *state* is zero.
    """
    if self.stateCounter[state] == 0:
        raise ModelNotInitialized()

    # The probabilities of all known states are computed before the first
    # pair is handed out.
    transitionProbabilities = []
    for candidate in self.states:
        transitionCount = float(self.stateTransitions[(state, candidate)])
        transitionProbabilities.append(
            (candidate, transitionCount / self.stateCounter[state]))

    for candidate, probability in transitionProbabilities:
        if probability > 0.0:
            yield candidate, probability
def getPredecessorDistribution(self, state):
    """ Return the predecessor distribution for the given *state*.

    Returns an iterator that yields pairs of states and their probabilities
    of being the predecessor of the given *state*.

    The nearest stored successor states of *state* are queried from a
    kd-tree, which is rebuilt lazily when self.rebuildPred is set. For each
    neighbor, the offset between the neighbor and its recorded predecessor
    is applied to *state*; the pair's probability is the neighbor's
    gaussian weight divided by the sum of the weights.

    Raises ModelNotInitialized (on first iteration) if no successor states
    are stored.
    """
    if self.succStates is None:
        raise ModelNotInitialized()

    # NOTE(review): the neighbor count is derived from self.states although
    # the tree is built over self.succStates - presumably both hold the same
    # number of samples; confirm.
    k = min(self.states.shape[0], self.k)

    if self.rebuildPred:
        self.predKDTree = ann.kdtree(self.succStates)
        self.rebuildPred = False

    indices, distances = self.predKDTree.knn(state, k)
    neighborIndices = indices[0]
    neighborDistances = distances[0]

    denominator = numpy.sum(numpy.exp(-neighborDistances / (self.b_Sa**2)))
    # If the distances become too large, then all values can become zero
    # In this situation, we simply return the closest state and probability 1.
    if denominator == 0 or numpy.isnan(denominator):
        import warnings
        warnings.warn("Too large distances, returing only closest example")
        # Slice instead of assigning into the query result: assigning a
        # one-element list to an array row would broadcast it over all k
        # slots and yield the closest example k times.
        neighborIndices = neighborIndices[:1]
        neighborDistances = [0.0]
        denominator = numpy.exp(0.0 / (self.b_Sa**2))

    for index, distance in zip(neighborIndices, neighborDistances):
        neighbor = State(
            self.succStates[index],
            state.dimensions)  # TODO: not use state.dimensions
        predState, reward = self.predecessorSamples[neighbor]
        delta = predState - neighbor
        predictedPredState = State(state + delta, state.dimensions)

        yield predictedPredState, gaussian(distance, self.b_Sa) / denominator
def sampleSuccessorState(self, state):
    """ Return a state drawn from *state*'s successor distribution

    Returns a possible successor state of *state*. Every known state is
    weighted by the number of observed transitions from *state* to it
    divided by the counter of *state*, and a successor is sampled with
    probability proportional to that weight. Scaling the random draw by
    the total weight keeps the sampling well-defined even if the weights
    do not sum to exactly 1 (e.g. due to floating point rounding).

    Returns None if no known state has a positive weight.

    Raises ModelNotInitialized if the counter of *state* is zero.
    """
    if self.stateCounter[state] == 0:
        raise ModelNotInitialized()

    def computeProbabilityFct(succState):
        return float(self.stateTransitions[(state, succState)]) \
            / self.stateCounter[state]

    probabilityMassFunction = [(succState, computeProbabilityFct(succState))
                               for succState in self.states]
    # States with zero mass can never be drawn, so drop them up front.
    candidates = [(succState, probabilityMass)
                  for succState, probabilityMass in probabilityMassFunction
                  if probabilityMass > 0.0]
    if not candidates:
        return None

    totalMass = sum(probabilityMass for _, probabilityMass in candidates)
    randValue = random.random() * totalMass
    accumulator = 0.0
    for succState, probabilityMass in candidates:
        accumulator += probabilityMass
        if accumulator > randValue:
            return succState
    # Floating point rounding may leave the accumulated mass marginally below
    # the drawn value; the draw then belongs to the last candidate.
    return candidates[-1][0]
def getPredecessorDistribution(self, state):
    """ Return the predecessor distribution for the given *state*.

    Returns an iterator that yields pairs (predecessor state, mass) for all
    known states with a non-zero mass. The mass of a predecessor is the
    inverse transition count stored for (*state*, predecessor) divided by
    the counter of the predecessor; predecessors whose counter is zero get
    mass 0 and are thus skipped.

    Raises ModelNotInitialized (on first iteration) if the counter of
    *state* is zero.
    """
    if self.stateCounter[state] == 0:
        raise ModelNotInitialized()

    # The masses of all known states are computed before the first pair is
    # handed out.
    predecessorMasses = []
    for candidate in self.states:
        if self.stateCounter[candidate] == 0:
            mass = 0.0
        else:
            arrivals = float(self.invStateTransitions[(state, candidate)])
            mass = arrivals / self.stateCounter[candidate]
        predecessorMasses.append((candidate, mass))

    for candidate, mass in predecessorMasses:
        if mass > 0.0:
            yield candidate, mass
def sampleState(self):
    """ Return a known state randomly sampled with uniform distribution

    The known states are the keys of the state counter.

    Raises ModelNotInitialized if the state counter has no keys.
    """
    # Materialize the keys: random.choice needs an indexable sequence, which
    # a dictionary key view is not.
    knownStates = list(self.stateCounter.keys())
    if not knownStates:
        raise ModelNotInitialized()

    return random.choice(knownStates)
def getExpectedReward(self, state):
    """ Returns the expected reward for the given state

    The expected reward is the reward accumulated for *state* divided by
    the counter of *state*.

    Raises ModelNotInitialized if the counter of *state* is zero.
    """
    visits = self.stateCounter[state]
    if visits == 0:
        raise ModelNotInitialized()

    # Convert explicitly so that integer rewards are not subject to
    # truncating integer division.
    return float(self.accumulatedReward[state]) / visits