Example #1
    def samplePredecessorState(self, state):
        """ Return a states drawn from *state*'s predecessor distribution 
        
        Returns a possible predecessor state of *state* drawn from the 
        predecessor state distribution according to its probability mass function.
        """
        if self.stateCounter[state] == 0:
            raise ModelNotInitialized()

        def computeProbabilityFct(predState):
            if self.stateCounter[predState] == 0:
                return 0.0
            else:
                return float(self.invStateTransitions[(state, predState)]) \
                                    / self.stateCounter[predState]

        probabilityMassFunction = [(predState,
                                    computeProbabilityFct(predState))
                                   for predState in self.states]

        randValue = random.random()
        accumulator = 0.0
        for predState, probabilityMass in probabilityMassFunction:
            accumulator += probabilityMass
            if accumulator >= randValue:
                return predState
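
The sampling loop above is an inverse-transform (roulette wheel) draw over an explicit probability mass function: a uniform random number is compared against the running sum of probabilities. A minimal self-contained sketch of the same pattern over a plain list of (value, probability) pairs (the helper name and the toy PMF are illustrative, not part of the model):

import random

def sample_from_pmf(pmf):
    """Draw one value from a list of (value, probability) pairs.

    Assumes the probabilities sum to (approximately) 1.0, as they do when
    they are built from normalized transition counts.
    """
    rand_value = random.random()
    accumulator = 0.0
    for value, probability_mass in pmf:
        accumulator += probability_mass
        if accumulator >= rand_value:
            return value
    # Guard against floating point rounding: fall back to the last entry.
    return pmf[-1][0]

# Example: a predecessor distribution over three candidate states.
print(sample_from_pmf([("s1", 0.5), ("s2", 0.3), ("s3", 0.2)]))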
Example #2
 def sampleState(self):
     """ Return a state drawn randomly """
     stateDensity = self.exampleSet.getStateDensity()
     if stateDensity is not None:
         # TODO: Does it make sense to sample based on the data set?
         return State(stateDensity.resample(1).T[0])
     else:
         raise ModelNotInitialized()
Example #3
 def sampleSuccessorState(self, state):
     """ Return a state drawn from the state's successor distribution """
     if self._retrainingRequired():
         self._updateModel()
     if self.succStateModel is not None:
         return State(state + self.succStateModel.predict(state),
                      state.dimensions)
     else:
         raise ModelNotInitialized()
Example #4
    def getExpectedReward(self, state):
        """ Returns the expected reward for the given state *state* """
        if self._retrainingRequired():
            self._updateModel()

        if self.rewardModel is not None and self.exampleSet.states is not None:
            nearestNeighbor = self.exampleSet.getNearestNeighbor(state)
            return float(self.rewardModel[nearestNeighbor])
        else:
            raise ModelNotInitialized()
Example #5
    def getSuccessorDistribution(self, state):
        """ Return the successor distribution for the given state. 
        
        Returns an iterator that yields pairs of grid nodes and
        their probabilities of being the successor of the given state. 
        """
        if self._retrainingRequired():
            self._updateModel()

        if self.succStateModel is not None:
            # This is a deterministic model!
            yield (State(state + self.succStateModel.predict(state),
                         state.dimensions), 1.0)
        else:
            raise ModelNotInitialized()
Example #6
    def getExplorationValue(self, state):
        """ Return the exploratory value of the given state *state*
        
        The exploratory value of a state under this model is defined simply as
        the Euclidean distance from the state to its nearest neighbor in the
        example set. 
        """
        if self._retrainingRequired():
            self._updateModel()

        if self.exampleSet.states is not None:
            nearestNeighbor = self.exampleSet.getNearestNeighbor(state)
            dist = numpy.linalg.norm(nearestNeighbor - state)
            return -dist
        else:
            raise ModelNotInitialized()
Example #7
    def getSuccessorDistribution(self, state):
        """ Return the successor distribution for the given *state*. 
        
        Returns an iterator that yields pairs of states and
        their probabilities of being the successor of the given *state*. 
        """
        if self.states is None:
            raise ModelNotInitialized()

        k = min(self.states.shape[0], self.k)

        if self.rebuildSucc:
            self.succKDTree = ann.kdtree(self.states)
            self.rebuildSucc = False

        indices, distances = self.succKDTree.knn(state, k)

        denominator = numpy.sum(numpy.exp(-distances[0] / (self.b_Sa**2)))

        # If the distances become too large, then all values can become zero
        # In this situation, we simply return the closest state and probability 1.
        if denominator == 0 or numpy.isnan(denominator):
            import warnings
            warnings.warn(
                "Too large distances, returning only closest example")
            indices[0] = [indices[0][0]]
            distances[0] = [0.0]
            denominator = numpy.exp(0.0 / (self.b_Sa**2))

        for index, distance in zip(indices[0], distances[0]):
            neighbor = State(
                self.states[index],
                state.dimensions)  # TODO: not use state.dimensions
            succState, reward = self.successorSamples[neighbor]

            delta = succState - neighbor
            predictedSuccState = State(state + delta, state.dimensions)

            if not 0 <= gaussian(distance, self.b_Sa) / denominator <= 1:
                import warnings
                import sys
                warnings.warn("Invalid distances in KNN Model!")
                print(distances)
                sys.exit(0)

            yield predictedSuccState, gaussian(distance,
                                               self.b_Sa) / denominator
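
The yielded probabilities are normalized Gaussian kernel weights over the k nearest neighbors: a neighbor at distance d contributes exp(-d / b**2) divided by the sum of that kernel over all neighbors (the same formula spelled out in the getNearestNeighbors docstring below). A stand-alone sketch of this weighting, assuming gaussian(d, b) above computes exp(-d / b**2) consistently with the denominator:

import numpy

def gaussian_knn_weights(distances, b):
    """Normalized Gaussian kernel weights for a list of k-NN distances."""
    kernel = numpy.exp(-numpy.asarray(distances, dtype=float) / (b ** 2))
    denominator = kernel.sum()
    if denominator == 0 or numpy.isnan(denominator):
        # All neighbors are too far away: put all mass on the closest one,
        # mirroring the fallback in getSuccessorDistribution above.
        weights = numpy.zeros_like(kernel)
        weights[numpy.argmin(distances)] = 1.0
        return weights
    return kernel / denominator

# Example: three neighbors at increasing distance, bandwidth b = 1.0.
print(gaussian_knn_weights([0.1, 0.5, 2.0], b=1.0))  # entries sum to 1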
Example #8
    def getNearestNeighbors(self, state, k, b):
        """ Determines *k* most similar states to the given *state*
        
        Determines *k* most similar states to the given *state*. Returns an
        iterator over (weight, neighbor), where weight is the guassian weigthed
        influence of the neighbor onto *state*. The weight is computed via
        exp(-dist/b**2)/sum_over_neighbors(exp(-dist_1/b**2)).
        Note that the weights sum to 1.
        """
        if self.states is not None:
            k = min(k, self.states.shape[1])

            # If a kd-tree exists, we can use approximate nearest neighbor search
            if hasattr(self, "kdTree"):
                indices, distances = self.kdTree.knn(state, k=k)

                # Compute weights based on distance
                weights = numpy.exp(-distances[0] / (b**2))
                denominator = numpy.sum(weights)

                # If the distances become too large, then all values can become zero
                # In this situation, we simply return the closest state and probability 1.
                if denominator == 0:
                    import warnings
                    warnings.warn(
                        "Too large distances, returning only closest example")
                    indices[0] = [indices[0][0]]
                    weights[0] = 1.0
                else:
                    # Normalize weights
                    weights = weights / denominator

                for index, weight in zip(indices[0], weights):
                    yield weight, State(self.states.T[index], state.dimensions)
            else:
                assert k == 1
                minDist = numpy.inf
                closestSample = None
                for index in range(self.states.shape[1]):
                    sampleState = self.states.T[index]
                    dist = numpy.linalg.norm(state - sampleState)
                    if dist < minDist:
                        minDist = dist
                        closestSample = sampleState
                yield 1.0, State(closestSample, state.dimensions)
        else:
            raise ModelNotInitialized("No state samples available")
Example #9
    def samplePredecessorState(self, state):
        """ Return a states drawn from *state*'s predecessor distribution 
        
        Returns a possible predecessor state of *state* drawn from the 
        predecessor state distribution according to its probability mass function.
        """
        if self.succStates is None:
            raise ModelNotInitialized()

        predDistr = self.getPredecessorDistribution(state)
        randVal = random.uniform(0, 1)
        cumProb = 0.0
        for predState, predProb in predDistr:
            cumProb += predProb
            if cumProb >= randVal:
                return predState

        assert False, "No predecessor state has been found!"
Example #10
    def drawTransitions(self, samples):
        """ Returns a random iterator over the transitions
        
        Returns a random iterator over the transitions that yields *samples*
        transitions from the data set. If more samples are requested
        than contained in the data set, then data is reused.
        """
        if self.states is None:
            raise ModelNotInitialized()

        counter = 0
        while True:
            for i in numpy.random.permutation(range(self.states.shape[1])):
                yield (self.states[:, i], self.succStates[:, i],
                       self.rewards[:, i])
                counter += 1
                if counter >= samples: return
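
The generator cycles through fresh random permutations of the data set until *samples* transitions have been yielded, so every transition is visited once per pass before any is reused. A minimal stand-alone sketch of this sampling pattern over a plain list (names are illustrative):

import numpy

def draw_with_reuse(items, samples):
    """Yield *samples* items, reshuffling and reusing the list as needed."""
    counter = 0
    while True:
        for i in numpy.random.permutation(len(items)):
            yield items[i]
            counter += 1
            if counter >= samples:
                return

# Example: drawing 5 items from a 3-element list reuses the data after one pass.
print(list(draw_with_reuse(["a", "b", "c"], 5)))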
Example #11
    def getSuccessorDistribution(self, state):
        """ Return the successor distribution for the given *state*. 
        
        Returns an iterator that yields pairs of states and
        their probabilities of being the successor of the given *state*. 
        """
        if self.stateCounter[state] == 0:
            raise ModelNotInitialized()

        computeProbabilityFct = \
            lambda succState: float(self.stateTransitions[(state, succState)]) \
                                    / self.stateCounter[state]
        probabilityMassFunction = [(succState,
                                    computeProbabilityFct(succState))
                                   for succState in self.states]

        for succState, probabilityMass in probabilityMassFunction:
            if probabilityMass > 0.0:
                yield succState, probabilityMass
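
The probability mass assigned to each successor is simply the empirical transition frequency: the number of observed transitions from *state* to that successor, divided by the number of visits to *state*. A self-contained sketch of building such a tabular estimate from a list of observed (state, successor) pairs (the dictionaries mirror, but are not, the model's internal stateTransitions and stateCounter attributes):

from collections import defaultdict

def empirical_successor_pmf(transitions, state):
    """PMF over successors of *state*, estimated from (state, succState) pairs."""
    stateCounter = defaultdict(int)
    stateTransitions = defaultdict(int)
    for s, succ in transitions:
        stateCounter[s] += 1
        stateTransitions[(s, succ)] += 1
    return [(succ, count / float(stateCounter[state]))
            for (s, succ), count in stateTransitions.items() if s == state]

# Example: "a" was observed twice going to "b" and once going to "c".
print(empirical_successor_pmf([("a", "b"), ("a", "b"), ("a", "c")], "a"))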
Example #12
    def getPredecessorDistribution(self, state):
        """ Return a states drawn from *state*'s predecessor distribution 
        
        Returns a possible predecessor state of *state* drawn from the 
        predecessor state distribution according to its probability mass function.
        """
        if self.succStates is None:
            raise ModelNotInitialized()

        k = min(self.states.shape[0], self.k)

        if self.rebuildPred:
            self.predKDTree = ann.kdtree(self.succStates)
            self.rebuildPred = False

        indices, distances = self.predKDTree.knn(state, k)

        denominator = numpy.sum(numpy.exp(-distances[0] / (self.b_Sa**2)))

        # If the distances become too large, then all values can become zero
        # In this situation, we simply return the closest state and probability 1.
        if denominator == 0:
            import warnings
            warnings.warn("Too large distances, returing only closest example")
            indices[0] = [indices[0][0]]
            distances[0] = [0.0]
            denominator = numpy.exp(0.0 / (self.b_Sa**2))

        for index, distance in zip(indices[0], distances[0]):
            neighbor = State(
                self.succStates[index],
                state.dimensions)  # TODO: not use state.dimensions
            predState, reward = self.predecessorSamples[neighbor]

            delta = predState - neighbor
            predictedPredState = State(state + delta, state.dimensions)

            yield predictedPredState, gaussian(distance,
                                               self.b_Sa) / denominator
Example #13
    def sampleSuccessorState(self, state):
        """ Return a states drawn from *state*'s successor distribution 
        
        Returns a possible successor state of *state* drawn from the successor 
        state distribution according to its probability mass function.
        """
        if self.stateCounter[state] == 0:
            raise ModelNotInitialized()

        computeProbabilityFct = \
            lambda succState: float(self.stateTransitions[(state, succState)]) \
                                    / self.stateCounter[state]
        probabilityMassFunction = [(succState,
                                    computeProbabilityFct(succState))
                                   for succState in self.states]

        randValue = random.random()
        accumulator = 0.0
        for succState, probabilityMass in probabilityMassFunction:
            accumulator += probabilityMass
            if accumulator >= randValue:
                return succState
Example #14
    def getPredecessorDistribution(self, state):
        """ Return a states drawn from *state*'s predecessor distribution 
        
        Returns a possible predecessor state of *state* drawn from the 
        predecessor state distribution according to its probability mass function.
        """
        if self.stateCounter[state] == 0:
            raise ModelNotInitialized()

        def computeProbabilityFct(predState):
            if self.stateCounter[predState] == 0:
                return 0.0
            else:
                return float(self.invStateTransitions[(state, predState)]) \
                                    / self.stateCounter[predState]

        probabilityMassFunction = [(predState,
                                    computeProbabilityFct(predState))
                                   for predState in self.states]

        for predState, probabilityMass in probabilityMassFunction:
            if probabilityMass > 0.0:
                yield predState, probabilityMass
Example #15
    def sampleState(self):
        """ Return a known state randomly sampled with uniform distribution"""
        if len(self.stateCounter) == 0:
            raise ModelNotInitialized()

        return random.choice(list(self.stateCounter.keys()))
Example #16
    def getExpectedReward(self, state):
        """ Returns the expected reward for the given state """
        if self.stateCounter[state] == 0:
            raise ModelNotInitialized()

        return self.accumulatedReward[state] / self.stateCounter[state]
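
The expected reward here is the sample mean of all rewards observed in *state*: the accumulated reward divided by the visit count. A hypothetical minimal sketch of how such tallies are typically maintained (the update method is not part of the excerpt above, so this is an assumption about the bookkeeping, not the library's actual code):

from collections import defaultdict

class RewardTally(object):
    """Hypothetical tally: expected reward as a running mean per state."""

    def __init__(self):
        self.stateCounter = defaultdict(int)
        self.accumulatedReward = defaultdict(float)

    def addObservation(self, state, reward):
        self.stateCounter[state] += 1
        self.accumulatedReward[state] += reward

    def getExpectedReward(self, state):
        return self.accumulatedReward[state] / self.stateCounter[state]

# Example: rewards 1.0 and 3.0 observed in state "s" -> expected reward 2.0.
tally = RewardTally()
tally.addObservation("s", 1.0)
tally.addObservation("s", 3.0)
print(tally.getExpectedReward("s"))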