Example #1
    def getMDPAction(self, x):
        #Greedy one-step lookahead: shift the value function by each action's
        #displacement, inflate it by the transition noise, add the reward,
        #and return the action with the highest value at the point x.
        maxVal = -10000000
        maxGM = GM()
        bestAct = 0
        for a in range(0, len(self.delA)):
            suma = GM()
            for g in self.ValueFunc.Gs:
                #shift each value component by the action and add the noise
                mean = (np.matrix(g.mean) - np.matrix(self.delA[a])).tolist()
                var = (np.matrix(g.var) + np.matrix(self.delAVar)).tolist()
                suma.addG(Gaussian(mean, var, g.weight))
            suma.addGM(self.r)

            tmpVal = suma.pointEval(x)
            if (tmpVal > maxVal):
                maxVal = tmpVal
                maxGM = suma
                bestAct = a
        return bestAct
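#--- Standalone sketch (not part of the original example) ---
#A minimal illustration of how getMDPAction's one-step lookahead could be
#exercised in 1-D. It reuses only calls that appear above (GM(), addG, addGM,
#.Gs, pointEval, GM(mean, var, weight)); the import's module name and the toy
#numbers are assumptions made for illustration.
from gaussianMixtures import GM, Gaussian   #assumed module name

delA = [-1, 1, 0]      #action displacements, as in MCPOMDP below
delAVar = 0.1          #transition noise variance
r = GM(2, 0.1, 1)      #reward mixture peaked at x = 2
valueFunc = GM()
for m in [-2, 0, 2]:
    valueFunc.addG(Gaussian(m, 1, 1))   #toy value function

x = 1.5
vals = []
for a in range(len(delA)):
    suma = GM()
    for g in valueFunc.Gs:
        #shift each value component by the action and inflate by the noise
        suma.addG(Gaussian(g.mean - delA[a], g.var + delAVar, g.weight))
    suma.addGM(r)
    vals.append(suma.pointEval(x))
bestAct = vals.index(max(vals))   #greedy action at x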
def MCPOMDP(b0, M=100, iterations=100, episodeLength=10):
    V = []                  #value table: [particle set, value, action] entries
    delA = [-1, 1, 0]       #action displacements
    delAVar = 0.1           #transition noise variance
    R = GM(2, 0.1, 1)       #reward mixture peaked at x = 2
    simLoopsN = 10          #simulated transitions per action
    gamma = .9              #discount factor

    #learning param
    alpha = 0.1

    #observation models: roughly x < 2, x == 2, and x > 2
    pz = [GM(), GM(), GM()]
    for i in range(-10, 2):
        pz[0].addG(Gaussian(i, 1, 1))
    pz[1].addG(Gaussian(2, 1, 1))
    for i in range(3, 10):
        pz[2].addG(Gaussian(i, 1, 1))

    #until convergence or time
    for count in range(0, iterations):
        print(count)
        #sample x from b
        #[mean,var] = (b0.getMeans()[0],b0.getVars()[0])
        #x = np.random.normal(mean,var);

        #sample particle set from b
        X = b0.sample(M)

        #for each step of the episode
        for l in range(0, episodeLength):
            part = np.random.choice(X)
            Q = [0] * len(delA)
            #for each action
            for a in range(0, len(delA)):
                #Simulate possible new beliefs
                for n in range(0, simLoopsN):
                    x = part
                    xprime = np.random.normal(x + delA[a], delAVar,
                                              size=1)[0].tolist()
                    #simulate the most likely observation for the new state
                    ztrial = [0] * len(pz)
                    for i in range(0, len(pz)):
                        ztrial[i] = pz[i].pointEval(xprime)
                    z = ztrial.index(max(ztrial))
                    XPRIME = particleFilter(X, a, z, pz)
                    Q[a] = Q[a] + (1 / simLoopsN) * gamma * (
                        R.pointEval(xprime) + shepardsInterpolation(V, XPRIME))

            [distSort, dists, eta] = shepardsInterpolation(V, X, retDist=True)
            #update used value entries
            for i in range(0, len(distSort)):
                tmpVal = V[dists.index(distSort[i])][1] + alpha * eta * (
                    1 / dists[dists.index(distSort[i])]) * (
                        max(Q) - V[dists.index(distSort[i])][1])
                #V[dists.index(distSort[i])] = [V[dists.index(distSort[i])][0],tmpVal,Q.index(max(Q))];
                V[dists.index(distSort[i])] = [
                    V[dists.index(distSort[i])][0], tmpVal,
                    V[dists.index(distSort[i])][2]
                ]

            #store a new value entry for the current particle set
            act = Q.index(max(Q))
            V.append([X, max(Q), act])

            #take the greedy action, simulate an observation, and propagate
            #the particle set for the next step
            xprime = np.random.normal(x + delA[act], delAVar,
                                      size=1)[0].tolist()
            ztrial = [0] * len(pz)
            for i in range(0, len(pz)):
                ztrial[i] = pz[i].pointEval(xprime)
            z = ztrial.index(max(ztrial))
            Xprime = particleFilter(X, act, z, pz)
            x = xprime
            X = copy.deepcopy(Xprime)

    return V
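#--- Note on helpers (not part of the original example) ---
#MCPOMDP relies on two helpers that are not shown here: particleFilter
#(propagates and reweights the particle set given an action and observation)
#and shepardsInterpolation (estimates the value of a particle set from the
#entries stored in V; with retDist=True it also returns the distances used).
#Purely as an illustration of the scalar case, an inverse-distance-weighted
#(Shepard) estimate might look like the sketch below. The distance metric
#(difference of particle-set means) and the k-nearest cutoff are assumptions,
#not the original implementation.
import numpy as np

def shepardsInterpolationSketch(V, X, kNearest=5):
    #V: list of [particleSet, value, action] entries; X: query particle set
    if len(V) == 0:
        return 0.0
    xMean = np.mean(X)
    dists = [abs(np.mean(entry[0]) - xMean) + 1e-6 for entry in V]
    order = np.argsort(dists)[:kNearest]
    weights = np.array([1.0 / dists[i] for i in order])
    values = np.array([V[i][1] for i in order])
    #inverse-distance weighting of the stored values
    return float(np.dot(weights, values) / np.sum(weights))

#A call to MCPOMDP itself might look like (same GM API as above):
#    b0 = GM()
#    b0.addG(Gaussian(0, 1, 1))
#    V = MCPOMDP(b0, M=100, iterations=10)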
Example #3
    def lwisUpdate(self, prior, softClass, numSamples, inverse=False):
        #Runs a likelihood-weighted importance sampling update on a given
        #Gaussian component, using the prior itself as the proposal
        q = GM()
        q.addG(Gaussian(prior.mean, prior.var, 1))

        p = GM()
        p.addG(prior)

        x = q.sample(numSamples)

        w = np.zeros(numSamples)
        for i in range(0, numSamples):
            if (not inverse):
                w[i] = p.pointEval(x[i]) * self.pointEvalND(
                    softClass, x[i]) / q.pointEval(x[i])
            else:
                w[i] = p.pointEval(x[i]) * (
                    1 - self.pointEvalND(softClass, x[i])) / q.pointEval(x[i])

        suma = sum(w)
        for i in range(0, len(w)):
            w[i] = w[i] / suma

        #moment matching: weighted sample mean
        muHat = np.zeros(len(prior.mean))
        for i in range(0, numSamples):
            muHat = muHat + np.dot(x[i], w[i])

        #weighted sample covariance
        varHat = np.zeros(shape=(len(prior.mean), len(prior.mean)))
        for i in range(0, numSamples):
            xi = np.asarray(x[i])
            varHat = varHat + w[i] * np.outer(xi, xi)
        varHat = varHat - np.outer(muHat, muHat)

        muHat = muHat.tolist()
        varHat = varHat.tolist()
        if (len(prior.mean) == 1):
            muHat = muHat[0]
        if (len(prior.var) == 1):
            varHat = varHat[0][0]

        #Calculate the updated mixture weight
        #sample a bunch from the prior
        tmp = GM()
        tmp.addG(Gaussian(prior.mean, prior.var, 1))
        tmpSamps = tmp.sample(500)

        #Find the likelihood at each sampled point
        probs = np.zeros(500).tolist()
        for i in range(0, 500):
            if (not inverse):
                probs[i] = self.pointEvalND(softClass, tmpSamps[i])
            else:
                probs[i] = 1 - self.pointEvalND(softClass, tmpSamps[i])
        #Find the average likelihood, which is the weight factor
        sumSamp = sum(probs) / 500

        #Multiply the sampled weight factor by the previous weight
        #or add in log space
        logSamps = np.log(sumSamp)
        logWeight = np.log(prior.weight) + logSamps
        #Extract final weight
        weight = np.exp(logWeight)

        post = Gaussian(muHat, varHat, weight)

        return post
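#--- Standalone sketch (not part of the original example) ---
#A 1-D illustration of the same likelihood-weighted importance-sampling
#update, with a generic likelihood function standing in for the softmax model
#(self.pointEvalND) used above. All names here are illustrative, not the
#original API. Because the proposal q equals the prior p (up to the mixture
#weight), the ratio p(x)/q(x) is constant and drops out after normalization.
import numpy as np

def lwisUpdateSketch(priorMean, priorVar, likelihood, numSamples=1000):
    #sample from the proposal q = prior
    x = np.random.normal(priorMean, np.sqrt(priorVar), numSamples)
    #importance weights reduce to the likelihood when q == p
    w = np.array([likelihood(xi) for xi in x])
    w = w / np.sum(w)
    #moment-matched posterior mean and variance
    muHat = np.dot(w, x)
    varHat = np.dot(w, x ** 2) - muHat ** 2
    #the average likelihood under the prior rescales the component weight
    weightScale = np.mean([likelihood(xi) for xi in
                           np.random.normal(priorMean, np.sqrt(priorVar), 500)])
    return muHat, varHat, weightScale

#Example: a soft "x is near 2" likelihood
#    mu, var, w = lwisUpdateSketch(0.0, 1.0,
#                                  lambda xi: np.exp(-0.5 * (xi - 2.0) ** 2))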