def getMDPAction(self, x):
    """Return the index of the best MDP action at state ``x``.

    For each action ``a``, the value function is "backed up" through the
    action model: every Gaussian component of ``self.ValueFunc`` is shifted
    by ``-delA[a]`` and its variance inflated by the transition noise
    ``delAVar``.  The immediate-reward mixture ``self.r`` is added, and the
    resulting mixture is evaluated at ``x``.  The action with the largest
    backed-up value is returned.

    Parameters
    ----------
    x : state (scalar or vector, whatever ``GM.pointEval`` accepts) at
        which to evaluate the backed-up value functions.

    Returns
    -------
    int
        Index into ``self.delA`` of the best action (0 if ``delA`` is empty).
    """
    # float('-inf') guarantees the first evaluated action always seeds the
    # running maximum (the old -10000000 sentinel could exceed legitimate
    # values).  The previous maxGM bookkeeping was dead code and is removed.
    maxVal = float('-inf')
    bestAct = 0
    for a in range(len(self.delA)):
        # Back up the value function through action a.
        # NOTE(review): np.matrix is deprecated in modern NumPy; kept here
        # for consistency with the rest of the file.
        backedUp = GM()
        for g in self.ValueFunc.Gs:
            mean = (np.matrix(g.mean) - np.matrix(self.delA[a])).tolist()
            var = (np.matrix(g.var) + np.matrix(self.delAVar)).tolist()
            backedUp.addG(Gaussian(mean, var, g.weight))
        backedUp.addGM(self.r)  # add the immediate-reward mixture
        tmpVal = backedUp.pointEval(x)
        if tmpVal > maxVal:
            maxVal = tmpVal
            bestAct = a
    return bestAct
def MCPOMDP(b0, M=100, iterations=100, episodeLength=10):
    """Monte-Carlo POMDP value learning over particle-set beliefs.

    Repeatedly samples a particle set from the prior belief ``b0``, rolls out
    episodes, estimates per-action Q-values by simulating transitions and
    observations, and grows/updates a value table ``V`` whose entries are
    ``[particleSet, value, actionIndex]``.

    NOTE(review): ``GM``, ``Gaussian``, ``particleFilter`` and
    ``shepardsInterpolation`` are project helpers defined elsewhere; their
    exact contracts are assumed from usage here.

    Parameters
    ----------
    b0 : GM
        Prior belief; sampled to produce the initial particle set.
    M : int
        Number of particles per belief sample.
    iterations : int
        Number of outer learning sweeps.
    episodeLength : int
        Number of belief-update steps per sweep.

    Returns
    -------
    list of ``[particleSet, value, actionIndex]`` entries.
    """
    V = []
    delA = [-1, 1, 0]       # action offsets: left, right, stay
    delAVar = 0.1           # transition noise variance
    R = GM(2, 0.1, 1)       # reward mixture centered at state 2
    simLoopsN = 10          # transition simulations per action
    gamma = .9
    #learning param
    alpha = 0.1
    # Observation models: pz[0] covers states < 2, pz[1] is state 2,
    # pz[2] covers states > 2 (each a sum of unit-variance Gaussians).
    pz = [GM(), GM(), GM()]
    for i in range(-10, 2):
        pz[0].addG(Gaussian(i, 1, 1))
    pz[1].addG(Gaussian(2, 1, 1))
    for i in range(3, 10):
        pz[2].addG(Gaussian(i, 1, 1))
    #until convergence or time
    for count in range(0, iterations):
        print(count)
        #sample x from b
        #[mean,var] = (b0.getMeans()[0],b0.getVars()[0])
        #x = np.random.normal(mean,var);
        #sample particle set from b
        X = b0.sample(M)
        #for each episode?
        for l in range(0, episodeLength):
            part = np.random.choice(X)
            Q = [0] * len(delA)
            #for each action
            for a in range(0, len(delA)):
                #Simulate possible new beliefs
                for n in range(0, simLoopsN):
                    # x is (re)bound here each simulation; it is also read
                    # after the action loops below, where it holds the value
                    # from the final simulation pass.
                    x = part
                    xprime = np.random.normal(x + delA[a], delAVar,
                                              size=1)[0].tolist()
                    # Pick the observation model most likely at xprime.
                    ztrial = [0] * len(pz)
                    for i in range(0, len(pz)):
                        ztrial[i] = pz[i].pointEval(xprime)
                    z = ztrial.index(max(ztrial))
                    XPRIME = particleFilter(X, a, z, pz)
                    # Monte-Carlo Q backup: averaged reward + interpolated
                    # value of the successor belief.
                    # NOTE(review): 1/simLoopsN is integer division (== 0)
                    # under Python 2; this code assumes Python 3 semantics.
                    Q[a] = Q[a] + (1 / simLoopsN) * gamma * (
                        R.pointEval(xprime) +
                        shepardsInterpolation(V, XPRIME))
            # Distance-weighted lookup of existing V entries near belief X.
            [distSort, dists, eta] = shepardsInterpolation(V, X, retDist=True)
            #update used value entries
            for i in range(0, len(distSort)):
                # TD-style update, weighted by inverse distance.
                # NOTE(review): divides by the raw distance — a zero
                # distance (exact belief match) would raise; confirm the
                # helper never returns 0 here.
                tmpVal = V[dists.index(distSort[i])][1] + alpha * eta * (
                    1 / dists[dists.index(distSort[i])]) * (
                        max(Q) - V[dists.index(distSort[i])][1])
                #V[dists.index(distSort[i])] = [V[dists.index(distSort[i])][0],tmpVal,Q.index(max(Q))];
                # Keeps each entry's original action (the commented line
                # above instead overwrote it with the current greedy action).
                V[dists.index(distSort[i])] = [
                    V[dists.index(distSort[i])][0], tmpVal,
                    V[dists.index(distSort[i])][2]
                ]
            # Record the current belief with its greedy value and action.
            act = Q.index(max(Q))
            V.append([X, max(Q), act])
            # Advance the belief one step using the greedy action.
            xprime = np.random.normal(x + delA[act], delAVar,
                                      size=1)[0].tolist()
            ztrial = [0] * len(pz)
            for i in range(0, len(pz)):
                ztrial[i] = pz[i].pointEval(xprime)
            z = ztrial.index(max(ztrial))
            Xprime = particleFilter(X, act, z, pz)
            x = xprime
            X = copy.deepcopy(Xprime)
    return V
def lwisUpdate(self, prior, softClass, numSamples, inverse=False,
               weightSamples=500):
    """Likelihood-weighted importance-sampling update of a Gaussian prior.

    Samples from a unit-weight copy of ``prior`` (the proposal), weights
    each sample by the softmax-class likelihood ``self.pointEvalND`` (or its
    complement when ``inverse``), and moment-matches a single posterior
    Gaussian to the weighted samples.  The posterior's mixture weight is the
    prior weight scaled by the average class likelihood under the prior.

    Parameters
    ----------
    prior : Gaussian
        Component to update; ``mean``, ``var`` and ``weight`` are read.
    softClass : class index passed through to ``self.pointEvalND``.
    numSamples : int
        Number of importance samples for the moment estimates.
    inverse : bool
        If True, weight by ``1 - likelihood`` instead of the likelihood.
    weightSamples : int
        Number of samples used to estimate the posterior mixture weight
        (was a hard-coded 500; default preserves the old behavior).

    Returns
    -------
    Gaussian
        The moment-matched, re-weighted posterior component.
    """
    # Proposal q: unit-weight copy of the prior.  Target p keeps the
    # prior's own weight.
    q = GM()
    q.addG(Gaussian(prior.mean, prior.var, 1))
    p = GM()
    p.addG(prior)

    # Importance weights: p(x) * likelihood(x) / q(x).
    x = q.sample(numSamples)
    w = np.zeros(numSamples)
    for i in range(numSamples):
        like = self.pointEvalND(softClass, x[i])
        if inverse:
            like = 1 - like
        w[i] = p.pointEval(x[i]) * like / q.pointEval(x[i])
    # Normalize (vectorized; replaces the manual per-element loop).
    w = w / np.sum(w)

    # Weighted moment matching: mean ...
    muHat = np.zeros(len(prior.mean))
    for i in range(numSamples):
        muHat = muHat + np.dot(x[i], w[i])
    # ... and covariance E[x x^T] - mu mu^T.
    varHat = np.zeros(shape=(len(prior.mean), len(prior.mean)))
    for i in range(numSamples):
        xi = np.asarray(x[i])
        varHat = varHat + w[i] * np.outer(xi, xi)
    varHat = varHat - np.outer(muHat, muHat)

    # Collapse to scalars in the 1-D case so Gaussian() sees plain numbers.
    muHat = muHat.tolist()
    varHat = varHat.tolist()
    if len(prior.mean) == 1:
        muHat = muHat[0]
    if len(prior.var) == 1:
        varHat = varHat[0][0]

    #Calculate Weights
    #sample a bunch from the prior
    tmp = GM()
    tmp.addG(Gaussian(prior.mean, prior.var, 1))
    tmpSamps = tmp.sample(weightSamples)
    #Find the likelihood at each sampled point
    probs = np.zeros(weightSamples)
    for i in range(weightSamples):
        pv = self.pointEvalND(softClass, tmpSamps[i])
        probs[i] = (1 - pv) if inverse else pv
    #Find the average likelihood, which is the weight factor
    sumSamp = np.sum(probs) / weightSamples
    # Combine with the prior weight in log space (guards underflow when
    # both factors are tiny); exp(log a + log b) == a * b otherwise.
    weight = np.exp(np.log(prior.weight) + np.log(sumSamp))

    return Gaussian(muHat, varHat, weight)