def beliefUpdate(self, b, a, o):
    """Combine belief ``b`` with the observation mixture ``self.pz[o]``.

    Every component of ``b`` is shifted by ``self.delA[a]`` and its
    covariance is enlarged by ``self.delAVar``.  Each shifted component is
    then multiplied with every component of ``self.pz[o]`` using the
    product-of-Gaussians identities::

        var    = (S_obs^-1 + S_pred^-1)^-1
        mean   = var * (S_obs^-1 * m_obs + S_pred^-1 * m_pred)
        weight = w_obs * w_bel * N(m_pred; m_obs, S_obs + S_pred)

    Args:
        b: mixture exposing ``.Gs``; each component has ``mean``, ``var``
            and ``weight``.
        a: index into ``self.delA``.
        o: index into ``self.pz``.

    Returns:
        The resulting mixture after ``kmeansCondensationN(self.maxMix)``
        and weight normalisation.
    """
    shift = np.matrix(self.delA[a]).T
    trans_cov = np.matrix(self.delAVar)

    btmp = GM()
    for obs in self.pz[o].Gs:
        obs_mean = np.matrix(obs.mean).T
        obs_cov = np.matrix(obs.var)
        obs_prec = obs_cov.I

        for bel in b.Gs:
            pred_mean = np.matrix(bel.mean).T + shift
            pred_cov = np.matrix(bel.var) + trans_cov
            pred_prec = pred_cov.I

            # The three covariances must be summed with `+`.  np.add's third
            # positional argument is the `out` buffer, so
            # np.add(S_obs, S_bel, delAVar) dropped delAVar from the sum and
            # overwrote it with S_obs + S_bel for the lines that follow.
            likelihood = mvn.pdf(
                pred_mean.T.tolist()[0],
                obs_mean.T.tolist()[0],
                obs_cov + pred_cov)
            weight = float(obs.weight * bel.weight * likelihood)

            var = (obs_prec + pred_prec).I
            mean = var * (obs_prec * obs_mean + pred_prec * pred_mean)

            btmp.addG(Gaussian(mean.T.tolist()[0], var.tolist(), weight))

    btmp.normalizeWeights()
    btmp = btmp.kmeansCondensationN(self.maxMix)
    btmp.normalizeWeights()
    return btmp
def beliefUpdate(self, b, a, o, maxMix=10):
    """Multiply belief ``b`` (shifted by action ``a``) with ``self.pz[o]``.

    For every pair of an observation component and a belief component, the
    belief mean is offset by ``self.delA[a]`` and its covariance is
    increased by ``self.delAVar`` (via ``self.covAdd``).  The pair is then
    fused with the product-of-Gaussians formulas and added to the result.

    Args:
        b: mixture exposing ``.Gs``.
        a: index into ``self.delA``.
        o: index into ``self.pz``.
        maxMix: argument forwarded to ``kmeansCondensationN``.

    Returns:
        The condensed mixture with normalised weights.
    """
    result = GM()
    for obs_g in self.pz[o].Gs:
        for bel_g in b.Gs:
            # Belief mean after applying the action offset (1 x d row matrix).
            moved_mean = np.add(np.matrix(bel_g.mean),
                                np.matrix(self.delA[a]))

            # Evidence term: N(moved_mean; obs mean, sum of all covariances).
            total_cov = self.covAdd(self.covAdd(obs_g.var, bel_g.var),
                                    self.delAVar)
            evidence = mvn.pdf(moved_mean.tolist(), obs_g.mean, total_cov)
            new_weight = obs_g.weight * bel_g.weight * evidence.tolist()

            # Information-form fusion of the two Gaussians.
            obs_prec = np.matrix(obs_g.var).I
            pred_prec = np.matrix(self.covAdd(bel_g.var, self.delAVar)).I
            fused_cov = np.add(obs_prec, pred_prec).I
            info_vec = (obs_prec * np.transpose(np.matrix(obs_g.mean))
                        + pred_prec * np.transpose(moved_mean))

            new_cov = fused_cov.tolist()
            new_mean = np.transpose(fused_cov * info_vec).tolist()[0]
            result.addG(Gaussian(new_mean, new_cov, new_weight))

    result = result.kmeansCondensationN(maxMix)
    result.normalizeWeights()
    return result
def beliefUpdate(self, b, a, o, mod):
    """Combine belief ``b`` with the observation mixture ``mod.pz[o]``.

    All model quantities (``pz``, ``delA``, ``delAVar``) are read from
    ``mod``; ``self`` is not used.  Every component of ``b`` is shifted by
    ``mod.delA[a]``, its covariance is enlarged by ``mod.delAVar``, and it
    is multiplied with every component of ``mod.pz[o]``::

        var    = (S_obs^-1 + S_pred^-1)^-1
        mean   = var * (S_obs^-1 * m_obs + S_pred^-1 * m_pred)
        weight = w_obs * w_bel * N(m_pred; m_obs, S_obs + S_pred)

    Args:
        b: mixture exposing ``.Gs``; each component has ``mean``, ``var``
            and ``weight``.
        a: index into ``mod.delA``.
        o: index into ``mod.pz``.
        mod: object providing ``pz``, ``delA`` and ``delAVar``.

    Returns:
        The mixture after ``kmeansCondensationN(1)`` and weight
        normalisation.
    """
    shift = np.matrix(mod.delA[a]).T
    trans_cov = np.matrix(mod.delAVar)

    btmp = GM()
    for obs in mod.pz[o].Gs:
        obs_mean = np.matrix(obs.mean).T
        obs_cov = np.matrix(obs.var)
        obs_prec = obs_cov.I

        for bel in b.Gs:
            pred_mean = np.matrix(bel.mean).T + shift
            pred_cov = np.matrix(bel.var) + trans_cov
            pred_prec = pred_cov.I

            # Sum the covariances with `+`: np.add(x1, x2, out) treats its
            # third positional argument as the output buffer, which left
            # delAVar out of the sum and overwrote it with S_obs + S_bel.
            likelihood = mvn.pdf(
                pred_mean.T.tolist()[0],
                obs_mean.T.tolist()[0],
                obs_cov + pred_cov)
            weight = float(obs.weight * bel.weight * likelihood)

            var = (obs_prec + pred_prec).I
            mean = var * (obs_prec * obs_mean + pred_prec * pred_mean)

            btmp.addG(Gaussian(mean.T.tolist()[0], var.tolist(), weight))

    btmp.normalizeWeights()
    btmp = btmp.kmeansCondensationN(1)
    btmp.normalizeWeights()
    return btmp
def backup(als, modes, delA, delAVar, pz, r, maxMix, b):
    """Build a new alpha mixture for belief ``b`` from the set ``als``.

    Step 1 fills ``newAls[i][j][k]`` for every alpha ``i``, action ``j``
    and observation ``k``.  For each mode, ``als[i]`` is multiplied with
    ``pz[k]``, every resulting mean is shifted by ``-delA[mode][j]``, every
    variance is increased by ``delAVar``, and the result is multiplied with
    the mode mixture.

    Step 2 evaluates each action: for every observation it takes the
    ``newAls`` entry with the largest ``continuousDot`` against ``b``, sums
    those entries, scales by 0.9, adds ``r[a]`` and condenses to ``maxMix``
    components.  The action whose mixture scores highest against ``b`` wins.

    Args:
        als: sequence of mixtures providing ``GMProduct``.
        modes: sequence of mixtures, one per mode.
        delA: ``delA[mode][action]`` mean offsets (scalars).
        delAVar: scalar variance added to every component.
        pz: sequence of observation mixtures, one per observation.
        r: sequence of reward mixtures, one per action.
        maxMix: ``k`` passed to ``kmeansCondensationN``.
        b: belief mixture used for scoring.

    Returns:
        A deep copy of the winning mixture with its ``action`` attribute
        set to the winning action index.
    """
    n_actions = len(delA[0])
    n_obs = len(pz)

    newAls = [[[GM() for _ in range(n_obs)] for _ in range(n_actions)]
              for _ in als]

    for i, alpha in enumerate(als):
        for j in range(n_actions):
            for k in range(n_obs):
                # enumerate gives the mode index directly; modes.index(h)
                # was an O(n) search returning the first *equal* mode.
                for mode_idx, mode in enumerate(modes):
                    # The slot is per observation k, so the observation
                    # model must be pz[k].  Indexing with the action index j
                    # made every k identical and could run off the end of pz.
                    prod = alpha.GMProduct(pz[k])

                    # getMeans()/getVars() are indexed as [0] and [0][0]
                    # here, i.e. treated as nested one-element containers.
                    means = [m[0] - delA[mode_idx][j] for m in prod.getMeans()]
                    variances = [v[0][0] + delAVar for v in prod.getVars()]
                    weights = prod.getWeights()

                    shifted = GM()
                    for m, v, w in zip(means, variances, weights):
                        shifted.addG(Gaussian(m, v, w))

                    newAls[i][j][k].addGM(shifted.GMProduct(mode))

    bestVal = -10000000000
    bestAct = 0
    bestGM = []

    for a in range(n_actions):
        suma = GM()
        for o in range(n_obs):
            scores = [continuousDot(newAls[idx][a][o], b)
                      for idx in range(len(newAls))]
            suma.addGM(newAls[np.argmax(scores)][a][o])

        suma.scalerMultiply(0.9)
        suma.addGM(r[a])

        # Unwrap list-valued parameters to scalars before condensing.
        for g in suma.Gs:
            if isinstance(g.mean, list):
                g.mean = g.mean[0]
                g.var = g.var[0][0]

        suma = suma.kmeansCondensationN(k=maxMix, lowInit=-20, highInit=20)

        value = continuousDot(suma, b)
        if value > bestVal:
            bestAct = a
            bestGM = copy.deepcopy(suma)
            bestVal = value

    bestGM.action = bestAct
    return bestGM
def beliefUpdateSoftmax(self, b, a, o):
    """Shift belief ``b`` by action ``a`` and update it through ``self.pz2``.

    Each component of ``b`` has ``self.delA[a]`` added to its mean and
    ``self.delAVar`` added to its covariance; its weight is kept.  The
    shifted mixture is passed to ``self.pz2.runVBND`` together with ``o``.
    The result is condensed with ``kmeansCondensationN(self.maxMix)`` and
    its weights are normalised.

    Args:
        b: mixture exposing ``.Gs``.
        a: index into ``self.delA``.
        o: observation argument forwarded to ``runVBND``.

    Returns:
        The condensed, weight-normalised mixture.
    """
    predicted = GM()
    for comp in b.Gs:
        moved_mean = np.matrix(comp.mean) + np.matrix(self.delA[a])
        grown_cov = np.matrix(comp.var) + np.matrix(self.delAVar)
        predicted.addG(
            Gaussian(moved_mean.tolist()[0], grown_cov.tolist(), comp.weight))

    posterior = self.pz2.runVBND(predicted, o)
    posterior = posterior.kmeansCondensationN(self.maxMix)
    posterior.normalizeWeights()
    return posterior
def beliefUpdate(modes, delA, delAVar, pz, bels, a, o, cond=-1):
    """Update the scalar-valued belief ``bels`` for action ``a`` and observation ``o``.

    For every belief component ``d``, every mode mixture ``h`` and every
    component ``f`` of that mode:

    1. ``d`` and ``f`` are fused (product of two 1-D Gaussians) into
       variance ``C1`` and mean ``c1``.
    2. The fused Gaussian is shifted by ``delA[mode][a]`` and widened by
       ``delAVar`` to give ``c2`` / ``C2``.
    3. The shifted Gaussian is fused with every component ``l`` of
       ``pz[o]``; the weight is
       ``d.weight * f.weight * l.weight * N(l.mean; c2, l.var + C2)``.

    Args:
        modes: sequence of mode mixtures.
        delA: ``delA[mode][action]`` mean offsets.
        delAVar: variance added in the shift step.
        pz: sequence of observation mixtures.
        bels: belief mixture exposing ``.Gs``.
        a: action index.
        o: observation index into ``pz``.
        cond: when not ``-1``, the ``k`` passed to ``kmeansCondensationN``.

    Returns:
        The weight-normalised (and optionally condensed) mixture.
        ``display()`` is called on it before returning.
    """
    btmp = GM()

    for d in bels.Gs:
        # enumerate supplies the mode index without the O(n) modes.index(h)
        # search, which also returns the first *equal* mode rather than h.
        for mode_idx, h in enumerate(modes):
            shift = delA[mode_idx][a]
            for f in h.Gs:
                # These terms do not depend on the observation component,
                # so they are computed once per (d, f) pair.
                C1 = 1.0 / (1.0 / f.var + 1.0 / d.var)
                c1 = C1 * ((1.0 / f.var) * f.mean + (1.0 / d.var) * d.mean)
                C2 = C1 + delAVar
                c2 = c1 + shift

                for l in pz[o].Gs:
                    weight = (d.weight * f.weight * l.weight
                              * mvn.pdf(l.mean, c2, l.var + C2))
                    var = 1.0 / ((1.0 / l.var) + (1.0 / C2))
                    mean = var * ((1.0 / l.var) * l.mean + (1.0 / C2) * c2)
                    btmp.addG(Gaussian(mean, var, weight))

    btmp.normalizeWeights()

    if cond != -1:
        btmp = btmp.kmeansCondensationN(k=cond, lowInit=-20, highInit=20)

    # Unwrap any list-wrapped variances down to a scalar.  Components are
    # reached through .Gs, as everywhere else in this file.
    for g in btmp.Gs:
        while isinstance(g.var, list):
            g.var = g.var[0]

    btmp.display()

    return btmp
def backup(als, modes, delA, delAVar, pz, r, maxMix, b):
    """Build a new alpha mixture for belief ``b`` from the set ``als``.

    Step 1 fills ``newAls[i][j][k]`` for every alpha ``i``, action ``j``
    and observation ``k``.  For each mode index ``h`` the mixture
    ``modes.runVB(als[i], h)`` is fused component-by-component with
    ``pz[k]`` (product of two 1-D Gaussians); the fused mean is shifted by
    ``-delA[h][j]`` and the fused variance is increased by ``delAVar``.

    Step 2 evaluates each action: for every observation it takes the
    ``newAls`` entry with the largest ``continuousDot`` against ``b``, sums
    those entries, scales by 0.9, adds ``r[a]`` and condenses to ``maxMix``
    components.  The action whose mixture scores highest against ``b`` wins.

    Returns:
        A deep copy of the winning mixture with its ``action`` attribute
        set to the winning action index.
    """
    numAlphas = len(als)
    numActions = len(delA[0])
    numObs = len(pz)

    newAls = [[[0] * numObs for _ in range(numActions)]
              for _ in range(numAlphas)]

    for i in range(numAlphas):
        for j in range(numActions):
            for k in range(numObs):
                cell = GM()
                newAls[i][j][k] = cell

                for h in range(len(modes.weights)):
                    tmp1 = modes.runVB(als[i], h)

                    for l in range(pz[k].size):
                        mixl = pz[k].Gs[l]
                        for p in range(tmp1.size):
                            mixp = tmp1.Gs[p]

                            # Weight: product of the component weights times
                            # the overlap of the two Gaussians.
                            overlap = mvn.pdf(mixp.mean, mixl.mean,
                                              mixp.var + mixl.var)
                            weight = (mixp.weight * mixl.weight) * overlap

                            # Fused variance and mean of the pair.
                            c2 = (mixp.var ** -1 + mixl.var ** -1) ** -1
                            c1 = c2 * (mixp.var ** -1 * mixp.mean
                                       + mixl.var ** -1 * mixl.mean)

                            cell.addG(Gaussian(c1 - delA[h][j],
                                               c2 + delAVar,
                                               weight))

    bestVal = -10000000000
    bestAct = 0
    bestGM = []

    for a in range(numActions):
        suma = GM()
        for o in range(numObs):
            scores = [continuousDot(newAls[idx][a][o], b)
                      for idx in range(len(newAls))]
            suma.addGM(newAls[np.argmax(scores)][a][o])

        suma.scalerMultiply(0.9)
        suma.addGM(r[a])

        # Unwrap list-valued parameters to scalars before condensing.
        for g in suma.Gs:
            if isinstance(g.mean, list):
                g.mean = g.mean[0]
            if isinstance(g.var, list):
                g.var = g.var[0][0]

        suma = suma.kmeansCondensationN(k=maxMix, lowInit=-20, highInit=20)

        score = continuousDot(suma, b)
        if score > bestVal:
            bestAct = a
            bestGM = copy.deepcopy(suma)
            bestVal = score

    bestGM.action = bestAct
    return bestGM
def MDPValueIteration(self, gen=True):
    """Compute, or load from file, the value function ``self.ValueFunc``.

    With ``gen`` true the value function starts as a copy of ``self.r``
    with every weight set to -1000.  It is then iterated until
    ``self.ValueFunc.comp`` reports a match with the previous iterate, or
    30 iterations have run.  Each iteration, for actions 0 and 1, shifts
    every component mean by ``-self.delA[a]``, adds ``self.delAVar`` to its
    covariance and adds ``self.r``.  It keeps the action whose mixture has
    the largest ``self.continuousDot`` against a 5x5x5x5 grid mixture,
    scales it by ``self.discount`` and condenses it to 20 components.  The
    result is written to ``policies/MDP4DIntercept``.

    With ``gen`` false the value function is read from that same file.
    """
    policy_path = "policies/MDP4DIntercept"

    if not gen:
        loader = GM()
        self.ValueFunc = loader.readGMArray4D(policy_path)[0]
        return

    # Initial value function: the reward mixture with all weights at -1000.
    self.ValueFunc = copy.deepcopy(self.r)
    for comp in self.ValueFunc.Gs:
        comp.weight = -1000

    # Placeholder "previous iterate" for the first convergence check.
    comparision = GM()
    comparision.addG(
        Gaussian([1, 0, 0, 0],
                 [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
                 1))

    # Grid mixture used to score candidate value functions.
    uniform = GM()
    grid_points = [[i, j, k, l]
                   for i in range(5)
                   for j in range(5)
                   for k in range(5)
                   for l in range(5)]
    for point in grid_points:
        # A fresh covariance list per component, as in a literal.
        grid_cov = [[4 if row == col else 0 for col in range(4)]
                    for row in range(4)]
        uniform.addG(Gaussian(point, grid_cov, 1))

    count = 0
    while not (self.ValueFunc.comp(comparision) or count >= 30):
        print(count)
        comparision = copy.deepcopy(self.ValueFunc)
        count += 1

        maxVal = -10000000
        maxGM = GM()
        for a in range(2):
            suma = GM()
            for comp in self.ValueFunc.Gs:
                shifted_mean = np.matrix(comp.mean) - np.matrix(self.delA[a])
                grown_cov = np.matrix(comp.var) + np.matrix(self.delAVar)
                suma.addG(Gaussian(shifted_mean.tolist(),
                                   grown_cov.tolist(),
                                   comp.weight))
            suma.addGM(self.r)

            candidate = self.continuousDot(uniform, suma)
            if candidate > maxVal:
                maxVal = candidate
                maxGM = copy.deepcopy(suma)

        # NOTE(review): the discount is applied after self.r has been added,
        # so the reward components are scaled as well — confirm intended.
        maxGM.scalerMultiply(self.discount)
        maxGM = maxGM.kmeansCondensationN(20)
        self.ValueFunc = copy.deepcopy(maxGM)

    print("MDP Value Iteration Complete")
    self.ValueFunc.printGMArrayToFile([self.ValueFunc], policy_path)