def backup(self, b):
    """Return the best one-step backup of the stored alphas at belief ``b``.

    For every action index in ``range(len(self.delA))`` a candidate is
    assembled: for each observation index, the entry of ``self.preAls``
    whose ``[action][obs]`` element has the largest
    ``self.continuousDot(..., b)`` is added to a fresh ``GM``. The sum is
    then scaled by ``self.discount`` and ``self.r[action]`` is added.

    The candidate with the highest ``continuousDot`` against ``b`` is
    deep-copied, tagged with its action index in ``.action`` and returned.

    The observation count is ``self.pz.size`` when ``self.useSoft`` is
    truthy and ``len(self.pz)`` otherwise.

    NOTE: if no candidate scores above the -1e10 starting value, the
    placeholder list is never replaced and assigning ``.action`` to it
    raises ``AttributeError``.
    """
    # Gamma is not consulted here; it is still read so the method touches
    # the same attributes, in the same order, as it always has.
    _gamma, rewards, obs_model = self.Gamma, self.r, self.pz
    n_obs = obs_model.size if self.useSoft else len(obs_model)
    alphas = self.preAls

    top_score = -10000000000
    top_action = 0
    top_mix = []

    for action in range(len(self.delA)):
        candidate = GM()
        for obs in range(n_obs):
            # Pick the alpha that is best for this (action, observation)
            # pair at the current belief.
            scores = [
                self.continuousDot(alpha[action][obs], b) for alpha in alphas
            ]
            candidate.addGM(alphas[np.argmax(scores)][action][obs])
        candidate.scalerMultiply(self.discount)
        candidate.addGM(rewards[action])

        score = self.continuousDot(candidate, b)
        if score > top_score:
            top_action = action
            top_mix = copy.deepcopy(candidate)
            top_score = score

    top_mix.action = top_action
    return top_mix
def backupFactored(self, b):
    """Return the best one-step backup at ``b`` over a factored action pair.

    Actions are pairs ``[am, aq]`` with ``am`` in ``range(len(self.delA))``
    and ``aq`` in ``range(8)``; two observation indices (``range(2)``) are
    considered. For each pair, the entry of ``self.preAls`` whose
    ``[am][aq][oq]`` element has the largest ``self.continuousDot(..., b)``
    is added to a fresh ``GM`` for every ``oq``. The sum is scaled by
    ``self.discount`` and ``self.r[am]`` is added.

    The highest-scoring candidate (by ``continuousDot`` against ``b``) is
    deep-copied, gets ``.action = [am, aq]`` and is returned.

    NOTE: if no candidate scores above the -1e10 starting value, the
    placeholder list is never replaced and assigning ``.action`` to it
    raises ``AttributeError``.
    """
    # Gamma and pz are not used below; they are still read so the method
    # touches the same attributes, in the same order, as before.
    _gamma, rewards, _pz = self.Gamma, self.r, self.pz
    alphas = self.preAls

    top_score = -10000000000
    top_action = [0, 0]
    top_mix = []

    for am in range(len(self.delA)):
        for aq in range(8):
            candidate = GM()
            for oq in range(2):
                scores = [
                    self.continuousDot(alpha[am][aq][oq], b)
                    for alpha in alphas
                ]
                candidate.addGM(alphas[np.argmax(scores)][am][aq][oq])
            candidate.scalerMultiply(self.discount)
            # The reward is indexed by the first action factor only.
            candidate.addGM(rewards[am])

            score = self.continuousDot(candidate, b)
            if score > top_score:
                top_action = [am, aq]
                top_mix = copy.deepcopy(candidate)
                top_score = score

    top_mix.action = top_action
    return top_mix
def backup(self, b):
    """Return the best one-step backup of the stored alphas at belief ``b``.

    One alpha is produced per belief, i.e. one per call. For every action
    index in ``range(len(self.delA))`` and every observation index in
    ``range(len(self.pz))``, the entry of ``self.preAls`` whose
    ``[action][obs]`` element has the largest ``self.continuousDot(..., b)``
    is added to a fresh ``GM``. The sum is scaled by ``self.discount`` and
    the whole of ``self.r`` is added (it is not indexed by action here).

    The highest-scoring candidate is returned with ``.action`` set to its
    action index. The returned object is the candidate itself, not a copy.

    NOTE: if no candidate scores above the -1e10 starting value, the
    placeholder list is never replaced and assigning ``.action`` to it
    raises ``AttributeError``.
    """
    # Gamma is not used below; it is still read so the method touches the
    # same attributes, in the same order, as before.
    _gamma, reward, obs_model = self.Gamma, self.r, self.pz
    alphas = self.preAls

    top_score = -10000000000
    top_action = 0
    top_mix = []

    for action in range(len(self.delA)):
        candidate = GM()
        for obs in range(len(obs_model)):
            scores = [
                self.continuousDot(alpha[action][obs], b) for alpha in alphas
            ]
            candidate.addGM(alphas[np.argmax(scores)][action][obs])
        candidate.scalerMultiply(self.discount)
        candidate.addGM(reward)

        score = self.continuousDot(candidate, b)
        if score > top_score:
            top_action = action
            # A fresh GM is built for each action, so keeping a direct
            # reference is enough.
            top_mix = candidate
            top_score = score

    top_mix.action = top_action
    return top_mix
def backup(als, modes, delA, delAVar, pz, r, maxMix, b):
    """Perform one point-based backup at belief ``b`` and return the best alpha.

    Stage 1 builds ``newAls[i][j][k]`` for every alpha ``als[i]``, action
    index ``j`` and observation index ``k``. For each mode, the product
    ``als[i].GMProduct(pz[k])`` has ``delA[mode_index][j]`` subtracted from
    each component mean and ``delAVar`` added to each component variance;
    the shifted mixture is multiplied with the mode via ``GMProduct`` and
    accumulated.

    Stage 2 assembles one candidate per action: for each observation, the
    ``newAls`` entry with the largest ``continuousDot(..., b)`` is added,
    the sum is scaled by a fixed 0.9, ``r[action]`` is added, and the result
    is condensed with ``kmeansCondensationN(k=maxMix, lowInit=-20,
    highInit=20)``. The condensed candidate with the highest
    ``continuousDot`` against ``b`` is deep-copied, tagged with ``.action``
    and returned.

    Args:
        als: sequence of current alpha mixtures.
        modes: sequence of mode mixtures; ``delA[m]`` pairs with ``modes[m]``.
        delA: per-mode, per-action mean shifts; ``len(delA[0])`` is the
            number of actions.
        delAVar: variance added to every shifted component.
        pz: sequence of observation mixtures, one per observation.
        r: per-action reward mixtures.
        maxMix: target component count for the condensation step.
        b: belief the backup is evaluated at.

    Component means are read as ``mean[0]`` and variances as ``var[0][0]``,
    so the mixtures are treated as one-dimensional here.

    NOTE: if no candidate scores above the -1e10 starting value, the
    placeholder list is never replaced and assigning ``.action`` to it
    raises ``AttributeError``.
    """
    num_actions = len(delA[0])
    num_obs = len(pz)

    newAls = [[[None] * num_obs for _ in range(num_actions)] for _ in als]
    for i, alpha in enumerate(als):
        for j in range(num_actions):
            for k in range(num_obs):
                stepped = GM()
                # enumerate gives each mode its own position; looking the
                # mode up by value would pick the wrong delA row whenever
                # two modes compare equal.
                for mode_idx, mode in enumerate(modes):
                    # The observation model must be indexed by the
                    # observation index k. Indexing it by the action index
                    # makes every observation slot identical and overruns
                    # pz when there are more actions than observations.
                    prod = alpha.GMProduct(pz[k])
                    means = [
                        m[0] - delA[mode_idx][j] for m in prod.getMeans()
                    ]
                    variances = [v[0][0] + delAVar for v in prod.getVars()]
                    weights = prod.getWeights()

                    shifted = GM()
                    for mu, sigma, w in zip(means, variances, weights):
                        shifted.addG(Gaussian(mu, sigma, w))
                    stepped.addGM(shifted.GMProduct(mode))
                newAls[i][j][k] = stepped

    bestVal = -10000000000
    bestAct = 0
    bestGM = []
    for a in range(num_actions):
        suma = GM()
        for o in range(num_obs):
            scores = [continuousDot(entry[a][o], b) for entry in newAls]
            suma.addGM(newAls[np.argmax(scores)][a][o])
        suma.scalerMultiply(0.9)
        suma.addGM(r[a])

        # Flatten list-valued parameters to scalars before condensation.
        for g in suma.Gs:
            if isinstance(g.mean, list):
                g.mean = g.mean[0]
                g.var = g.var[0][0]
        suma = suma.kmeansCondensationN(k=maxMix, lowInit=-20, highInit=20)

        tmp = continuousDot(suma, b)
        if tmp > bestVal:
            bestAct = a
            bestGM = copy.deepcopy(suma)
            bestVal = tmp

    bestGM.action = bestAct
    return bestGM
def backup(self, b):
    """Compute the backed-up alpha that is best for belief ``b``.

    The number of observations is ``self.pz.size`` when ``self.useSoft`` is
    truthy, otherwise ``len(self.pz)``. For each action index in
    ``range(len(self.delA))``:

    * for each observation index, the ``self.preAls`` entry maximising
      ``self.continuousDot(entry[action][obs], b)`` is added to a new ``GM``;
    * the sum is scaled by ``self.discount``;
    * ``self.r[action]`` is added.

    The candidate with the largest ``self.continuousDot(candidate, b)`` is
    deep-copied, its ``.action`` is set to the winning action index, and it
    is returned.

    NOTE: if no candidate scores above the -1e10 starting value, the
    placeholder list is never replaced and assigning ``.action`` to it
    raises ``AttributeError``.
    """
    # Gamma is not used in this method; the read is retained so the same
    # attributes are accessed as before.
    _gamma = self.Gamma
    reward_by_action = self.r
    observations = self.pz
    if self.useSoft:
        obs_count = observations.size
    else:
        obs_count = len(observations)
    pre_alphas = self.preAls

    winner = []
    winner_action = 0
    winner_value = -10000000000

    for act_idx in range(len(self.delA)):
        total = GM()
        for obs_idx in range(obs_count):
            values = [
                self.continuousDot(pre_alphas[n][act_idx][obs_idx], b)
                for n in range(len(pre_alphas))
            ]
            chosen = np.argmax(values)
            total.addGM(pre_alphas[chosen][act_idx][obs_idx])
        total.scalerMultiply(self.discount)
        total.addGM(reward_by_action[act_idx])

        value = self.continuousDot(total, b)
        if value > winner_value:
            winner_action = act_idx
            winner = copy.deepcopy(total)
            winner_value = value

    winner.action = winner_action
    return winner
def backup(als, modes, delA, delAVar, pz, r, maxMix, b):
    """Perform one point-based backup at belief ``b`` and return the best alpha.

    Stage 1 builds ``newAls[i][j][k]`` for every alpha ``als[i]``, action
    index ``j`` and observation index ``k``. For each mode index ``h`` in
    ``range(len(modes.weights))``, ``modes.runVB(als[i], h)`` yields a
    mixture; every pairing of one of its components with a component of
    ``pz[k]`` contributes one ``Gaussian``:

    * weight: product of the two weights times ``mvn.pdf`` of one mean
      evaluated at the other with the summed variances;
    * variance: ``(1/var_p + 1/var_l)**-1 + delAVar``;
    * mean: the precision-weighted combined mean minus ``delA[h][j]``.

    Stage 2 assembles one candidate per action: for each observation, the
    ``newAls`` entry with the largest ``continuousDot(..., b)`` is added,
    the sum is scaled by a fixed 0.9 and ``r[action]`` is added. List-valued
    means and variances are flattened to their first element. The candidate
    with the highest ``continuousDot`` against ``b`` is deep-copied, tagged
    with ``.action`` and returned.

    ``maxMix`` is accepted but not used by this version.

    NOTE: if no candidate scores above the -1e10 starting value, the
    placeholder list is never replaced and assigning ``.action`` to it
    raises ``AttributeError``.
    """
    action_count = len(delA[0])
    obs_count = len(pz)

    newAls = []
    for i in range(len(als)):
        by_action = []
        for j in range(action_count):
            by_obs = []
            for k in range(obs_count):
                acc = GM()
                for h in range(len(modes.weights)):
                    vb_mix = modes.runVB(als[i], h)
                    for l in range(pz[k].size):
                        for p in range(vb_mix.size):
                            vb_g = vb_mix.Gs[p]
                            obs_g = pz[k].Gs[l]

                            pair_weight = vb_g.weight * obs_g.weight
                            overlap = mvn.pdf(
                                vb_g.mean, obs_g.mean, vb_g.var + obs_g.var
                            )
                            weight = pair_weight * overlap

                            # Product of two Gaussians: combine precisions.
                            joint_var = (vb_g.var**-1 + obs_g.var**-1)**-1
                            joint_mean = joint_var * (
                                vb_g.var**-1 * vb_g.mean
                                + obs_g.var**-1 * obs_g.mean
                            )

                            acc.addG(
                                Gaussian(
                                    joint_mean - delA[h][j],
                                    joint_var + delAVar,
                                    weight,
                                )
                            )
                by_obs.append(acc)
            by_action.append(by_obs)
        newAls.append(by_action)

    top_score = -10000000000
    top_action = 0
    top_mix = []
    for action in range(action_count):
        candidate = GM()
        for obs in range(obs_count):
            scores = [
                continuousDot(newAls[n][action][obs], b)
                for n in range(len(newAls))
            ]
            candidate.addGM(newAls[np.argmax(scores)][action][obs])
        candidate.scalerMultiply(0.9)
        candidate.addGM(r[action])

        for g in candidate.Gs:
            if isinstance(g.mean, list):
                g.mean = g.mean[0]
            if isinstance(g.var, list):
                g.var = g.var[0][0]

        score = continuousDot(candidate, b)
        if score > top_score:
            top_action = action
            top_mix = copy.deepcopy(candidate)
            top_score = score

    top_mix.action = top_action
    return top_mix
def MDPValueIteration(self, gen=True):
    """Compute or load ``self.ValueFunc``.

    When ``gen`` is falsy, the value function is read from
    ``"policies/MDP4DIntercept"`` via ``GM().readGMArray4D(...)[0]``.

    When ``gen`` is truthy, the value function starts as a deep copy of
    ``self.r`` with every component weight set to -1000. Each sweep then:

    * for each of two actions, shifts every component mean by
      ``-self.delA[action]``, adds ``self.delAVar`` to its variance, and
      adds ``self.r``;
    * scores each result with ``self.continuousDot`` against a fixed
      mixture of 5x5x5x5 Gaussians (integer means 0..4 per dimension,
      covariance ``4 * I``, weight 1), keeping the higher-scoring one;
    * scales the winner by ``self.discount`` and condenses it with
      ``kmeansCondensationN(20)``.

    Sweeps stop once ``self.ValueFunc.comp(previous)`` is truthy or after
    30 sweeps. The sweep counter is printed at the start of each sweep. The
    result is written with ``printGMArrayToFile`` to
    ``"policies/MDP4DIntercept"``.

    Returns ``None``; the result is stored on ``self.ValueFunc``.
    """
    policy_path = "policies/MDP4DIntercept"

    if not gen:
        self.ValueFunc = GM().readGMArray4D(policy_path)[0]
        return

    def diag4(value):
        # Fresh 4x4 diagonal matrix on every call, so no two Gaussians
        # share a covariance list.
        return [
            [value if row == col else 0 for col in range(4)]
            for row in range(4)
        ]

    # Initialise the value function.
    self.ValueFunc = copy.deepcopy(self.r)
    for component in self.ValueFunc.Gs:
        component.weight = -1000

    # Starting reference for the convergence check.
    previous = GM()
    previous.addG(Gaussian([1, 0, 0, 0], diag4(1), 1))

    grid = GM()
    centres = (
        [i, j, k, l]
        for i in range(5)
        for j in range(5)
        for k in range(5)
        for l in range(5)
    )
    for centre in centres:
        grid.addG(Gaussian(centre, diag4(4), 1))

    sweeps = 0
    # Iterate until convergence or the sweep cap.
    while not self.ValueFunc.comp(previous) and sweeps < 30:
        print(sweeps)
        previous = copy.deepcopy(self.ValueFunc)
        sweeps += 1

        best_score = -10000000
        best_mix = GM()
        for action in range(2):
            shifted = GM()
            for component in self.ValueFunc.Gs:
                new_mean = (
                    np.matrix(component.mean) - np.matrix(self.delA[action])
                ).tolist()
                new_var = (
                    np.matrix(component.var) + np.matrix(self.delAVar)
                ).tolist()
                shifted.addG(Gaussian(new_mean, new_var, component.weight))
            shifted.addGM(self.r)

            score = self.continuousDot(grid, shifted)
            if score > best_score:
                best_score = score
                best_mix = copy.deepcopy(shifted)

        best_mix.scalerMultiply(self.discount)
        best_mix = best_mix.kmeansCondensationN(20)
        self.ValueFunc = copy.deepcopy(best_mix)

    print("MDP Value Iteration Complete")
    self.ValueFunc.printGMArrayToFile([self.ValueFunc], policy_path)