def buildReward(self, gen=True):
    """Construct or load the 4-D interception reward model.

    When gen is True the mixture is generated from scratch, plotted,
    condensed to 50 mixands, and written to
    'models/rewardModel4DIntercept'; otherwise it is read back from that
    file.
    """
    if not gen:
        # Load the previously generated reward mixture from disk.
        fileName = 'models/rewardModel4DIntercept'
        self.r = GM().readGMArray4D(fileName)[0]
        return

    self.r = GM()
    # Shared covariance: the two 2-D positions are positively correlated.
    var = [[1, 0, .7, 0],
           [0, 1, 0, .7],
           [.7, 0, 1, 0],
           [0, .7, 0, 1]]
    span = range(-2, 8)
    # Positive reward wherever the two positions coincide.
    for i in span:
        for j in span:
            self.r.addG(Gaussian([i, j, i, j], var, 5.6))
    # Negative reward over clearly-separated states.
    # NOTE(review): the condition compares i with j and k with l (x vs y
    # within each agent) rather than i with k / j with l -- confirm this
    # is the intended separation test.
    for i in span:
        for j in span:
            for k in span:
                for l in span:
                    if abs(i - j) >= 2 or abs(k - l) >= 2:
                        self.r.addG(Gaussian([i, j, k, l], var, -1))
    print('Plotting Reward Model')
    self.plotAllSlices(self.r, title='Uncondensed Reward')
    print('Condensing Reward Model')
    self.r.condense(50)
    print('Plotting Condensed Reward Model')
    self.plotAllSlices(self.r, title='Condensed Reward')
    fileName = 'models/rewardModel4DIntercept'
    self.r.printGMArrayToFile([self.r], fileName)
def convertListToNorm(l):
    """Convert a flat list [mean..., covariance(row-major)...] into a Gaussian.

    A flat list for an n-dimensional Gaussian holds n mean entries
    followed by n*n covariance entries, so len(l) == n + n*n.  The
    dimension is recovered from that quadratic relation, which
    generalizes the original hard-coded table (2->1, 6->2, 12->3, 20->4,
    30->5) to any n.  If the length does not fit the pattern, n falls
    back to 0 and an empty Gaussian is produced, matching the original
    behavior for unrecognized lengths.
    """
    # n*n + n = len(l)  =>  n = (sqrt(1 + 4*len(l)) - 1) / 2
    meanLen = int((math.sqrt(1 + 4 * len(l)) - 1) // 2)
    if meanLen * (meanLen + 1) != len(l):
        meanLen = 0  # unrecognized layout; mirror the original fallback
    newMean = list(l[:meanLen])
    h = l[meanLen:]
    # Re-fold the flat covariance tail into an n x n nested list.
    newVar = [[h[i * meanLen + j] for j in range(meanLen)]
              for i in range(meanLen)]
    return Gaussian(newMean, newVar, 1)
def buildObs(self, gen=True):
    """Build or load the 2-D cardinal observation models.

    Five GM observation models: 0 left, 1 right, 2 up, 3 down, 4 near.
    When gen is True they are generated on a [-10,10)^2 integer grid,
    plotted, condensed to 15 mixands each, and saved; otherwise they are
    loaded from disk.
    """
    if (gen):
        self.pz = [0] * 5
        for i in range(0, 5):
            self.pz[i] = GM()
        var = (np.identity(2) * 1).tolist()
        for x in range(-10, 10):
            for y in range(-10, 10):
                # left
                if (x < -1 and abs(x) > abs(y)):
                    self.pz[0].addG(Gaussian([x, y], var, 1))
                # right
                if (x > 1 and abs(x) > abs(y)):
                    self.pz[1].addG(Gaussian([x, y], var, 1))
                # up
                if (y > 1 and abs(x) < abs(y)):
                    self.pz[2].addG(Gaussian([x, y], var, 1))
                # down
                if (y < -1 and abs(y) > abs(x)):
                    self.pz[3].addG(Gaussian([x, y], var, 1))
                # near: the 3x3 block of cells around the origin
                if (x >= -1 and x <= 1 and y >= -1 and y <= 1):
                    self.pz[4].addG(Gaussian([x, y], var, 1))
        print('Plotting Observation Models')
        for i in range(0, len(self.pz)):
            [x, y, c] = self.pz[i].plot2D(low=[-10, -10], high=[10, 10],
                                          vis=False)
            plt.contourf(x, y, c, cmap='viridis')
            plt.colorbar()
            plt.show()
        print('Condensing Observation Models')
        for i in range(0, len(self.pz)):
            self.pz[i].condense(15)
        print('Plotting Condensed Observation Models')
        for i in range(0, len(self.pz)):
            [x, y, c] = self.pz[i].plot2D(low=[-10, -10], high=[10, 10],
                                          vis=False)
            plt.contourf(x, y, c, cmap='viridis')
            plt.colorbar()
            plt.show()
        # BUG FIX: np.save writes binary data, so the file must be opened
        # in binary mode ('w' fails on Python 3); 'with' also guarantees
        # the handle is closed.
        with open("./models/obs/" + self.fileNamePrefix + "OBS.npy",
                  "wb") as f:
            np.save(f, self.pz)
    else:
        self.pz = np.load("./models/obs/" + self.fileNamePrefix +
                          "OBS.npy").tolist()
def smoothAngles(verts, angs, normPer):
    """Smooth per-vertex angles with a Gaussian kernel over path distance.

    Each output angle is a kernel-weighted sum of all input angles,
    where the kernel is a 1-D Gaussian (std scaling with normPer)
    evaluated at the along-path distance between vertices.
    """
    kernel = Gaussian(0, .5 * normPer, 1)
    newAngs = []
    for i in range(len(angs)):
        smoothed = sum(
            kernel.pointEval(distanceAlongPoints(verts, i, j)) * angs[j]
            for j in range(len(angs)))
        newAngs.append(smoothed)
    return newAngs
def buildAltObs(self, gen=True):
    """Build or load the 4-D relative-position observation models.

    Five GMs over the joint state [x1, y1, x2, y2]:
      0: the two positions coincide ("center")
      1-4: sign of the coordinate differences (per the original
           labelling: left, right, down, up).
    """
    file = 'models/obsAltModel4DIntercept'
    if (gen):
        self.pz = [0] * 5
        for i in range(0, 5):
            self.pz[i] = GM()
        var = [[.7, 0, 0, 0], [0, .7, 0, 0], [0, 0, .7, 0], [0, 0, 0, .7]]
        # Model 0: co-located states.
        for i in range(-1, 7):
            for j in range(-1, 7):
                self.pz[0].addG(Gaussian([i, j, i, j], var, 1))
        # Models 1-4: relative position by coordinate-difference sign.
        for i in range(-1, 7):
            for j in range(-1, 7):
                for k in range(-1, 7):
                    for l in range(-1, 7):
                        if (i - k > 0):
                            self.pz[1].addG(Gaussian([i, j, k, l], var, 1))
                        if (i - k < 0):
                            self.pz[2].addG(Gaussian([i, j, k, l], var, 1))
                        if (j - l > 0):
                            self.pz[3].addG(Gaussian([i, j, k, l], var, 1))
                        if (j - l < 0):
                            self.pz[4].addG(Gaussian([i, j, k, l], var, 1))
        print('Plotting Observation Models')
        for i in range(0, len(self.pz)):
            self.plotAllSlices(self.pz[i], title='Uncondensed Observation')
        print('Condensing Observation Models')
        for i in range(0, len(self.pz)):
            self.pz[i] = self.pz[i].kmeansCondensationN(
                50, lowInit=[-1, -1, -1, -1], highInit=[7, 7, 7, 7])
        print('Plotting Condensed Observation Models')
        for i in range(0, len(self.pz)):
            self.plotAllSlices(self.pz[i], title='Condensed Observation')
        self.pz[0].printGMArrayToFile(self.pz, file)
    else:
        # BUG FIX: the load branch previously read
        # 'models/obsModel4DIntercept', which is not the file this method
        # writes; both branches now use the same 'Alt' model file.
        tmp = GM()
        self.pz = tmp.readGMArray4D(file)
def transformCartToPol(bcart, offset=[0, 0]):
    """Transform a Cartesian GM into polar coordinates (range, bearing).

    Each component mean is shifted by `offset` and converted to
    (r, theta); the covariance is propagated through the linearized
    (Jacobian) cart->polar transform.  `offset` is only read, never
    mutated.
    """
    bpol = GM()
    for g in bcart:
        dx = g.mean[0] - offset[0]
        dy = g.mean[1] - offset[1]
        # Polar mean is computed before the degeneracy guard, exactly as
        # the original did: an at-origin mean maps to (0, 0).
        mPrime = [np.sqrt(dx ** 2 + dy ** 2), np.arctan2(dy, dx)]
        if dx ** 2 + dy ** 2 == 0:
            # Nudge off the origin so the Jacobian below stays finite.
            dx = 0.0001
            dy = 0.0001
        r2 = dx ** 2 + dy ** 2
        r = np.sqrt(r2)
        # Jacobian of (r, theta) w.r.t. (x, y).
        JCarPol = np.matrix([[dx / r, dy / r],
                             [-dy / r2, dx / r2]])
        varPrime = (JCarPol * np.matrix(g.var) * JCarPol.T).tolist()
        bpol.addG(Gaussian(mPrime, varPrime, g.weight))
    return bpol
def buildReward(self, gen=True):
    """Build or load the per-action interception reward models.

    One GM per action.  Positive reward mass is placed on every grid
    state where the two agents' positions (nearly) coincide.
    """
    if gen:
        self.r = [GM() for _ in range(len(self.delA))]
        var = (np.identity(4) * 1).tolist()
        # dist < 0.5 on an integer grid only holds when x1==x2 and
        # y1==y2, but the original distance formulation is preserved.
        for i in range(len(self.r)):
            for x1 in range(self.bounds[0][0], self.bounds[0][1]):
                for y1 in range(self.bounds[1][0], self.bounds[1][1]):
                    for x2 in range(self.bounds[2][0], self.bounds[2][1]):
                        for y2 in range(self.bounds[3][0], self.bounds[3][1]):
                            if math.sqrt((x1 - x2) ** 2 +
                                         (y1 - y2) ** 2) < 0.5:
                                self.r[i].addG(
                                    Gaussian([x1, y1, x2, y2], var, 1))
        for r in self.r:
            r.display()
        # BUG FIX: np.save emits binary data, so the file must be opened
        # in 'wb' ('w' fails on Python 3); 'with' guarantees closing.
        with open(self.fileNamePrefix + "REW.npy", "wb") as f:
            np.save(f, self.r)
    else:
        self.r = np.load(self.fileNamePrefix + "REW.npy").tolist()
def beliefUpdate(self, b, a, o, mod):
    """Gaussian-mixture belief update for action a and observation o.

    Standard GM filter: for each (observation mixand, belief mixand)
    pair, the predicted belief N(sj + delA, sigj + delAVar) is multiplied
    with the observation likelihood N(si, sigi), yielding one reweighted
    posterior mixand.  The result is normalized and condensed to a
    single component.
    """
    btmp = GM()
    for obs in mod.pz[o].Gs:
        for bel in b.Gs:
            sj = np.matrix(bel.mean).T
            si = np.matrix(obs.mean).T
            delA = np.matrix(mod.delA[a]).T
            sigi = np.matrix(obs.var)
            sigj = np.matrix(bel.var)
            delAVar = np.matrix(mod.delAVar)
            # BUG FIX: the original called np.add(sigi, sigj, delAVar);
            # np.add's third positional argument is the *output* buffer,
            # so it computed only sigi + sigj and silently overwrote
            # delAVar.  The marginal likelihood covariance is the
            # three-way sum.
            weight = obs.weight * bel.weight
            weight = weight * mvn.pdf((sj + delA).T.tolist()[0],
                                      si.T.tolist()[0],
                                      (sigi + sigj + delAVar).tolist())
            var = (sigi.I + (sigj + delAVar).I).I
            mean = var * (sigi.I * si + (sigj + delAVar).I * (sj + delA))
            btmp.addG(Gaussian(mean.T.tolist()[0], var.tolist(),
                               weight.tolist()))
    btmp.normalizeWeights()
    # Collapse to a single mixand to bound belief complexity.
    btmp = btmp.kmeansCondensationN(1)
    btmp.normalizeWeights()
    return btmp
def testGeneralModel():
    """Visual check of a general softmax model: plot a uniform-grid prior
    and the VB posteriors for its four classes."""
    pz = Softmax()
    pz.buildGeneralModel(2, 4, [[1, 0], [2, 0], [3, 0]],
                         np.matrix([-1, 1, -1, 1, 1, -1, 0, -1, -1]).T)

    # Near-uniform prior: unit Gaussians on every grid point.
    prior = GM()
    for i in range(10):
        for j in range(5):
            prior.addG(Gaussian([i, j], [[1, 0], [0, 1]], 1))
    prior.normalizeWeights()

    dela = 0.1
    x, y = np.mgrid[0:10:dela, 0:5:dela]
    fig, axarr = plt.subplots(5)
    axarr[0].contourf(
        x, y, prior.discretize2D(low=[0, 0], high=[10, 5], delta=dela))
    axarr[0].set_title('Prior')

    titles = ['Inside', 'Left', 'Right', 'Down']
    for i, label in enumerate(titles):
        post = pz.runVBND(prior, i)
        grid = post.discretize2D(low=[0, 0], high=[10, 5], delta=dela)
        axarr[i + 1].contourf(x, y, grid, cmap='viridis')
        axarr[i + 1].set_title('Post: ' + label)
    plt.show()
def buildReward(self, gen=True):
    """Build or load coarse per-action reward models.

    The state grid is subsampled by cutFactor to keep the mixtures
    small; reward mass is placed where the two agents nearly coincide,
    shifted back by the action's nominal displacement delA[i].
    """
    if gen:
        self.r = [GM() for _ in range(len(self.delA))]
        var = (np.identity(4) * 5).tolist()
        cutFactor = 3  # grid subsampling factor
        for i in range(len(self.r)):
            for x1 in range(int(np.floor(self.bounds[0][0] / cutFactor)) - 1,
                            int(np.ceil(self.bounds[0][1] / cutFactor)) + 1):
                for y1 in range(int(np.floor(self.bounds[1][0] / cutFactor)) - 1,
                                int(np.ceil(self.bounds[1][1] / cutFactor)) + 1):
                    for x2 in range(int(np.floor(self.bounds[2][0] / cutFactor)) - 1,
                                    int(np.ceil(self.bounds[2][1] / cutFactor)) + 1):
                        for y2 in range(int(np.floor(self.bounds[3][0] / cutFactor)) - 1,
                                        int(np.ceil(self.bounds[3][1] / cutFactor)) + 1):
                            if np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2) < 1:
                                # Rescale back to full resolution and shift
                                # by the action displacement.
                                mean = (np.array([x1 * cutFactor,
                                                  y1 * cutFactor,
                                                  x2 * cutFactor,
                                                  y2 * cutFactor])
                                        - np.array(self.delA[i])).tolist()
                                self.r[i].addG(Gaussian(mean, var, 100))
        # BUG FIX: np.save writes binary data; open in 'wb' ('w' fails on
        # Python 3) and let 'with' close the handle.
        with open("../models/" + self.fileNamePrefix + "REW.npy",
                  "wb") as f:
            np.save(f, self.r)
    else:
        self.r = np.load("../models/" + self.fileNamePrefix +
                         "REW.npy").tolist()
def assignRooms(self, belief):
    """Partition a joint belief into per-room target beliefs.

    Returns one GM per room containing copies of the mixands whose
    target-position mean (dims 2, 3) falls inside that room's bounding
    box.  Rooms that capture no mixand receive a tiny placeholder
    Gaussian at the room center so downstream code never sees an empty
    mixture.
    """
    allBels = []
    allBounds = []
    for room in self.map_.rooms:
        info = self.map_.rooms[room]
        allBounds.append([info['min_x'], info['min_y'],
                          info['max_x'], info['max_y']])
        roomBel = GM()
        roomWeight = 0
        for g in belief:
            tx, ty = g.mean[2], g.mean[3]
            # Strict-inequality containment, as in the original.
            if (info['min_x'] < tx < info['max_x'] and
                    info['min_y'] < ty < info['max_y']):
                roomBel.addG(deepcopy(g))
                roomWeight += g.weight
        if roomBel.size == 0:
            cx = (info['max_x'] + info['min_x']) / 2
            cy = (info['max_y'] + info['min_y']) / 2
            roomBel.addG(Gaussian([0, 0, cx, cy],
                                  np.identity(4).tolist(), 0.0001))
        allBels.append(roomBel)
    return allBels
def preComputeAls(self):
    """Pre-compute alpha-function components for each (alpha, action, obs).

    For every alpha mixand k and observation mixand l, the Gaussian
    product is formed and then pulled back through the action dynamics:
    mean shifted by -delA[a], covariance widened by delAVar.  Stores the
    result in self.preAls.
    """
    G = self.Gamma
    pz = self.pz
    als1 = [[[0 for i in range(0, len(pz))]
             for j in range(0, len(self.delA))]
            for k in range(0, len(G))]
    for j in range(0, len(G)):
        for a in range(0, len(self.delA)):
            for o in range(0, len(pz)):
                als1[j][a][o] = GM()
                for k in range(0, G[j].size):
                    for l in range(0, pz[o].size):
                        gk = G[j].Gs[k]
                        gl = pz[o].Gs[l]
                        # Product weight: overlap of the two mixands.
                        weight = gk.weight * gl.weight * mvn.pdf(
                            gl.mean, gk.mean,
                            (np.matrix(gk.var) +
                             np.matrix(gl.var)).tolist())
                        # BUG FIX: the Gaussian-product covariance is
                        # (Sk^-1 + Sl^-1)^-1; the original added the raw
                        # pz covariance (no .I), inconsistent with the
                        # mean formula below, which uses both inverses.
                        sig = (np.matrix(gk.var).I +
                               np.matrix(gl.var).I).I.tolist()
                        sstmp = (np.matrix(gk.var).I *
                                 np.transpose(np.matrix(gk.mean)) +
                                 np.matrix(gl.var).I *
                                 np.transpose(np.matrix(gl.mean)))
                        ss = np.dot(sig, sstmp).tolist()
                        # Pull back through the action: shift and widen.
                        smean = (np.transpose(np.matrix(ss)) -
                                 np.matrix(self.delA[a])).tolist()
                        sigvar = (np.matrix(sig) +
                                  np.matrix(self.delAVar)).tolist()
                        als1[j][a][o].addG(
                            Gaussian(smean[0], sigvar, weight))
    self.preAls = als1
def beliefUpdate(self, b, a, o):
    """GM belief update for action a and observation index o.

    Standard Gaussian-mixture filter: each (observation mixand, belief
    mixand) pair yields one posterior mixand via the product of the
    predicted belief N(sj + delA, sigj + delAVar) and the observation
    likelihood N(si, sigi).  The result is condensed to self.maxMix
    components.
    """
    btmp = GM()
    for obs in self.pz[o].Gs:
        for bel in b.Gs:
            sj = np.matrix(bel.mean).T
            si = np.matrix(obs.mean).T
            delA = np.matrix(self.delA[a]).T
            sigi = np.matrix(obs.var)
            sigj = np.matrix(bel.var)
            delAVar = np.matrix(self.delAVar)
            # BUG FIX: np.add(sigi, sigj, delAVar) used delAVar as the
            # *output* array -- it computed only sigi + sigj and
            # clobbered delAVar.  The marginal likelihood covariance is
            # the sum of all three terms.
            weight = obs.weight * bel.weight
            weight = weight * mvn.pdf((sj + delA).T.tolist()[0],
                                      si.T.tolist()[0],
                                      (sigi + sigj + delAVar).tolist())
            var = (sigi.I + (sigj + delAVar).I).I
            mean = var * (sigi.I * si + (sigj + delAVar).I * (sj + delA))
            btmp.addG(Gaussian(mean.T.tolist()[0], var.tolist(),
                               weight.tolist()))
    btmp.normalizeWeights()
    btmp = btmp.kmeansCondensationN(self.maxMix)
    btmp.normalizeWeights()
    return btmp
def beliefUpdate(self, b, a, o, maxMix=10):
    """Gaussian-mixture belief update for action a and observation o.

    For each (observation mixand i, belief mixand j) pair, the predicted
    belief (mean shifted by delA[a], covariance widened via covAdd with
    delAVar) is combined with the observation likelihood to produce one
    posterior mixand; the mixture is then condensed to at most maxMix
    components.  NOTE(review): covAdd is defined elsewhere -- presumably
    element-wise covariance addition; confirm against its definition.
    """
    btmp = GM()
    for i in self.pz[o].Gs:
        for j in b.Gs:
            # Likelihood of the predicted mean under the combined
            # covariance (obs + belief + process noise).
            tmp = mvn.pdf(
                np.add(np.matrix(j.mean), np.matrix(self.delA[a])).tolist(),
                i.mean,
                self.covAdd(self.covAdd(i.var, j.var), self.delAVar))
            w = i.weight * j.weight * tmp.tolist()
            # Posterior covariance: inverse of the summed precisions.
            sig = (np.add(
                np.matrix(i.var).I,
                np.matrix(self.covAdd(j.var, self.delAVar)).I)).I.tolist()
            # Posterior mean: precision-weighted combination of the
            # observation mean and the predicted belief mean.
            sstmp1 = np.matrix(i.var).I * np.transpose(np.matrix(i.mean))
            sstmp2 = np.matrix(self.covAdd(j.var, self.delAVar)).I
            sstmp21 = np.add(np.matrix(j.mean), np.matrix(self.delA[a]))
            sstmp3 = sstmp1 + sstmp2 * np.transpose(sstmp21)
            smean = np.transpose(sig * sstmp3).tolist()[0]
            btmp.addG(Gaussian(smean, sig, w))
    # Bound the mixture size, then renormalize the weights.
    btmp = btmp.kmeansCondensationN(maxMix)
    btmp.normalizeWeights()
    return btmp
def beliefUpdate(b, a, o, pz, adelA=(-1, 1, 0), adelAVar=0.5):
    """One-dimensional GM belief update for action a and observation o.

    adelA (per-action displacement) and adelAVar (process noise) were
    hard-coded; they are now keyword parameters whose defaults reproduce
    the original values, so existing callers are unaffected.
    """
    btmp = GM()
    for obs in pz[o].Gs:
        for bel in b.Gs:
            sj = np.matrix(bel.mean).T
            si = np.matrix(obs.mean).T
            delA = np.matrix(adelA[a]).T
            sigi = np.matrix(obs.var)
            sigj = np.matrix(bel.var)
            delAVar = np.matrix(adelAVar)
            # BUG FIX: np.add(sigi, sigj, delAVar) treated delAVar as the
            # output buffer (computing only sigi + sigj and clobbering
            # it); the likelihood covariance is the three-way sum.
            weight = obs.weight * bel.weight
            weight = weight * mvn.pdf((sj + delA).T.tolist()[0],
                                      si.T.tolist()[0],
                                      (sigi + sigj + delAVar).tolist())
            var = (sigi.I + (sigj + delAVar).I).I
            mean = var * (sigi.I * si + (sigj + delAVar).I * (sj + delA))
            btmp.addG(Gaussian(mean.T.tolist()[0], var.tolist(),
                               weight.tolist()))
    btmp.normalizeWeights()
    # Collapse to a single mixand.
    btmp.condense(1)
    btmp.normalizeWeights()
    return btmp
def preComputeAlsSoftmaxFactored(self): G = self.Gamma; #for each alpha, each movement, each question, each question answer, each view cone als1 = np.zeros(shape = (len(G),len(self.delA),3,2)).tolist(); #questions left, right, in front of, behind for j in range(0,len(G)): for am in range(0,len(self.delA)): for aq in range(0,3): for oq in range(0,2): als1[j][am][aq][oq] = GM(); #get observation from question #If 0, multimodal alObs = GM(); if(oq == 0): for h in range(1,5): if(h!=aq and h!=3): alObs.addGM(self.pz.runVBND(G[j],h)); elif(oq == 1): if(aq==2): alObs.addGM(self.pz.runVBND(G[j],aq+2)); else: alObs.addGM(self.pz.runVBND(G[j],aq+1)); for k in alObs.Gs: mean = (np.matrix(k.mean) - np.matrix(self.delA[am])).tolist(); var = (np.matrix(k.var) + np.matrix(self.delAVar)).tolist(); weight = k.weight; als1[j][am][aq][oq].addG(Gaussian(mean,var,weight)); self.preAls = als1;
def generate(self, s, a):
    """Sample a successor state, observation index, and reward for (s, a).

    The successor is drawn from a single Gaussian centered on
    s + delA[a] with the model's process noise; the observation is the
    index of the model whose density is highest at the sampled state.
    """
    motion = GM()
    motion.addG(
        Gaussian((np.array(s) + np.array(self.model.delA[a])).tolist(),
                 self.model.delAVar, 1))
    sprime = motion.sample(1)[0]
    # Most-likely observation under the point-evaluated models.
    scores = [pz.pointEval(sprime) for pz in self.model.pz]
    z = scores.index(max(scores))
    reward = self.model.r[a].pointEval(s)
    return [sprime, z, reward]
def precomputeAls(self):
    """Pre-compute alpha functions for the linear-dynamics model.

    For each alpha mixand the mean and covariance are pulled back through
    the linear system (gain A, control matrix B, observation matrix C,
    noise Q, per-action control cost R[a]).  NOTE(review): ``**-1`` is
    used for inverses throughout -- this is only a true inverse when
    A, B, C, Q, R are scalars (elementwise for ndarrays); confirm the
    model is scalar.  Returns the nested list als1[alpha][action].
    """
    G = self.Gamma
    rew = self.rew  # NOTE(review): unused here -- confirm before removing
    numActs = 3
    als1 = [[0 for j in range(0, numActs)] for k in range(0, len(G))]
    for j in range(0, len(G)):
        for a in range(0, numActs):
            als1[j][a] = GM()
            for k in range(0, G[j].size):
                # Weight shrinks by the state-transition gain A.
                weight = G[j][k].weight / self.A
                # D: combined precision of the observation-propagated
                # noise term E and the mixand covariance.
                D = ((self.C**-1 * self.Q * self.C**-1)**-1 + G[j][k].var**-1)**-1
                E = self.C**-1 * self.Q * self.C**-1
                # Back-propagated mean, including the control offset.
                mean = self.A**-1 * \
                    (D*(E**-1*self.C*G[j][k].mean + G[j]
                     [k].var**-1*G[j][k].mean)-self.B*self.acts[a])
                # Back-propagated covariance including control cost R[a].
                var = self.A**-1 * (
                    D * (E**-1 *
                         (E * (D**-1 * (D + self.R[a]) * D**-1) * E +
                          self.Q + self.C * G[j][k].var * self.C) *
                         E**-1) * D) * self.A**-1
                als1[j][a].addG(Gaussian(mean, var, weight))
    return als1
def testRectangleModel():
    """Visual check of a rectangle softmax model: plot a uniform-grid
    prior and the VB posteriors for its five classes."""
    pz = Softmax()
    pz.buildRectangleModel([[2, 2], [3, 4]], 1)

    # Near-uniform prior: unit Gaussians on every grid point.
    prior = GM()
    for i in range(10):
        for j in range(5):
            prior.addG(Gaussian([i, j], [[1, 0], [0, 1]], 1))
    prior.normalizeWeights()

    dela = 0.1
    x, y = np.mgrid[0:10:dela, 0:5:dela]
    fig, axarr = plt.subplots(6)
    axarr[0].contourf(
        x, y, prior.discretize2D(low=[0, 0], high=[10, 5], delta=dela))
    axarr[0].set_title('Prior')

    titles = ['Inside', 'Left', 'Right', 'Up', 'Down']
    for i, label in enumerate(titles):
        post = pz.runVBND(prior, i)
        grid = post.discretize2D(low=[0, 0], high=[10, 5], delta=dela)
        axarr[i + 1].contourf(x, y, grid, cmap='viridis')
        axarr[i + 1].set_title('Post: ' + label)
    plt.show()
def runVB(self, prior, softClassNum):
    """Variational-Bayes update of a 1-D GM prior against softmax class
    softClassNum.

    For each prior mixand, E and M steps alternate until the
    log-evidence estimate logCHat converges (or 100000 iterations);
    the resulting moment-matched Gaussian is added to the posterior,
    weighted by the prior weight times exp(logCHat).
    NOTE(review): zeta_c and alpha carry over from one mixand to the
    next (a warm start) instead of being reset per mixand -- confirm
    this is intentional.
    """
    # For the one dimensional case only
    post = GM();
    weight = self.weights;
    bias = self.bias;
    alpha = self.alpha;
    zeta_c = self.zeta_c;
    for g in prior.Gs:
        prevLogCHat = -1000;
        count = 0;
        # Alternate E/M until the evidence estimate stops changing.
        while(count < 100000):
            count = count+1;
            [mean,var,yc,yc2] = self.Estep(weight,bias,g.mean,g.var,alpha,zeta_c,softClassNum = softClassNum);
            [zeta_c,alpha] = self.Mstep(len(weight),yc,yc2,zeta_c,alpha,steps = 100);
            logCHat = self.calcCHat(g.mean,g.var,mean,var,alpha,zeta_c,yc,yc2,mod=softClassNum);
            if(abs(prevLogCHat - logCHat) < 0.00001):
                break;
            else:
                prevLogCHat = logCHat;
        post.addG(Gaussian(mean,var,g.weight*np.exp(logCHat).tolist()[0][0]))
    return post;
def testMakeMap():
    """Smoke test: render a belief map from a single-mixand belief."""
    translator = POMDPTranslator()
    belief = GM()
    belief.addG(Gaussian([3, 2, 1, 0], np.identity(4).tolist(), 1))
    translator.makeBeliefMap(belief, [0, 0, 0])
def JSD(mix_i, mix_j):
    """
    Computes the Jensen-Shannon divergence between two multivarite normal
    distributions using the Kullback-Leibler Divergence

    JSD(I || J) = 0.5*D(I || M) + 0.5*D(J || M)
    M = 0.5*(I + J)

    NOTE(review): M is constructed below with a 0.25 factor on the summed
    covariances, i.e. as the distribution of the *average* of independent
    draws from I and J, not the moment-matched 50/50 mixture -- confirm
    this midpoint definition is intended.
    """
    new_mix_i = deepcopy(mix_i)
    new_mix_j = deepcopy(mix_j)

    # compute M = 0.5 * (I + J)
    new_mean = np.multiply(0.5, np.add(new_mix_i.mean, new_mix_j.mean))
    new_var = np.multiply(0.25, np.add(new_mix_i.var, new_mix_j.var))
    new_mix_m = Gaussian(new_mean, new_var)

    # D(I || M): closed-form KL divergence between two Gaussians
    # (trace + Mahalanobis + log-det-ratio - dimension, all halved)
    div1 = 0.5*(np.trace(np.dot(np.linalg.inv(new_mix_m.var),new_mix_i.var)) + np.dot(np.dot(np.transpose(np.subtract(new_mix_m.mean,new_mix_i.mean)) \
        ,np.linalg.inv(new_mix_m.var)),(np.subtract(new_mix_m.mean,new_mix_i.mean))) \
        - len(new_mix_m.mean) + np.log(np.linalg.det(new_mix_m.var)/np.linalg.det(new_mix_i.var)))

    # D(J || M)
    div2 = 0.5*(np.trace(np.dot(np.linalg.inv(new_mix_m.var),new_mix_j.var)) + np.dot(np.dot(np.transpose(np.subtract(new_mix_m.mean,new_mix_j.mean)) \
        ,np.linalg.inv(new_mix_m.var)),(np.subtract(new_mix_m.mean,new_mix_j.mean))) \
        - len(new_mix_m.mean) + np.log(np.linalg.det(new_mix_m.var)/np.linalg.det(new_mix_j.var)))

    # equally-weighted average of the two KL terms
    div = (0.5 * div1) + (0.5 * div2)

    # release the deep copies before returning
    del new_mix_i
    del new_mix_j

    return div
def backup(als, modes, delA, delAVar, pz, r, maxMix, b):
    """One point-based Bellman backup at belief b (1-D, hybrid-mode model).

    Arguments as used here:
        als: current alpha functions (list of GM).
        modes: mode GMs; each product is weighted by the mode factor h.
        delA: per-mode, per-action mean displacements (delA[mode][action]).
        delAVar: scalar process-noise variance added per component.
        pz: observation models (list of GM).
        r: per-action reward GMs.
        maxMix: mixture-size bound for the returned alpha function.
        b: belief GM the candidate alphas are scored against.
    Returns the best new alpha function (GM) with .action set to the
    maximizing action index.
    """
    newAls = [[[0 for i in range(0, len(pz))] for j in range(0, len(delA[0]))]
              for k in range(0, len(als))]
    for i in range(0, len(als)):
        for j in range(0, len(delA[0])):
            for k in range(0, len(pz)):
                newAls[i][j][k] = GM()
                for h in modes:
                    # NOTE(review): pz is indexed with the action index j,
                    # not the observation index k -- this only coincides
                    # when len(pz) equals the number of actions; confirm
                    # intended.
                    tmpGM = als[i].GMProduct(pz[j])
                    mean = tmpGM.getMeans()
                    # Shift each 1-D mean back through the mode/action
                    # displacement, flattening the 1-element nesting.
                    for l in range(0, len(mean)):
                        mean[l][0] -= delA[modes.index(h)][j]
                        mean[l] = mean[l][0]
                    var = tmpGM.getVars()
                    for l in range(0, len(var)):
                        var[l][0][0] += delAVar
                        var[l] = var[l][0][0]
                    weights = tmpGM.getWeights()
                    tmpGM2 = GM()
                    for l in range(0, len(mean)):
                        tmpGM2.addG(Gaussian(mean[l], var[l], weights[l]))
                    # Weight the shifted product by the mode factor h.
                    newAls[i][j][k].addGM(tmpGM2.GMProduct(h))
    bestVal = -10000000000
    bestAct = 0
    bestGM = []
    for a in range(0, len(delA[0])):
        suma = GM()
        # For each observation pick the alpha maximizing the inner
        # product with the belief, then discount and add the reward.
        for o in range(0, len(pz)):
            suma.addGM(newAls[np.argmax([
                continuousDot(newAls[j][a][o], b)
                for j in range(0, len(newAls))
            ])][a][o])
        suma.scalerMultiply(0.9)  # discount factor
        suma.addGM(r[a])
        # Flatten any residual 1-element nesting in means/variances.
        for g in suma.Gs:
            if (isinstance(g.mean, list)):
                g.mean = g.mean[0]
                g.var = g.var[0][0]
        suma = suma.kmeansCondensationN(k=maxMix, lowInit=-20, highInit=20)
        tmp = continuousDot(suma, b)
        if (tmp > bestVal):
            bestAct = a
            bestGM = copy.deepcopy(suma)
            bestVal = tmp
    bestGM.action = bestAct
    return bestGM
def simulate(cop_pose=None, steps=10):
    """Run the translator for `steps` iterations from a fixed 4-mixand belief.

    Prints the cop-robot distance after each step.  cop_pose defaults to
    [0, 0, -15.3]; a None sentinel replaces the original mutable default
    list (the list is handed to getNextPose, so a shared default could
    leak state between calls).
    """
    if cop_pose is None:
        cop_pose = [0, 0, -15.3]
    translator = POMDPTranslator()
    b = GM()
    b.addG(Gaussian([3, 2, -2, 2], np.identity(4).tolist(), 1))
    b.addG(Gaussian([3, 2, -8, -2], np.identity(4).tolist(), 1))
    b.addG(Gaussian([3, 2, -4, -2], np.identity(4).tolist(), 1))
    b.addG(Gaussian([0, 0, 2, 2], (np.identity(4) * 6).tolist(), 1))
    b.normalizeWeights()
    rob_pose = [-5, 0, 0]
    for count in range(0, steps):
        [b, cop_pose, qs] = translator.getNextPose(b, None, [cop_pose])
        print(distance(cop_pose[0], cop_pose[1], rob_pose[0], rob_pose[1]))
def cutGMTo2D(self, mix, dims=[2, 3]):
    """Project a GM onto the two state dimensions listed in `dims`.

    Returns a new mixture whose means and covariances are the
    corresponding 2x2 slices of each component; weights carry over
    unchanged.  (`dims` is only read, never mutated, so the mutable
    default is safe here.)
    """
    flat = GM()
    a, b = dims[0], dims[1]
    for g in mix:
        mean2 = [g.mean[a], g.mean[b]]
        var2 = [[g.var[a][a], g.var[a][b]],
                [g.var[b][a], g.var[b][b]]]
        flat.addG(Gaussian(mean2, var2, g.weight))
    return flat
def buildReward(self, gen=True):
    """Build or load per-action 2-D reward models.

    Each action's reward is a single Gaussian centered at -delA[i], so
    that taking the action from that state lands on the origin/goal.
    """
    if (gen):
        self.r = [0] * len(self.delA)
        for i in range(0, len(self.r)):
            self.r[i] = GM()
        var = (np.identity(2) * .25).tolist()
        for i in range(0, len(self.r)):
            self.r[i].addG(
                Gaussian([-self.delA[i][0], -self.delA[i][1]], var, 100))
        print('Plotting Reward Model')
        for i in range(0, len(self.r)):
            self.r[i].plot2D(high=[10, 10], low=[-10, -10],
                             xlabel='Robot X', ylabel='Robot Y',
                             title='Reward for action: ' + str(i))
        print('Condensing Reward Model')
        for i in range(0, len(self.r)):
            self.r[i] = self.r[i].kmeansCondensationN(k=5)
        print('Plotting Condensed Reward Model')
        for i in range(0, len(self.r)):
            [x, y, c] = self.r[i].plot2D(high=[10, 10], low=[-10, -10],
                                         vis=False)
            minim = np.amin(c)
            maxim = np.amax(c)
            levels = np.linspace(minim, maxim)
            plt.contourf(x, y, c, levels=levels, vmin=minim, vmax=maxim,
                         cmap='viridis')
            plt.title('Reward for action: ' + str(i))
            plt.xlabel('Robot X')
            plt.ylabel('Robot Y')
            plt.show()
        # BUG FIX: np.save writes binary data; the file must be opened
        # in 'wb' ('w' fails on Python 3), and 'with' closes it.
        with open("./models/rew/" + self.fileNamePrefix + "REW.npy",
                  "wb") as f:
            np.save(f, self.r)
    else:
        self.r = np.load("./models/rew/" + self.fileNamePrefix +
                         "REW.npy").tolist()
def testBeliefUpdate():
    """Smoke test: plot the cop-position belief before and after one update."""
    translator = POMDPTranslator()
    belief = GM()
    belief.addG(Gaussian([3, 2, 1, 0], np.identity(4).tolist(), 1))
    cutGMTo2D(belief, dims=[0, 1]).plot2D(low=[0, 0], high=[10, 5])
    belief = translator.beliefUpdate(belief, 2, [[8, 5]])
    cutGMTo2D(belief, dims=[0, 1]).plot2D(low=[0, 0], high=[10, 5])
def testInvertedSoftmaxModels():
    """Compare two ways of inverting a softmax observation on a 4-mixand belief.

    First approach: sum the VB posteriors of classes 1-4.  Second
    approach: subtract the class-0 posterior from the prior, shift the
    weights so the minimum is zero, and renormalize.  Both results are
    timed and plotted.
    """
    b = GM()
    b.addG(Gaussian([2, 2], [[1, 0], [0, 1]], 1))
    b.addG(Gaussian([4, 2], [[1, 0], [0, 1]], 1))
    b.addG(Gaussian([2, 4], [[1, 0], [0, 1]], 1))
    b.addG(Gaussian([3, 3], [[1, 0], [0, 1]], 1))
    b.normalizeWeights()
    b.plot2D()

    pz = Softmax()
    pz.buildOrientedRecModel([2, 2], 0, 1, 1, 5)

    # NOTE(review): time.clock() was removed in Python 3.8; switch to
    # time.perf_counter() once the codebase targets modern Python.
    startTime = time.clock()
    b2 = GM()
    for i in range(1, 5):
        b2.addGM(pz.runVBND(b, i))
    print(time.clock() - startTime)
    b2.plot2D()

    startTime = time.clock()
    b3 = GM()
    b3.addGM(b)
    tmpB = pz.runVBND(b, 0)
    tmpB.normalizeWeights()
    tmpB.scalerMultiply(-1)
    b3.addGM(tmpB)
    # Shift weights so the minimum is zero, then renormalize.
    # (An unused copy of the weight list from the original was removed.)
    mi = min(b3.getWeights())
    for g in b3.Gs:
        g.weight = g.weight - mi
    b3.normalizeWeights()
    print(time.clock() - startTime)
    b3.plot2D()
def solveQ(self):
    """Derive per-action Q-function mixtures from the value function.

    Each value mixand is shifted back through the action displacement
    (mean - delA[a]) and widened by the motion noise delAVar; the reward
    mixture is then added in.
    """
    V = self.ValueFunc
    self.Q = [GM() for _ in range(len(self.delA))]
    for a, q in enumerate(self.Q):
        shift = np.matrix(self.delA[a])
        for comp in V.Gs:
            mean = (np.matrix(comp.mean) - shift).tolist()
            var = (np.matrix(comp.var) + np.matrix(self.delAVar)).tolist()
            q.addG(Gaussian(mean, var, comp.weight))
        q.addGM(self.r)
def testGetNextPose(): translator = POMDPTranslator() b = GM() b.addG(Gaussian([3, 2, -2, 2], np.identity(4).tolist(), 1)) b.addG(Gaussian([3, 2, -8, -2], np.identity(4).tolist(), 1)) b.addG(Gaussian([3, 2, -4, -2], np.identity(4).tolist(), 1)) b.addG(Gaussian([0, 0, 2, 2], (np.identity(4) * 6).tolist(), 1)) b.normalizeWeights() #for i in range(-8,3): #for j in range(-1,2): #b.addG(Gaussian([3,2,i,j],np.identity(4).tolist(),1)); translator.cutGMTo2D(b, dims=[2, 3]).plot2D(low=[-9.6, -3.6], high=[4, 3.6]) [bnew, goal_pose, qs] = translator.getNextPose(b, None, [[0, 0, -15.3]]) bnew = translator.cutGMTo2D(bnew, dims=[2, 3]) bnew.plot2D(low=[-9.6, -3.6], high=[4, 3.6]) #print(qs) '''