# Intercept POMDP test generator: point-based value iteration over
# Gaussian-mixture alpha functions, with MDP/QMDP/greedy baselines,
# belief-update machinery, and simulation/plotting utilities.

import copy
import math
import random

import matplotlib.animation as animation
import matplotlib.image as mgimg
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import multivariate_normal as mvn

# Assumed local module providing the Gaussian-mixture helpers (GM, Gaussian)
# used throughout this file; adjust the import to match your project layout.
from gaussianMixtures import GM, Gaussian


class InterceptTestGenerator:

    def __init__(self, beliefFile=None, dis=0.9, gen=False, altObs=True,
                 qGen=True, humObs=True):
        if humObs:
            fig, ax = plt.subplots()
            self.axes = ax
        self.humanObs = humObs

        # Initialize exit flag
        self.exitFlag = False
        self.b = None

        self.buildTransition()
        if gen:
            print("Building Observation Models")
        if altObs:
            self.buildAltObs(gen=gen)
        else:
            self.buildObs(gen=gen)
        if gen:
            print("Building Reward Model")
        self.buildReward(gen=gen)
        self.discount = dis

        if qGen:
            self.MDPValueIteration(False)
            self.solveQ()

        if beliefFile is None:
            # Seed belief set: five hand-picked beliefs plus 100 random ones.
            self.B = [0] * 5
            self.B[0] = GM()
            var = np.matrix([[1, 0], [0, 1]])
            self.B[0].addG(Gaussian([2.5, 2.5], var, 1))
            self.B[1] = GM()
            self.B[1].addG(Gaussian([1, 5], var, 1))
            self.B[2] = GM()
            self.B[2].addG(Gaussian([5, 1], var, 1))
            self.B[3] = GM()
            self.B[3].addG(Gaussian([0, 0], var, 1))
            self.B[4] = GM()
            self.B[4].addG(Gaussian([5, 5], var, 1))
            for i in range(0, 100):
                tmp = GM()
                tmp.addG(Gaussian([random.random() * 5, random.random() * 5],
                                  var, 1))
                self.B.append(tmp)
        else:
            self.B = np.load(beliefFile, allow_pickle=True).tolist()

        # Initialize Gamma
        self.Gamma = [copy.deepcopy(self.r)]
        # self.Gamma = [copy.deepcopy(self.r), copy.deepcopy(self.r),
        #               copy.deepcopy(self.r)]
        '''
        for i in range(0,3):
            self.Gamma[i].addG(Gaussian([0,0],[[100,0],[0,100]],-5));
            self.Gamma[i].action = i;
        '''
        # TODO: This stuff....
        for i in range(0, len(self.Gamma)):
            for j in range(0, len(self.Gamma[i].Gs)):
                self.Gamma[i].Gs[j].weight = -100000

    def solve(self, N, maxMix=20, finalMix=50, verbose=False,
              alsave="interceptAlphasTemp.npy"):
        for counter in range(0, N):
            if self.exitFlag:
                break
            print("Iteration: " + str(counter + 1))

            # Evaluate the current alpha set at every belief point.
            bestAlphas = [GM()] * len(self.B)
            Value = [0] * len(self.B)
            for i, b in enumerate(self.B):
                bestAlphas[i] = self.Gamma[np.argmax([
                    self.continuousDot(self.Gamma[j], b)
                    for j in range(0, len(self.Gamma))
                ])]
                Value[i] = self.continuousDot(bestAlphas[i], b)

            GammaNew = []
            BTilde = copy.deepcopy(self.B)
            self.preComputeAls()
            # self.newPreComputeAls()

            while len(BTilde) > 0:
                if self.exitFlag:
                    break
                b = random.choice(BTilde)
                BTilde.remove(b)

                al = self.backup(b)

                # TODO: You added the else here
                if self.continuousDot(al, b) < Value[self.findB(b)]:
                    index = 0
                    for h in self.B:
                        if b.comp(h):
                            index = self.B.index(h)
                    al = bestAlphas[index]
                else:
                    index = 0
                    for h in self.B:
                        if b.comp(h):
                            index = self.B.index(h)
                    bestAlphas[index] = al

                # Remove from BTilde all b for which this alpha is better than
                # its current value (rebuild the list rather than mutating it
                # while iterating over it).
                BTilde = [bprime for bprime in BTilde
                          if self.continuousDot(al, bprime) <
                          Value[self.findB(bprime)]]

                GammaNew += [al]

            if verbose and not self.exitFlag:
                print("Number of Alphas: " + str(len(GammaNew)))
                av = 0
                for i in range(0, len(GammaNew)):
                    av += GammaNew[i].size
                av = av / len(GammaNew)
                print("Average number of mixands: " + str(av))

            if not self.exitFlag:
                if counter < N - 1:
                    for i in range(0, len(GammaNew)):
                        # TODO: Switch back to kmeans
                        # GammaNew[i].condense(max_num_mixands=maxMix)
                        GammaNew[i] = GammaNew[i].kmeansCondensationN(k=maxMix)
                elif counter == N - 1:
                    for i in range(0, len(GammaNew)):
                        # GammaNew[i].condense(max_num_mixands=finalMix)
                        GammaNew[i] = GammaNew[i].kmeansCondensationN(
                            k=finalMix)

            if verbose and not self.exitFlag:
                # GammaNew[0].display()
                av = 0
                for i in range(0, len(GammaNew)):
                    av += GammaNew[i].size
                av = av / len(GammaNew)
                print("Reduced number of mixands: " + str(av))
                print("Actions: " +
                      str([GammaNew[i].action
                           for i in range(0, len(GammaNew))]))
                print("")

            if not self.exitFlag:
                # Checkpoint the previous alpha set before adopting the new.
                f = open(alsave, "wb")
                np.save(f, self.Gamma)
                f.close()
                self.Gamma = copy.deepcopy(GammaNew)

            '''
            if((counter+1)%5 == 0):
                for i in range(0,len(self.Gamma)):
                    fig1 = plt.figure();
                    print(self.Gamma[i].action);
                    self.Gamma[i].plot2D();
                for j in range(0,3):
                    print(self.getAction(self.B[j]));
            '''

        f = open(alsave, "wb")
        np.save(f, self.Gamma)
        f.close()

    def preComputeAls(self):
        # For every (alpha, action, observation) triple, fuse each alpha
        # component with each observation component, then shift through the
        # transition model.
        G = self.Gamma
        R = self.r
        pz = self.pz

        als1 = [[[0 for i in range(0, len(pz))]
                 for j in range(0, len(self.delA))]
                for k in range(0, len(G))]

        for j in range(0, len(G)):
            for a in range(0, len(self.delA)):
                for o in range(0, len(pz)):
                    als1[j][a][o] = GM()
                    for k in range(0, G[j].size):
                        for l in range(0, pz[o].size):
                            # Get weights wk, wl, and del
                            weight = G[j].Gs[k].weight * pz[o].Gs[l].weight \
                                * mvn.pdf(pz[o].Gs[l].mean, G[j].Gs[k].mean,
                                          (np.matrix(G[j].Gs[k].var)
                                           + np.matrix(pz[o].Gs[l].var)).tolist())

                            # Get sig and ss:
                            # (A^-1 + B^-1)^-1, per the Gaussian product
                            # identity (as in newPreComputeAls below).
                            sig = (np.matrix(G[j].Gs[k].var).I
                                   + np.matrix(pz[o].Gs[l].var).I).I.tolist()

                            sstmp = np.matrix(G[j].Gs[k].var).I \
                                * np.transpose(np.matrix(G[j].Gs[k].mean)) \
                                + np.matrix(pz[o].Gs[l].var).I \
                                * np.transpose(np.matrix(pz[o].Gs[l].mean))
                            ss = np.dot(sig, sstmp).tolist()

                            smean = (np.transpose(np.matrix(ss))
                                     + np.matrix(self.delA[a])).tolist()
                            sigvar = (np.matrix(sig)
                                      + np.matrix(self.delAVar)).tolist()

                            als1[j][a][o].addG(
                                Gaussian(smean[0], sigvar, weight))
        self.preAls = als1

    # Based on the idea that 1 - detect = not detect;
    # only to be used for binary observations.
    def newPreComputeAls(self):
        G = self.Gamma
        R = self.r
        pz = self.pz

        als1 = [[[0 for i in range(0, len(pz))]
                 for j in range(0, len(self.delA))]
                for k in range(0, len(G))]

        for j in range(0, len(G)):
            for a in range(0, len(self.delA)):
                o = 0
                als1[j][a][o] = GM()
                for k in range(0, G[j].size):
                    for l in range(0, pz[o].size):
                        # Get weights wk, wl, and del
                        weight = G[j].Gs[k].weight * pz[o].Gs[l].weight \
                            * mvn.pdf(pz[o].Gs[l].mean, G[j].Gs[k].mean,
                                      self.covAdd(G[j].Gs[k].var,
                                                  pz[o].Gs[l].var))

                        # Get sig and ss
                        sig = (np.matrix(G[j].Gs[k].var).I
                               + np.matrix(pz[o].Gs[l].var).I).I.tolist()

                        sstmp = np.matrix(G[j].Gs[k].var).I \
                            * np.transpose(np.matrix(G[j].Gs[k].mean)) \
                            + np.matrix(pz[o].Gs[l].var).I \
                            * np.transpose(np.matrix(pz[o].Gs[l].mean))
                        ss = np.dot(sig, sstmp)

                        smean = (np.transpose(np.matrix(ss))
                                 + np.matrix(self.delA[a])).tolist()
                        sigvar = (np.matrix(sig)
                                  + np.matrix(self.delAVar)).tolist()

                        als1[j][a][o].addG(Gaussian(smean[0], sigvar, weight))

                # Non-detect alpha: shift the alpha back through the action,
                # then subtract the detect terms.
                o = 1
                als1[j][a][o] = GM()
                for k in range(0, G[j].size):
                    kap = G[j].Gs[k]
                    mean = (np.matrix(kap.mean)
                            - np.matrix(self.delA[a])).tolist()
                    var = (np.matrix(kap.var)
                           + np.matrix(self.delAVar)).tolist()
                    als1[j][a][o].addG(Gaussian(mean, var, kap.weight))

                    for l in range(0, pz[0].size):
                        op = pz[0].Gs[l]
                        var = (np.matrix(kap.var) + np.matrix(op.var)).tolist()
                        weight = kap.weight * op.weight * mvn.pdf(
                            kap.mean, op.mean, var)
                        c2 = (np.matrix(kap.var).I + np.matrix(op.var).I).I
                        c1 = c2 * (np.matrix(kap.var).I
                                   * np.transpose(np.matrix(kap.mean))
                                   + np.matrix(op.var).I
                                   * np.transpose(np.matrix(op.mean)))
                        me = np.transpose(
                            (c1 - np.transpose(np.matrix(self.delA[a])))
                        ).tolist()[0]
                        als1[j][a][o].addG(
                            Gaussian(me,
                                     (c2 + np.matrix(self.delAVar)).tolist(),
                                     -weight))
        self.preAls = als1
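    # The preComputeAls routines above lean on the Gaussian product identity
    #   N(x; a, A) N(x; b, B) = N(a; b, A + B) N(x; c, C),
    # with C = (A^-1 + B^-1)^-1 and c = C (A^-1 a + B^-1 b).
    # The method below is a minimal numerical sanity check of that identity;
    # it is an illustrative addition, not part of the original solver.
    def checkGaussianProduct(self):
        a = np.array([0.0, 0.0])
        A = np.eye(2)
        b = np.array([1.0, 1.0])
        B = 2 * np.eye(2)
        # Moments of the product Gaussian.
        C = np.linalg.inv(np.linalg.inv(A) + np.linalg.inv(B))
        c = C.dot(np.linalg.inv(A).dot(a) + np.linalg.inv(B).dot(b))
        # Both sides should agree at an arbitrary test point.
        x = np.array([0.5, -0.5])
        lhs = mvn.pdf(x, a, A) * mvn.pdf(x, b, B)
        rhs = mvn.pdf(a, b, A + B) * mvn.pdf(x, c, C)
        assert abs(lhs - rhs) < 1e-12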
    def backup(self, b):
        G = self.Gamma
        R = self.r
        pz = self.pz
        als1 = self.preAls

        # One alpha for each belief, so one per backup.
        bestVal = -10000000000
        bestAct = 0
        bestGM = []

        for a in range(0, len(self.delA)):
            suma = GM()
            for o in range(0, len(pz)):
                suma.addGM(als1[np.argmax([
                    self.continuousDot(als1[j][a][o], b)
                    for j in range(0, len(als1))
                ])][a][o])
            suma.scalerMultiply(self.discount)
            suma.addGM(R)

            tmp = self.continuousDot(suma, b)
            # print(a, tmp)
            if tmp > bestVal:
                bestAct = a
                bestGM = suma
                bestVal = tmp

        bestGM.action = bestAct
        return bestGM

    def getAction(self, b):
        act = self.Gamma[np.argmax(
            [self.continuousDot(j, b) for j in self.Gamma])].action
        return act

    def getSecondaryAction(self, b, exclude):
        sG = []
        for g in self.Gamma:
            if g.action not in exclude:
                sG.append(g)
        act = sG[np.argmax([self.continuousDot(j, b) for j in sG])].action
        return act

    def getGreedyAction(self, b, x):
        # Chase the MAP estimate of the robber's position.
        cut = b.slice2DFrom4D(retGS=True, vis=False)
        MAP = cut.findMAP2D()
        cop = [x[0], x[1]]
        rob = [MAP[0], MAP[1]]
        xdist = cop[0] - rob[0]
        ydist = cop[1] - rob[1]
        if abs(xdist) > abs(ydist):
            if xdist > 0:
                act = 0
            else:
                act = 1
        else:
            if ydist > 0:
                act = 2
            else:
                act = 3
        return act

    def MDPValueIteration(self, gen=True):
        if gen:
            # Initialize the value function.
            self.ValueFunc = copy.deepcopy(self.r)
            for g in self.ValueFunc.Gs:
                g.weight = -1000

            comparison = GM()
            comparison.addG(Gaussian([1, 0, 0, 0],
                                     [[1, 0, 0, 0], [0, 1, 0, 0],
                                      [0, 0, 1, 0], [0, 0, 0, 1]], 1))

            uniform = GM()
            for i in range(0, 5):
                for j in range(0, 5):
                    for k in range(0, 5):
                        for l in range(0, 5):
                            uniform.addG(
                                Gaussian([i, j, k, l],
                                         [[4, 0, 0, 0], [0, 4, 0, 0],
                                          [0, 0, 4, 0], [0, 0, 0, 4]], 1))

            count = 0
            # Iterate until convergence, with an iteration cap.
            while not self.ValueFunc.comp(comparison) and count < 30:
                print(count)
                comparison = copy.deepcopy(self.ValueFunc)
                count += 1

                maxVal = -10000000
                maxGM = GM()
                for a in range(0, 2):
                    suma = GM()
                    for g in self.ValueFunc.Gs:
                        mean = (np.matrix(g.mean)
                                - np.matrix(self.delA[a])).tolist()
                        var = (np.matrix(g.var)
                               + np.matrix(self.delAVar)).tolist()
                        suma.addG(Gaussian(mean, var, g.weight))
                    suma.addGM(self.r)
                    tmpVal = self.continuousDot(uniform, suma)
                    if tmpVal > maxVal:
                        maxVal = tmpVal
                        maxGM = copy.deepcopy(suma)

                maxGM.scalerMultiply(self.discount)
                maxGM = maxGM.kmeansCondensationN(20)
                self.ValueFunc = copy.deepcopy(maxGM)
                # self.ValueFunc.display()
                # self.ValueFunc.plot2D()

            print("MDP Value Iteration Complete")

            # f = open("../policies/MDP4DIntercept.npy","w")
            # np.save(f,self.ValueFunc)
            file = "policies/MDP4DIntercept"
            self.ValueFunc.printGMArrayToFile([self.ValueFunc], file)
        else:
            # self.ValueFunc = np.load("../policies/MDP4DIntercept.npy").tolist()
            file = "policies/MDP4DIntercept"
            tmp = GM()
            self.ValueFunc = tmp.readGMArray4D(file)[0]

    def getMDPAction(self, x):
        maxVal = -10000000
        maxGM = GM()
        bestAct = 0
        for a in range(0, len(self.delA)):
            suma = GM()
            for g in self.ValueFunc.Gs:
                mean = (np.matrix(g.mean) - np.matrix(self.delA[a])).tolist()
                var = (np.matrix(g.var) + np.matrix(self.delAVar)).tolist()
                suma.addG(Gaussian(mean, var, g.weight))
            suma.addGM(self.r)
            tmpVal = suma.pointEval(x)
            if tmpVal > maxVal:
                maxVal = tmpVal
                maxGM = suma
                bestAct = a
        return bestAct

    def solveQ(self):
        # One Q mixture per action: back the value function up through each
        # action, then add the immediate reward.
        self.Q = [0] * len(self.delA)
        V = self.ValueFunc
        for a in range(0, len(self.delA)):
            self.Q[a] = GM()
            for i in range(0, V.size):
                mean = (np.matrix(V.Gs[i].mean)
                        - np.matrix(self.delA[a])).tolist()
                var = (np.matrix(V.Gs[i].var)
                       + np.matrix(self.delAVar)).tolist()
                self.Q[a].addG(Gaussian(mean, var, V.Gs[i].weight))
            self.Q[a].addGM(self.r)
        # f = open("../policies/qmdp4DIntercept.npy","w")
        # np.save(f,self.Q)

    def getQMDPAction(self, b):
        act = np.argmax([self.continuousDot(self.Q[j], b)
                         for j in range(0, len(self.Q))])
        return act
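    # A minimal usage sketch for the QMDP pipeline above, assuming the MDP
    # value function can be loaded from disk (gen=False). The method name and
    # the belief used here are illustrative additions, not original code.
    def exampleQMDPQuery(self):
        b = GM()
        b.addG(Gaussian([1, 1, 2.5, 2.5], np.eye(4).tolist(), 1))
        self.MDPValueIteration(gen=False)  # load the cached value function
        self.solveQ()                      # build one Q mixture per action
        return self.getQMDPAction(b)       # argmax_a <Q_a, b>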
    def getQMDPSecondaryAction(self, b, exclude=[]):
        sG = []
        for a in range(0, len(self.delA)):
            if a not in exclude:
                sG.append(a)
        bestVal = -10000000000
        act = -1
        for a in sG:
            tmpVal = self.continuousDot(self.Q[a], b)
            if tmpVal > bestVal:
                bestVal = tmpVal
                act = a
        return act

    def covAdd(self, a, b):
        if type(b) is not list:
            b = b.tolist()
        if type(a) is not list:
            a = a.tolist()
        c = copy.deepcopy(a)
        for i in range(0, len(a)):
            for j in range(0, len(a[i])):
                c[i][j] += b[i][j]
        return c

    def findB(self, b):
        for beta in self.B:
            if beta.comp(b):
                return self.B.index(beta)

    def continuousDot(self, a, b):
        # <a, b> = sum_k sum_l w_k v_l N(mu_l; mu_k, S_k + S_l)
        suma = 0

        if isinstance(a, np.ndarray):
            a = a.tolist()
            a = a[0]
        if isinstance(a, list):
            a = a[0]

        a.clean()
        b.clean()

        for k in range(0, a.size):
            for l in range(0, b.size):
                suma += a.Gs[k].weight * b.Gs[l].weight * mvn.pdf(
                    b.Gs[l].mean, a.Gs[k].mean,
                    np.matrix(a.Gs[k].var) + np.matrix(b.Gs[l].var))
        return suma

    # TODO: You changed the variance for the cop
    # TODO: You changed the length of the transitions
    # The cop (dims 0-1) moves near-deterministically (variance 1e-4);
    # the robber (dims 2-3) random-walks with variance 0.15 per step.
    def buildTransition(self):
        self.delAVar = [[0.0001, 0, 0, 0], [0, 0.0001, 0, 0],
                        [0, 0, 0.15, 0], [0, 0, 0, 0.15]]
        # Actions: 0 = left, 1 = right, 2 = down, 3 = up, 4 = stay.
        self.delA = [[-1, 0, 0, 0], [1, 0, 0, 0], [0, -1, 0, 0],
                     [0, 1, 0, 0], [0, 0, 0, 0]]

    def buildAltObs(self, gen=True):
        # A front/back/left/right/center model.
        # 0: center
        # 1-4: left, right, down, up
        if gen:
            self.pz = [0] * 5
            for i in range(0, 5):
                self.pz[i] = GM()

            var = [[.7, 0, 0, 0], [0, .7, 0, 0],
                   [0, 0, .7, 0], [0, 0, 0, .7]]

            for i in range(-1, 7):
                for j in range(-1, 7):
                    self.pz[0].addG(Gaussian([i, j, i, j], var, 1))

            for i in range(-1, 7):
                for j in range(-1, 7):
                    for k in range(-1, 7):
                        for l in range(-1, 7):
                            if i - k > 0:
                                self.pz[1].addG(Gaussian([i, j, k, l], var, 1))
                            if i - k < 0:
                                self.pz[2].addG(Gaussian([i, j, k, l], var, 1))
                            if j - l > 0:
                                self.pz[3].addG(Gaussian([i, j, k, l], var, 1))
                            if j - l < 0:
                                self.pz[4].addG(Gaussian([i, j, k, l], var, 1))

            print('Plotting Observation Models')
            for i in range(0, len(self.pz)):
                self.plotAllSlices(self.pz[i], title='Uncondensed Observation')

            print('Condensing Observation Models')
            for i in range(0, len(self.pz)):
                self.pz[i] = self.pz[i].kmeansCondensationN(
                    50, lowInit=[-1, -1, -1, -1], highInit=[7, 7, 7, 7])

            print('Plotting Condensed Observation Models')
            for i in range(0, len(self.pz)):
                self.plotAllSlices(self.pz[i], title='Condensed Observation')

            # f = open("../models/obsModel4DIntercept.npy","w")
            # np.save(f,self.pz)
            file = 'models/obsAltModel4DIntercept'
            self.pz[0].printGMArrayToFile(self.pz, file)
        else:
            file = 'models/obsAltModel4DIntercept'
            tmp = GM()
            self.pz = tmp.readGMArray4D(file)
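    # For reference, the discrete event the alt observation model above is
    # built to represent, written as a pure function of the joint state.
    # This is a hypothetical helper added for clarity; simulate() below
    # inlines the same logic when altObs is set.
    def idealAltObservation(self, x):
        # x = [copX, copY, robberX, robberY]
        if self.distance(x[0], x[1], x[2], x[3]) <= 1:
            return 0  # near
        dx = x[0] - x[2]
        dy = x[1] - x[3]
        if abs(dx) > abs(dy):
            return 1 if dx > 0 else 2  # robber left / right of the cop
        return 3 if dy > 0 else 4      # robber below / above the cop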
    def buildObs(self, gen=True):
        # Binary model: 0 = detect (near), 1 = no detect.
        if gen:
            self.pz = [GM(), GM()]
            var = [[1, 0, .7, 0], [0, 1, 0, .7],
                   [.7, 0, 1, 0], [0, .7, 0, 1]]

            for i in range(-2, 8):
                for j in range(-2, 8):
                    self.pz[0].addG(Gaussian([i, j, i, j], var, 1))

            for i in range(-2, 8):
                for j in range(-2, 8):
                    for k in range(-2, 8):
                        for l in range(-2, 8):
                            if abs(i - k) >= 2 or abs(j - l) >= 2:
                                self.pz[1].addG(
                                    Gaussian([i, j, k, l], var, 1))

            print('Plotting Observation Models')
            self.plotAllSlices(self.pz[0], title='Uncondensed Detection')
            self.plotAllSlices(self.pz[1], title='Uncondensed Non-Detect')

            print('Condensing Observation Models')
            self.pz[0].condense(20)
            self.pz[1] = self.pz[1].kmeansCondensationN(
                45, lowInit=[-1, -1, -1, -1], highInit=[7, 7, 7, 7])

            print('Plotting Condensed Observation Models')
            self.plotAllSlices(self.pz[0], title='Condensed Detection')
            self.plotAllSlices(self.pz[1], title='Condensed Non-Detect')

            # f = open("../models/obsModel4DIntercept.npy","w")
            # np.save(f,self.pz)
            file = '../models/obsModel4DIntercept'
            self.pz[0].printGMArrayToFile(self.pz, file)
        else:
            file = '../models/obsModel4DIntercept'
            tmp = GM()
            self.pz = tmp.readGMArray4D(file)

    def buildReward(self, gen=True):
        if gen:
            self.r = GM()
            var = [[1, 0, .7, 0], [0, 1, 0, .7],
                   [.7, 0, 1, 0], [0, .7, 0, 1]]

            # Reward for the cop sharing the robber's position.
            for i in range(-2, 8):
                for j in range(-2, 8):
                    self.r.addG(Gaussian([i, j, i, j], var, 5.6))

            # Penalty elsewhere. NOTE: this test compares i with j and k
            # with l; possibly intended to be abs(i - k) and abs(j - l),
            # as in buildObs.
            for i in range(-2, 8):
                for j in range(-2, 8):
                    for k in range(-2, 8):
                        for l in range(-2, 8):
                            if abs(i - j) >= 2 or abs(k - l) >= 2:
                                self.r.addG(
                                    Gaussian([i, j, k, l], var, -1))

            print('Plotting Reward Model')
            self.plotAllSlices(self.r, title='Uncondensed Reward')
            print('Condensing Reward Model')
            self.r.condense(50)
            print('Plotting Condensed Reward Model')
            self.plotAllSlices(self.r, title='Condensed Reward')

            # f = open("../models/rewardModel4DIntercept.npy","w")
            # np.save(f,self.r)
            file = 'models/rewardModel4DIntercept'
            self.r.printGMArrayToFile([self.r], file)
        else:
            # self.r = np.load("../models/rewardModel4DIntercept.npy").tolist()
            file = 'models/rewardModel4DIntercept'
            tmp = GM()
            self.r = tmp.readGMArray4D(file)[0]

    def beliefUpdate(self, b, a, o, maxMix=10):
        # Kalman-style predict (shift by delA[a], widen by delAVar) fused
        # with a Gaussian-mixture observation correction.
        btmp = GM()
        for i in self.pz[o].Gs:
            for j in b.Gs:
                tmp = mvn.pdf(
                    np.add(np.matrix(j.mean),
                           np.matrix(self.delA[a])).tolist(),
                    i.mean,
                    self.covAdd(self.covAdd(i.var, j.var), self.delAVar))
                # print(i.weight, j.weight, tmp)
                w = i.weight * j.weight * tmp.tolist()

                sig = (np.add(
                    np.matrix(i.var).I,
                    np.matrix(self.covAdd(j.var, self.delAVar)).I)).I.tolist()

                # sstmp = np.matrix(i.var).I*np.transpose(i.mean)
                #     + np.matrix(self.covAdd(j.var + self.delAVar)).I
                #     * np.transpose(np.add(np.matrix(j.mean),np.matrix(delA[a])))
                sstmp1 = np.matrix(i.var).I * np.transpose(np.matrix(i.mean))
                sstmp2 = np.matrix(self.covAdd(j.var, self.delAVar)).I
                sstmp21 = np.add(np.matrix(j.mean), np.matrix(self.delA[a]))
                sstmp3 = sstmp1 + sstmp2 * np.transpose(sstmp21)
                smean = np.transpose(sig * sstmp3).tolist()[0]

                btmp.addG(Gaussian(smean, sig, w))

        btmp = btmp.kmeansCondensationN(maxMix)
        # btmp.condense(maxMix)
        btmp.normalizeWeights()
        return btmp

    def distance(self, x1, y1, x2, y2):
        # Euclidean distance between (x1, y1) and (x2, y2).
        dist = (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2)
        dist = math.sqrt(dist)
        return dist
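    # One filtering step as simulate() uses it: predict through an action,
    # then correct with an observation index. The starting belief and the
    # choice a=4 (stay), o=0 (detect/near) are illustrative only; this
    # helper is an added example, not original code.
    def exampleBeliefStep(self):
        b = GM()
        b.addG(Gaussian([1, 1, 2.5, 2.5],
                        [[0.01, 0, 0, 0], [0, 0.01, 0, 0],
                         [0, 0, 4, 0], [0, 0, 0, 4]], 1))
        return self.beliefUpdate(b, 4, 0, maxMix=10)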
    def getNextPose(self, x, isCop=True, exclude=[]):
        plotFlag = True
        if self.b is None:
            self.b = GM([x[0], x[1], 2.5, 2.5],
                        [[0.01, 0, 0, 0], [0, 0.01, 0, 0],
                         [0, 0, 5, 0], [0, 0, 0, 5]], 1)
            plotFlag = False

        prevX = copy.deepcopy(x)
        act = -1

        if isCop:
            z = -1
            obsName = 'None'
            if plotFlag:
                act = self.getQMDPSecondaryAction(self.b)

                # Numpad-style observation input: 4/6/2/8 for direction,
                # 5 for near, 99 to quit.
                z = -1
                while z not in [4, 6, 2, 8, 5, 99]:
                    try:
                        z = int(input('Observation?'))
                        if z == 99:
                            break
                    except ValueError:
                        if z not in [4, 6, 2, 8, 5, 99]:
                            print("Please enter a valid observation...")

                if z == 4:
                    z = 1
                    obsName = 'Left'
                elif z == 6:
                    z = 2
                    obsName = 'Right'
                elif z == 2:
                    z = 3
                    obsName = 'Down'
                elif z == 8:
                    z = 4
                    obsName = 'Up'
                elif z == 5:
                    z = 0
                    obsName = 'Near'

                if z == 99:
                    z = -1
                    self.exitFlag = True

                self.b = self.beliefUpdate(self.b, act, z)

            self.axes.cla()
            xlabel = 'X Position'
            ylabel = 'Y Position'
            title = 'Most Recent Observation: ' + obsName
            [xx, yy, c] = self.b.slice2DFrom4D(vis=False)
            self.axes.contourf(xx, yy, c, cmap='viridis')
            col = 'r'
            if z == 0:
                col = 'g'
            # cop = self.axes.scatter(x[0],x[1],color = col,s = 100)
            # robber = self.axes.scatter(x[2],x[3],color = 'b',s = 100)
            self.axes.set_xlabel(xlabel)
            self.axes.set_ylabel(ylabel)
            self.axes.set_title(title)

            if act == -1:
                act = self.getQMDPSecondaryAction(self.b, exclude)

        # x = np.random.multivariate_normal([x[0] + self.delA[act][0],
        #     x[1] + self.delA[act][1], x[2] + self.delA[act][2],
        #     x[3] + self.delA[act][3]], self.delAVar, size=1)[0].tolist()
        x[0] = x[0] + self.delA[act][0]
        x[1] = x[1] + self.delA[act][1]
        x[2] = x[2] + self.delA[act][2] + (random.random() - 0.5)
        x[3] = x[3] + self.delA[act][3] + (random.random() - 0.5)

        # Clamp to the 5x5 arena.
        x[0] = min(max(x[0], 0), 5)
        x[1] = min(max(x[1], 0), 5)
        x[2] = min(max(x[2], 0), 5)
        x[3] = min(max(x[3], 0), 5)

        if isCop:
            col = 'r'
            if z == 0:
                col = 'g'
            cop = self.axes.scatter(prevX[0], prevX[1], color=col, s=100)
            robber = self.axes.scatter(prevX[2], prevX[3], color='b', s=100)
            self.axes.arrow(prevX[0], prevX[1], x[0] - prevX[0],
                            x[1] - prevX[1], head_width=0.05,
                            head_length=0.15, fc=col, ec=col)
            # self.axes.arrow(prevX[2],prevX[3],x[2]-prevX[2],x[3]-prevX[3],
            #                 head_width=0.05,head_length=0.25,fc='b',ec='b')
            plt.pause(0.5)

        return x

    def simulate(self, policy="interceptAlphasTemp.npy",
                 initialPose=[1, 1, 4, 4], initialBelief=None, numSteps=20,
                 mul=False, QMDP=False, MDP=False, mdpGen=True, human=False,
                 greedy=False, randSim=False, altObs=False,
                 belSave='tmpbelSave.npy', beliefMaxMix=10, verbose=True):
        if initialBelief is None:
            b = GM()
            var = [[0.01, 0, 0, 0], [0, 0.01, 0, 0],
                   [0, 0, 4, 0], [0, 0, 0, 4]]
            b.addG(Gaussian([initialPose[0], initialPose[1], 2.5, 2.5],
                            var, 1))
        else:
            b = initialBelief

        if human:
            fig, ax = plt.subplots()
        elif MDP or QMDP:
            self.MDPValueIteration(mdpGen)
            if QMDP:
                self.solveQ()

        # Copy so the default argument list is never mutated in place.
        x = copy.deepcopy(initialPose)
        allX = [x]
        allX0 = [x[0]]
        allX1 = [x[1]]
        allX2 = [x[2]]
        allX3 = [x[3]]

        reward = 0
        allReward = [0]
        allB = [b]
        allAct = []

        if randSim:
            # Data-collection mode: sweep a grid of starting states, take
            # random actions, and record the belief trajectory at each step.
            for i in range(0, 3):
                for j in range(0, 3):
                    for k in range(0, 3):
                        for l in range(0, 3):
                            x = [i * 2 + 0.5, j * 2 + 0.5,
                                 k * 2 + 0.5, l * 2 + 0.5]
                            b = GM()
                            var = [[0.01, 0, 0, 0], [0, 0.01, 0, 0],
                                   [0, 0, 4, 0], [0, 0, 0, 4]]
                            b.addG(Gaussian([x[0], x[1], 2.5, 2.5], var, 1))

                            for h in range(0, numSteps):
                                act = random.randint(0, 4)
                                x = np.random.multivariate_normal(
                                    [x[0] + self.delA[act][0],
                                     x[1] + self.delA[act][1],
                                     x[2] + self.delA[act][2],
                                     x[3] + self.delA[act][3]],
                                    self.delAVar, size=1)[0].tolist()

                                x[0] = min(max(x[0], 0), 5)
                                x[1] = min(max(x[1], 0), 5)
                                x[2] = min(max(x[2], 0), 5)
                                x[3] = min(max(x[3], 0), 5)

                                if not altObs:
                                    if self.distance(x[0], x[1],
                                                     x[2], x[3]) <= 1:
                                        z = 0
                                    else:
                                        z = 1
                                else:
                                    if self.distance(x[0], x[1],
                                                     x[2], x[3]) <= 1:
                                        z = 0
                                    elif (x[0] - x[2] > 0 and
                                          abs(x[0] - x[2]) >
                                          abs(x[1] - x[3])):
                                        z = 1
                                    elif (x[0] - x[2] < 0 and
                                          abs(x[0] - x[2]) >
                                          abs(x[1] - x[3])):
                                        z = 2
                                    elif (x[1] - x[3] > 0 and
                                          abs(x[1] - x[3]) >
                                          abs(x[0] - x[2])):
                                        z = 3
                                    elif (x[1] - x[3] < 0 and
                                          abs(x[1] - x[3]) >
                                          abs(x[0] - x[2])):
                                        z = 4

                                b = self.beliefUpdate(b, act, z, beliefMaxMix)
                                allB.append(b)
                                allX.append(x)
                                allX0.append(x[0])
                                allX1.append(x[1])
                                allX2.append(x[2])
                                allX3.append(x[3])

            f = open(belSave, "wb")
            np.save(f, allB)
            # allB[numSteps].plot2D()
            print(max(allX0), min(allX0), sum(allX0) / float(len(allX0)))
            print(max(allX1), min(allX1), sum(allX1) / float(len(allX1)))
            print(max(allX2), min(allX2), sum(allX2) / float(len(allX2)))
            print(max(allX3), min(allX3), sum(allX3) / float(len(allX3)))
        else:
            self.Gamma = np.load(policy, allow_pickle=True)

            for count in range(0, numSteps):
                if human:
                    ax.cla()
                    col = 'b'
                    if self.distance(x[0], x[1], x[2], x[3]) <= 1:
                        col = 'g'
                    [xx, y, c] = b.slice2DFrom4D(vis=False)
                    plt.contourf(xx, y, c, cmap='viridis')
                    plt.scatter(x[0], x[1], c=col, s=200)
                    plt.pause(0.5)

                    # Numpad-style action input: 4/6/2/8 move, 5 stays,
                    # 99 quits.
                    act = -1
                    while act not in [4, 6, 2, 8, 5, 99]:
                        try:
                            act = int(input('Action?'))
                            if act == 99:
                                break
                        except ValueError:
                            print("Please enter a valid action...")
                    if act == 4:
                        act = 0
                    elif act == 6:
                        act = 1
                    elif act == 2:
                        act = 2
                    elif act == 8:
                        act = 3
                    elif act == 5:
                        act = 4
                    if act == 99:
                        self.exitFlag = True
                        break
                elif greedy:
                    act = self.getGreedyAction(b, x)
                elif MDP:
                    act = self.getMDPAction(x)
                    # print(act)
                elif QMDP:
                    act = self.getQMDPAction(b)
                else:
                    act = self.getAction(b)

                # Don't walk off the edge of the arena; stay instead.
                if ((x[0] == 0 and act == 0) or (x[0] == 5 and act == 1)
                        or (x[1] == 0 and act == 2)
                        or (x[1] == 5 and act == 3)):
                    act = 4

                x = np.random.multivariate_normal(
                    [x[0] + self.delA[act][0], x[1] + self.delA[act][1],
                     x[2] + self.delA[act][2], x[3] + self.delA[act][3]],
                    self.delAVar, size=1)[0].tolist()
                allAct.append(act)

                x[0] = min(max(x[0], 0), 5)
                x[1] = min(max(x[1], 0), 5)
                x[2] = min(max(x[2], 0), 5)
                x[3] = min(max(x[3], 0), 5)

                if not altObs:
                    if self.distance(x[0], x[1], x[2], x[3]) <= 1:
                        z = 0
                    else:
                        z = 1
                else:
                    if self.distance(x[0], x[1], x[2], x[3]) <= 1:
                        z = 0
                    elif (x[0] - x[2] > 0 and
                          abs(x[0] - x[2]) > abs(x[1] - x[3])):
                        z = 1
                    elif (x[0] - x[2] < 0 and
                          abs(x[0] - x[2]) > abs(x[1] - x[3])):
                        z = 2
                    elif (x[1] - x[3] > 0 and
                          abs(x[1] - x[3]) > abs(x[0] - x[2])):
                        z = 3
                    elif (x[1] - x[3] < 0 and
                          abs(x[1] - x[3]) > abs(x[0] - x[2])):
                        z = 4

                if not MDP:
                    b = self.beliefUpdate(b, act, z, beliefMaxMix)

                '''
                col = 'b';
                if(self.distance(x[0],x[1],x[2],x[3]) <= 1):
                    col = 'g'
                [xx,y,c] = b.slice2DFrom4D(vis=False);
                plt.contourf(xx,y,c,cmap = 'viridis');
                plt.scatter(x[0],x[1],c=col,s = 200);
                plt.pause(0.5);
                print(act);
                '''

                allB.append(b)
                allX.append(x)
                allX0.append(x[0])
                allX1.append(x[1])
                allX2.append(x[2])
                allX3.append(x[3])

                if self.distance(x[0], x[1], x[2], x[3]) <= 1:
                    reward += 3
                else:
                    reward -= 1
                allReward.append(reward)

        allAct.append(-1)
        if verbose:
            print("Simulation Complete. Accumulated Reward: " + str(reward))
        return [allB, allX0, allX1, allX2, allX3, allAct, allReward]
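    # Sketch: collect reward traces from several Monte Carlo runs in the
    # shape plotRewardErrorBounds (below) expects. The run count and solver
    # options are illustrative, and the default alpha file must exist on
    # disk since simulate() loads it. This helper is an added example.
    def exampleMonteCarlo(self, runs=10):
        allSimRewards = []
        for _ in range(runs):
            res = self.simulate(numSteps=20, QMDP=True, mdpGen=False,
                                verbose=False)
            allSimRewards.append(res[6])  # the reward trace is element 6
        return allSimRewards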
    def plotRewardErrorBounds(self, allSimRewards):
        # Find the average reward at each step.
        averageRewards = copy.deepcopy(allSimRewards[0])
        for i in range(1, len(allSimRewards)):
            for j in range(0, len(allSimRewards[i])):
                averageRewards[j] += allSimRewards[i][j]
        for i in range(0, len(averageRewards)):
            averageRewards[i] = averageRewards[i] / len(allSimRewards)

        # Find the two-sigma bounds.
        sampleVariances = [0 for i in range(0, len(allSimRewards[0]))]
        twoSigmaBounds = [0 for i in range(0, len(allSimRewards[0]))]
        for i in range(0, len(sampleVariances)):
            suma = 0
            for j in range(0, len(allSimRewards)):
                suma += (allSimRewards[j][i] - averageRewards[i])**2
            sampleVariances[i] = suma / len(allSimRewards)
            twoSigmaBounds[i] = math.sqrt(sampleVariances[i]) * 2

        # Plot the figure.
        time = [i for i in range(0, len(allSimRewards[0]))]
        plt.figure()
        plt.errorbar(time, averageRewards, yerr=twoSigmaBounds)
        plt.xlabel('Simulation Step')
        plt.title('Average Simulation Reward with Error Bounds for '
                  + str(len(allSimRewards)) + ' simulations.')
        plt.ylabel('Reward')
        plt.show()

    def ani(self, bels, allX0, allX1, allX2, allX3, numFrames=20):
        fig, ax = plt.subplots()
        a = np.linspace(0, 0, num=100)
        xlabel = 'Robber X Position'
        ylabel = 'Robber Y Position'
        title = 'Belief Animation'
        images = []

        # Render each frame to disk, then stitch them into a gif.
        for t in range(0, numFrames):
            if t != 0:
                ax.cla()
            [x, y, c] = bels[t].slice2DFrom4D(vis=False)
            ax.contourf(x, y, c, cmap='viridis')
            col = 'b'
            if self.distance(allX0[t], allX1[t], allX2[t], allX3[t]) <= 1:
                col = 'g'
            cop = ax.scatter(allX0[t], allX1[t], color=col, s=100)
            robber = ax.scatter(allX2[t], allX3[t], color='red', s=100)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            ax.set_title(title)
            fig.savefig('../tmp/img' + str(t) + ".png")
            # print('../tmp/img' + str(t) + ".png")
            plt.pause(0.5)

        for k in range(0, numFrames - 1):
            fname = "../tmp/img%d.png" % k
            # print(fname)
            img = mgimg.imread(fname)
            imgplot = plt.imshow(img)
            images.append([imgplot])

        # fig = plt.figure()
        my_ani = animation.ArtistAnimation(fig, images, interval=20)
        my_ani.save("../Results/animation.gif", fps=2)
        # plt.show()

    def signal_handler(self, signal, frame):
        print("Stopping Policy Generation and printing to file")
        self.exitFlag = True

    def plotAllSlices(self, a, title):
        fig, ax = plt.subplots(2, 2)

        [x1, y1, c1] = a.slice2DFrom4D(vis=False, dims=[0, 2])
        ax[0, 0].contourf(x1, y1, c1, cmap='viridis')
        ax[0, 0].set_title('Cop X with Robber X')

        [x2, y2, c2] = a.slice2DFrom4D(vis=False, dims=[0, 3])
        ax[0, 1].contourf(x2, y2, c2, cmap='viridis')
        ax[0, 1].set_title('Cop X with Robber Y')

        [x3, y3, c3] = a.slice2DFrom4D(vis=False, dims=[1, 2])
        ax[1, 0].contourf(x3, y3, c3, cmap='viridis')
        ax[1, 0].set_title('Cop Y with Robber X')

        [x4, y4, c4] = a.slice2DFrom4D(vis=False, dims=[1, 3])
        ax[1, 1].contourf(x4, y4, c4, cmap='viridis')
        ax[1, 1].set_title('Cop Y with Robber Y')

        fig.suptitle(title)
        plt.show()

    def loadPolicy(self, fileName):
        self.Gamma = np.load(fileName, allow_pickle=True)
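# A minimal driver sketch, assuming pre-generated models exist on disk so the
# constructor can load them (gen=False), and that the default alpha-vector
# policy file is present. The run count and options are illustrative, not
# prescribed by the class, and exampleMonteCarlo is the hypothetical helper
# defined above.
if __name__ == '__main__':
    agent = InterceptTestGenerator(gen=False, qGen=False, humObs=False)
    allSimRewards = agent.exampleMonteCarlo(runs=5)   # QMDP rollouts
    agent.plotRewardErrorBounds(allSimRewards)        # mean +/- 2 sigma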