def backup(self, b, als1):
    """Run one point-based backup at belief ``b`` over three actions.

    For every action the entry of ``als1`` that scores highest against
    ``b`` (via ``self.continuousDot``) is selected, discounted by
    ``self.discount`` and offset by the reward ``self.rew``.  The
    candidate with the largest value at ``b`` wins.

    Args:
        b: belief the backup is evaluated at.
        als1: pre-computed alphas, indexed ``als1[alpha][action]``.

    Returns:
        A deep copy of the winning mixture with its ``action`` attribute
        set to the winning action index.

    Note:
        The winner starts out as an empty list; if no candidate scores
        above -1e10 the final attribute assignment raises
        ``AttributeError``.
    """
    # NOTE(review): Gamma is read here but never used below.
    gamma = self.Gamma
    reward = self.rew
    action_count = 3

    top_value = -10000000000
    top_action = 0
    top_mixture = []

    for act in range(action_count):
        candidate = GM()
        scores = [
            self.continuousDot(als1[idx][act], b)
            for idx in range(len(als1))
        ]
        candidate.addGM(als1[np.argmax(scores)][act])
        candidate.scalarMultiply(self.discount)
        candidate.addGM(reward)

        value = self.continuousDot(candidate, b)
        if value > top_value:
            top_action = act
            top_mixture = deepcopy(candidate)
            top_value = value

    top_mixture.action = top_action
    return top_mixture
def theArena(mix, kmeansFunc, numClusters=4, finalNum=5, verbose=False):
    """Condense ``mix`` by clustering its positive part and merging per cluster.

    Args:
        mix: mixture to condense; a deep copy is what gets processed.
        kmeansFunc: clustering routine forwarded to ``cluster``.
        numClusters: number of intermediate clusters.
        finalNum: final number of mixands per cluster.
        verbose: when true, ``plotResults(mix, newMix)`` is called.

    Returns:
        A new ``GM`` holding the condensed positive component, rescaled
        by the normalisation factor from ``separateAndNormalize``.

    Note:
        The negative component is separated out but not clustered or
        recombined.
    """
    # Separate into positive / negative parts, working on a private copy.
    positive, _negative, positive_scale, _negative_scale = \
        separateAndNormalize(deepcopy(mix))

    # Cluster the positive part only.
    positive_clusters = cluster(positive, kmeansFunc, k=numClusters)

    # Condense each cluster towards its share of the overall target.
    total_target = numClusters * finalNum
    original_size = mix.size
    condensed = conComb(positive_clusters, finalNum, total_target,
                        original_size)

    # Recombine: undo the normalisation and wrap in a fresh mixture.
    condensed.scalerMultiply(positive_scale)
    result = GM()
    result.addGM(condensed)

    if verbose:
        plotResults(mix, result)
    return result
def backupFactored(self, b):
    """Point-based backup over factored (movement, question) actions.

    For each movement index in ``self.delA`` and each of 8 question
    indices, the best pre-computed alpha from ``self.preAls`` is chosen
    per answer (2 answers), the picks are summed, discounted by
    ``self.discount`` and offset by the movement reward ``self.r[am]``.

    Args:
        b: belief the backup is evaluated at.

    Returns:
        A deep copy of the best candidate with ``action`` set to
        ``[movement, question]``.

    Note:
        The winner starts out as an empty list; if no candidate scores
        above -1e10 the final attribute assignment raises
        ``AttributeError``.
    """
    # NOTE(review): Gamma and pz are read but never used in this method.
    gamma = self.Gamma
    rewards = self.r
    obs_model = self.pz
    table = self.preAls

    top_value = -10000000000
    top_action = [0, 0]
    top_mixture = []

    for move in range(len(self.delA)):
        # NOTE(review): 8 questions x 2 answers are hard-coded and must
        # match the inner dimensions of self.preAls -- confirm.
        for question in range(8):
            candidate = GM()
            for answer in range(2):
                scores = [
                    self.continuousDot(table[idx][move][question][answer], b)
                    for idx in range(len(table))
                ]
                winner = np.argmax(scores)
                candidate.addGM(table[winner][move][question][answer])
            candidate.scalerMultiply(self.discount)
            candidate.addGM(rewards[move])

            value = self.continuousDot(candidate, b)
            if value > top_value:
                top_action = [move, question]
                top_mixture = copy.deepcopy(candidate)
                top_value = value

    top_mixture.action = top_action
    return top_mixture
def backup(self, b):
    """Point-based backup at belief ``b`` using ``self.preAls``.

    The number of observations is ``self.pz.size`` when ``self.useSoft``
    is set and ``len(self.pz)`` otherwise.  For each action the best
    pre-computed alpha per observation is summed, discounted by
    ``self.discount`` and offset by the action reward ``self.r[a]``.

    Args:
        b: belief the backup is evaluated at.

    Returns:
        A deep copy of the best candidate with ``action`` set to the
        winning action index.

    Note:
        The winner starts out as an empty list; if no candidate scores
        above -1e10 the final attribute assignment raises
        ``AttributeError``.
    """
    # NOTE(review): Gamma is read but never used in this method.
    gamma = self.Gamma
    rewards = self.r
    obs_model = self.pz
    obs_count = obs_model.size if self.useSoft else len(obs_model)
    table = self.preAls

    top_value = -10000000000
    top_action = 0
    top_mixture = []

    for act in range(len(self.delA)):
        candidate = GM()
        for obs in range(obs_count):
            scores = [
                self.continuousDot(table[idx][act][obs], b)
                for idx in range(len(table))
            ]
            candidate.addGM(table[np.argmax(scores)][act][obs])
        candidate.scalerMultiply(self.discount)
        candidate.addGM(rewards[act])

        value = self.continuousDot(candidate, b)
        if value > top_value:
            top_action = act
            top_mixture = copy.deepcopy(candidate)
            top_value = value

    top_mixture.action = top_action
    return top_mixture
def backup(self, b):
    """Point-based backup at belief ``b`` with a single shared reward.

    One alpha is produced per belief, so one per backup.  For each
    action the best pre-computed alpha per observation (from
    ``self.preAls``) is summed, discounted by ``self.discount`` and
    offset by the whole reward mixture ``self.r``.

    Args:
        b: belief the backup is evaluated at.

    Returns:
        The best candidate mixture itself (not a copy) with ``action``
        set to the winning action index.

    Note:
        The winner starts out as an empty list; if no candidate scores
        above -1e10 the final attribute assignment raises
        ``AttributeError``.
    """
    # NOTE(review): Gamma is read but never used in this method.
    gamma = self.Gamma
    reward = self.r
    obs_models = self.pz
    table = self.preAls

    top_value = -10000000000
    top_action = 0
    top_mixture = []

    for act in range(len(self.delA)):
        candidate = GM()
        for obs in range(len(obs_models)):
            scores = [
                self.continuousDot(table[idx][act][obs], b)
                for idx in range(len(table))
            ]
            candidate.addGM(table[np.argmax(scores)][act][obs])
        candidate.scalerMultiply(self.discount)
        candidate.addGM(reward)

        value = self.continuousDot(candidate, b)
        if value > top_value:
            top_action = act
            # Each candidate is a fresh GM, so no copy is taken here.
            top_mixture = candidate
            top_value = value

    top_mixture.action = top_action
    return top_mixture
def backup(als, modes, delA, delAVar, pz, r, maxMix, b):
    """Compute a backed-up alpha for belief ``b`` under mode-dependent dynamics.

    Step 1 builds ``newAls[i][j][k]`` for every alpha ``i``, action ``j``
    and observation ``k``: the alpha is multiplied by the observation
    model ``pz[k]``, every resulting 1-D Gaussian is shifted by the
    mode's action displacement and widened by ``delAVar``, and the
    result is multiplied by the mode mixture.  The per-mode results are
    summed.

    Step 2 picks, per action, the best ``newAls`` entry per observation
    against ``b``, sums them, scales by a fixed discount of 0.9, adds
    the reward ``r[a]``, condenses to ``maxMix`` mixands, and keeps the
    action whose mixture scores highest at ``b``.

    Args:
        als: current alpha mixtures.
        modes: list of mode mixtures; ``delA[m]`` holds the per-action
            displacements of mode ``m``.
        delA: displacements indexed ``delA[mode][action]``.
        delAVar: scalar variance added by the transition.
        pz: observation mixtures, one per observation.
        r: reward mixtures, one per action.
        maxMix: target size passed to ``kmeansCondensationN``.
        b: belief the backup is evaluated at.

    Returns:
        A deep copy of the best condensed mixture with ``action`` set.

    Note:
        The winner starts out as an empty list; if no candidate scores
        above -1e10 the final attribute assignment raises
        ``AttributeError``.
    """
    num_actions = len(delA[0])
    num_obs = len(pz)

    newAls = [[[None for _ in range(num_obs)] for _ in range(num_actions)]
              for _ in range(len(als))]

    for i, alpha in enumerate(als):
        for j in range(num_actions):
            for k in range(num_obs):
                # The observation index k selects the observation model.
                # (Indexing pz by the action index j gave every
                # observation the same alpha.)
                weighted = alpha.GMProduct(pz[k])
                means = weighted.getMeans()
                variances = weighted.getVars()
                weights = weighted.getWeights()

                stepped = GM()
                for mode_idx, mode in enumerate(modes):
                    shift = delA[mode_idx][j]
                    shifted = GM()
                    for l in range(len(means)):
                        # Means/variances come back nested ([m], [[v]]);
                        # unwrap to scalars for the 1-D Gaussian.
                        shifted.addG(Gaussian(means[l][0] - shift,
                                              variances[l][0][0] + delAVar,
                                              weights[l]))
                    stepped.addGM(shifted.GMProduct(mode))
                newAls[i][j][k] = stepped

    bestVal = -10000000000
    bestAct = 0
    bestGM = []
    for a in range(num_actions):
        suma = GM()
        for o in range(num_obs):
            scores = [continuousDot(newAls[j][a][o], b)
                      for j in range(len(newAls))]
            suma.addGM(newAls[np.argmax(scores)][a][o])
        suma.scalerMultiply(0.9)
        suma.addGM(r[a])

        # Flatten any nested 1-D parameters before condensation.
        for g in suma.Gs:
            if isinstance(g.mean, list):
                g.mean = g.mean[0]
                g.var = g.var[0][0]

        suma = suma.kmeansCondensationN(k=maxMix, lowInit=-20, highInit=20)
        tmp = continuousDot(suma, b)
        if tmp > bestVal:
            bestAct = a
            bestGM = copy.deepcopy(suma)
            bestVal = tmp

    bestGM.action = bestAct
    return bestGM
def conComb(mixtures, max_num_mixands, finalTotalDesired, startingSize):
    """Condense each mixture proportionally and merge the results.

    Each mixture gets a condensation target of
    ``size * finalTotalDesired / startingSize`` (at least 1) and is
    reduced with ``condense``.  Results that expose a positive ``size``
    are added to the output; results without the expected attributes
    are silently discarded.

    Args:
        mixtures: iterable of mixtures to condense.
        max_num_mixands: accepted for interface compatibility; not used.
        finalTotalDesired: total mixand count aimed for across clusters.
        startingSize: size of the original, un-clustered mixture.

    Returns:
        A new ``GM`` containing all retained condensed mixtures.
    """
    combined = GM()
    for component in mixtures:
        # NOTE(review): floor is applied to the size alone, so the target
        # can be fractional -- confirm whether the whole ratio was meant.
        share = (np.floor(component.size) * finalTotalDesired) / startingSize
        target = max(1, share)
        reduced = deepcopy(condense(component, target))
        try:
            if reduced.size > 0:
                combined.addGM(reduced)
        except AttributeError:
            # Best effort: a result lacking the expected attributes is
            # thrown out rather than aborting the whole merge.
            continue
    return combined
def testMakeNear():
    """Visual check of a "near" region built from two rectangle models.

    An inner oriented-rectangle softmax model and an outer one (enlarged
    by ``nearness``) share the centre ``[4, 4]``.  A grid prior is
    updated with classes 1-4 of the inner model, then with class 0 of
    the outer model, and the three beliefs are drawn as stacked contour
    plots.
    """
    centre = [4, 4]
    heading = 0
    nearness = 2
    inner_length, inner_width = 3, 2
    outer_length = inner_length + nearness
    outer_width = inner_width + nearness

    pzIn = Softmax()
    pzOut = Softmax()
    pzIn.buildOrientedRecModel(centre, heading, inner_length, inner_width,
                               steepness=10)
    pzOut.buildOrientedRecModel(centre, heading, outer_length, outer_width,
                                steepness=10)

    # Prior: unit-covariance Gaussians on the integer grid [0, 10) x [0, 10).
    prior = GM()
    for gx in range(10):
        for gy in range(10):
            prior.addG(Gaussian([gx, gy], [[1, 0], [0, 1]], 1))
    prior.normalizeWeights()

    # Sum of the inner model's classes 1..4.
    outside_inner = GM()
    for softClass in range(1, 5):
        outside_inner.addGM(pzIn.runVBND(prior, softClass))
    outside_inner.normalizeWeights()

    # Restrict to class 0 of the outer model.
    near = GM()
    near.addGM(pzOut.runVBND(outside_inner, 0))
    near.normalizeWeights()

    fig, axarr = plt.subplots(3)
    for panel, mixture in zip(range(3), (prior, outside_inner, near)):
        [x, y, c] = mixture.plot2D(low=[0, 0], high=[10, 10], vis=False)
        axarr[panel].contourf(x, y, c)
    plt.show()
def getMDPAction(self, x):
    """Return the greedy MDP action index at state ``x``.

    For every action the value function ``self.ValueFunc`` is shifted by
    the action displacement ``self.delA[a]``, widened by
    ``self.delAVar``, and offset by the reward ``self.r``.  The action
    whose mixture evaluates highest at ``x`` is returned.

    Args:
        x: point passed to ``pointEval``.

    Returns:
        Index of the best action; 0 if nothing scores above -1e7.
    """
    top_value = -10000000
    # NOTE(review): the leading mixture is tracked but never returned.
    top_mixture = GM()
    top_action = 0

    for act in range(len(self.delA)):
        shifted = GM()
        for comp in self.ValueFunc.Gs:
            new_mean = (np.matrix(comp.mean)
                        - np.matrix(self.delA[act])).tolist()
            new_var = (np.matrix(comp.var)
                       + np.matrix(self.delAVar)).tolist()
            shifted.addG(Gaussian(new_mean, new_var, comp.weight))
        shifted.addGM(self.r)

        value = shifted.pointEval(x)
        if value > top_value:
            top_value = value
            top_mixture = shifted
            top_action = act

    return top_action
def testMakeNear():
    """Visual check of a "near" region built from two rectangle models.

    An inner oriented-rectangle softmax model and an outer one (enlarged
    by ``nearness``) share the centre ``[3.5, 3.5]``.  A grid prior is
    updated with classes 1-4 of the inner model, then with class 0 of
    the outer model, and the three beliefs are drawn as stacked contour
    plots.
    """
    centre = [3.5, 3.5]
    heading = 0
    nearness = 2
    inner_length, inner_width = 3, 2
    outer_length = inner_length + nearness
    outer_width = inner_width + nearness

    pzIn = Softmax()
    pzOut = Softmax()
    pzIn.buildOrientedRecModel(centre, heading, inner_length, inner_width,
                               steepness=10)
    pzOut.buildOrientedRecModel(centre, heading, outer_length, outer_width,
                                steepness=10)

    # Prior: unit-covariance Gaussians on the integer grid [0, 10] x [0, 10].
    prior = GM()
    for gx in range(11):
        for gy in range(11):
            prior.addG(Gaussian([gx, gy], [[1, 0], [0, 1]], 1))
    prior.normalizeWeights()

    # Sum of the inner model's classes 1..4.
    outside_inner = GM()
    for softClass in range(1, 5):
        outside_inner.addGM(pzIn.runVBND(prior, softClass))
    outside_inner.normalizeWeights()

    # Restrict to class 0 of the outer model.
    near = GM()
    near.addGM(pzOut.runVBND(outside_inner, 0))
    near.normalizeWeights()

    fig, axarr = plt.subplots(3)
    for panel, mixture in zip(range(3), (prior, outside_inner, near)):
        [x, y, c] = mixture.plot2D(low=[0, 0], high=[10, 10], vis=False)
        axarr[panel].contourf(x, y, c)
    plt.show()
def preComputeAlsSoftmaxFactored(self):
    """Pre-compute the intermediate alphas for the factored backup.

    Fills ``self.preAls[j][am][aq][oq]`` for every alpha ``j`` in
    ``self.Gamma``, movement ``am`` in ``self.delA``, question ``aq``
    (3 of them) and answer ``oq`` (2 of them).  Each entry is the alpha
    updated through the softmax model ``self.pz`` for that
    question/answer, then shifted by ``self.delA[am]`` and widened by
    ``self.delAVar``.

    Answer 1 uses the single class ``aq + 1`` (``aq + 2`` for question
    2).  Answer 0 sums several classes from 1..4, making the result
    multimodal.
    """
    alphas = self.Gamma
    table = []
    for j in range(len(alphas)):
        per_move = []
        for am in range(len(self.delA)):
            per_question = []
            for aq in range(3):
                per_answer = []
                for oq in range(2):
                    # Observation update for this question/answer.
                    observed = GM()
                    if oq == 0:
                        # NOTE(review): classes are filtered with
                        # h != aq and h != 3, whereas answer 1 uses
                        # class aq+1 / aq+2 -- confirm the offset.
                        for h in range(1, 5):
                            if h != aq and h != 3:
                                observed.addGM(self.pz.runVBND(alphas[j], h))
                    elif oq == 1:
                        softClass = aq + 2 if aq == 2 else aq + 1
                        observed.addGM(self.pz.runVBND(alphas[j], softClass))

                    # Transition: shift by the movement, add process noise.
                    stepped = GM()
                    for comp in observed.Gs:
                        new_mean = (np.matrix(comp.mean)
                                    - np.matrix(self.delA[am])).tolist()
                        new_var = (np.matrix(comp.var)
                                   + np.matrix(self.delAVar)).tolist()
                        stepped.addG(Gaussian(new_mean, new_var, comp.weight))
                    per_answer.append(stepped)
                per_question.append(per_answer)
            per_move.append(per_question)
        table.append(per_move)
    self.preAls = table
def backup(self, b):
    """Point-based backup at belief ``b`` using ``self.preAls``.

    With ``self.useSoft`` set the observation count is ``self.pz.size``;
    otherwise it is ``len(self.pz)``.  Per action, the highest-scoring
    pre-computed alpha for each observation is accumulated, scaled by
    ``self.discount`` and offset by ``self.r[a]``; the action whose
    mixture scores highest at ``b`` is kept.

    Args:
        b: belief the backup is evaluated at.

    Returns:
        A deep copy of the winning mixture with ``action`` set to the
        winning action index.

    Note:
        The winner starts out as an empty list; if no candidate scores
        above -1e10 the final attribute assignment raises
        ``AttributeError``.
    """
    # NOTE(review): Gamma is read but never used in this method.
    gamma = self.Gamma
    rewards = self.r
    obs_model = self.pz
    if self.useSoft:
        n_obs = obs_model.size
    else:
        n_obs = len(obs_model)
    pre_alphas = self.preAls

    leader = []
    leader_value = -10000000000
    leader_action = 0

    for a in range(len(self.delA)):
        total = GM()
        for o in range(n_obs):
            dots = [self.continuousDot(pre_alphas[j][a][o], b)
                    for j in range(len(pre_alphas))]
            total.addGM(pre_alphas[np.argmax(dots)][a][o])
        total.scalerMultiply(self.discount)
        total.addGM(rewards[a])

        score = self.continuousDot(total, b)
        if score > leader_value:
            leader_action = a
            leader = copy.deepcopy(total)
            leader_value = score

    leader.action = leader_action
    return leader
def stateObsUpdate(self, name, relation, pos="Is"):
    """Update ``self.belief`` with a spatial statement about ``name``.

    Args:
        name: sketch to reference; ``'You'`` builds a softmax model from
            a 10x10 box centred on ``self.copPose``, any other value is
            looked up in ``self.sketches``.
        relation: key into ``self.spatialRealtions`` giving the softmax
            class of the stated relation.
        pos: ``"Is"`` applies the class directly; any other value
            applies every other class in 0..4 and sums the results.

    The belief is normalised after the update and condensed to
    ``self.MAX_BELIEF_SIZE`` (then normalised again) if it has grown
    past that size.
    """
    if name == 'You':
        # Take the cop's position and build a box around it.
        cx, cy = self.copPose[0], self.copPose[1]
        corners = [[cx - 5, cy - 5], [cx + 5, cy - 5],
                   [cx + 5, cy + 5], [cx - 5, cy + 5]]
        soft = Softmax()
        soft.buildPointsModel(corners, steepness=3)
    else:
        soft = self.sketches[name]

    softClass = self.spatialRealtions[relation]

    if pos == "Is":
        self.belief = soft.runVBND(self.belief, softClass)
        self.belief.normalizeWeights()
    else:
        complement = GM()
        for other in range(5):
            if other != softClass:
                complement.addGM(soft.runVBND(self.belief, other))
        complement.normalizeWeights()
        self.belief = complement

    if self.belief.size > self.MAX_BELIEF_SIZE:
        self.belief.condense(self.MAX_BELIEF_SIZE)
        self.belief.normalizeWeights()
def testInvertedSoftmaxModels():
    """Compare two ways of computing the belief outside a softmax class.

    Method 1 sums the VB updates for classes 1-4 of a rectangle model.
    Method 2 subtracts the normalised class-0 update from the prior,
    shifts all weights by the smallest one, and renormalises.  Both
    results are plotted and the elapsed time of each is printed.
    """
    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that predate perf_counter.
    timer = getattr(time, 'perf_counter', None) or time.clock

    b = GM()
    for centre in ([2, 2], [4, 2], [2, 4], [3, 3]):
        b.addG(Gaussian(centre, [[1, 0], [0, 1]], 1))
    b.normalizeWeights()
    b.plot2D()

    pz = Softmax()
    pz.buildOrientedRecModel([2, 2], 0, 1, 1, 5)

    # Method 1: sum over the complementary classes.
    startTime = timer()
    b2 = GM()
    for i in range(1, 5):
        b2.addGM(pz.runVBND(b, i))
    print(timer() - startTime)
    b2.plot2D()

    # Method 2: prior minus the class-0 posterior.
    startTime = timer()
    b3 = GM()
    b3.addGM(b)
    tmpB = pz.runVBND(b, 0)
    tmpB.normalizeWeights()
    tmpB.scalerMultiply(-1)
    b3.addGM(tmpB)

    # Shift every weight by the minimum before renormalising.
    mi = min(b3.getWeights())
    for g in b3.Gs:
        g.weight = g.weight - mi
    b3.normalizeWeights()
    print(timer() - startTime)
    b3.plot2D()
def oldbeliefUpdate(self, belief, responses=None, copPoses=None):
    """Room-partitioned belief update (older variant).

    Steps:
      1. Split ``belief`` into one mixture per room of
         ``self.map_.rooms``, by whether each Gaussian's robber mean
         (``mean[2], mean[3]``) lies strictly inside the room bounds,
         remembering the weight mass of each room.
      2. Apply a triangular view-cone update at the latest cop pose to
         every room (classes 1..4; class 1 is scaled by 0.8), then
         apply any queued ``responses``.
      3. Clamp robber means to their room (0.01 slack), condense each
         room to 15 mixands, rescale by the stored room mass and
         recombine.
      4. Overwrite the cop part of every Gaussian with ``copPoses[0]``
         and a fixed 0.1 diagonal variance.

    Args:
        belief: iterable mixture of 4-D Gaussians (cop x, y, robber x, y).
        responses: optional iterable of ``(roomNum, model, class, sign)``.
            ``sign == True`` applies the class; otherwise all classes
            from 1 upward are summed, skipping ``class`` when ``roomNum``
            is non-zero.  Responses are applied to every room regardless
            of ``roomNum``.
        copPoses: sequence of cop poses.  Despite the ``None`` default
            it must be a non-empty sequence; it is indexed
            unconditionally.

    Returns:
        The recombined, normalised belief.  ``self.makeBeliefMap`` is
        also called with it and the latest pose.
    """
    print('UPDATING BELIEF')

    # 1. partition means into separate GMs, one for each room
    rooms = self.map_.rooms
    allBels = []
    allBounds = []
    weightSums = []
    for room in rooms:
        info = rooms[room]
        allBounds.append([info['min_x'], info['min_y'],
                          info['max_x'], info['max_y']])
        roomBel = GM()
        roomWeight = 0
        for g in belief:
            # keep the Gaussian if its robber mean is inside the room
            if (info['min_x'] < g.mean[2] < info['max_x']
                    and info['min_y'] < g.mean[3] < info['max_y']):
                roomBel.addG(deepcopy(g))
                roomWeight += g.weight
        roomBel.normalizeWeights()
        allBels.append(roomBel)
        weightSums.append(roomWeight)

    # View cone at the most recent cop pose, lifted to the 4-D state by
    # zero-padding the cop dimensions.
    pose = copPoses[-1]
    viewCone = Softmax()
    viewCone.buildTriView(pose, length=1, steepness=10)
    for i in range(len(viewCone.weights)):
        viewCone.weights[i] = [0, 0,
                               viewCone.weights[i][0],
                               viewCone.weights[i][1]]

    # The view-cone update is applied to every room, not only the one
    # the cop is standing in.
    for i in range(len(allBels)):
        updated = GM()
        for j in range(1, 5):
            tmpBel = viewCone.runVBND(allBels[i], j)
            if j == 1:
                tmpBel.scalerMultiply(.8)
            updated.addGM(tmpBel)
        allBels[i] = updated
    for roomBel in allBels:
        roomBel.normalizeWeights()
    print('allBels LENGTH: {}'.format(len(allBels)))

    # 2. use queued observations to update the room GMs
    if responses is not None:
        for res in responses:
            roomNum, mod, clas, sign = res[0], res[1], res[2], res[3]
            if roomNum == 0:
                # apply to all rooms, using class 0
                for i in range(len(allBels)):
                    if sign == True:
                        allBels[i] = mod.runVBND(allBels[i], 0)
                    else:
                        tmp = GM()
                        for j in range(1, mod.size):
                            tmp.addGM(mod.runVBND(allBels[i], j))
                        allBels[i] = tmp
            else:
                print('ROOM NUM: {}'.format(roomNum))
                # also applied to all rooms, using the stated class
                for i in range(len(allBels)):
                    if sign == True:
                        allBels[i] = mod.runVBND(allBels[i], clas)
                    else:
                        tmp = GM()
                        for j in range(1, mod.size):
                            if j != clas:
                                tmp.addGM(mod.runVBND(allBels[i], j))
                        allBels[i] = tmp

    # 2.5. keep every GM within its room bounds, then condense.
    # Rooms and bounds are paired positionally; looking the index up
    # with list.index() is an equality search repeated per Gaussian.
    for roomBel, bounds in zip(allBels, allBounds):
        min_x, min_y, max_x, max_y = bounds
        for g in roomBel:
            g.mean[2] = max(g.mean[2], min_x - 0.01)
            g.mean[2] = min(g.mean[2], max_x + 0.01)
            g.mean[3] = max(g.mean[3], min_y - 0.01)
            g.mean[3] = min(g.mean[3], max_y + 0.01)
    for roomBel in allBels:
        roomBel.condense(15)

    # 3. recombine beliefs, restoring each room's original mass
    newBelief = GM()
    for roomBel, mass in zip(allBels, weightSums):
        roomBel.scalerMultiply(mass)
        newBelief.addGM(roomBel)
    newBelief.normalizeWeights()

    # 4. fix the cop's position in the belief
    # NOTE(review): this uses the FIRST pose while the view cone and the
    # map use the LAST one -- confirm which is intended.
    for g in newBelief:
        g.mean = [copPoses[0][0], copPoses[0][1], g.mean[2], g.mean[3]]
        g.var[0][0] = 0.1
        g.var[0][1] = 0
        g.var[1][0] = 0
        g.var[1][1] = 0.1

    # 5. no extra uncertainty is added to the robber position here
    #    (the original increment was zero).

    pose = copPoses[len(copPoses) - 1]
    print("MAP COP POSE TO PLOT: {}".format(pose))
    self.makeBeliefMap(newBelief, pose)
    return newBelief
def beliefUpdate(self, belief, responses=None, copPoses=None):
    """Update the joint cop/robber belief with poses and human responses.

    Steps:
      * Pin the cop part of every Gaussian to ``copPoses[-1]`` with a
        0.1 diagonal variance, and add 0.03 to both robber variances.
      * For every pose in ``copPoses`` build a triangular view cone;
        Gaussians whose robber marginal is within ``distCut`` (2)
        Mahalanobis units of a point 0.5 behind the pose get an inverse
        LWIS update for class 0, the rest are added unchanged.  The
        accumulated mixture is normalised after each pose.
      * Apply each response ``(roomNum, model, class, sign)`` to the
        whole belief, normalising after each one.
      * Condense to 15 mixands and normalise.

    Args:
        belief: iterable mixture of 4-D Gaussians (cop x, y, robber x, y);
            its Gaussians are modified in place.
        responses: optional iterable of response tuples.
        copPoses: sequence of cop poses ``(x, y, theta)``; it is indexed
            and iterated unconditionally despite the ``None`` default.

    Returns:
        The updated, condensed and normalised belief.
    """
    # 4. update the cop's position to the current position
    for comp in belief:
        comp.mean = [copPoses[-1][0], copPoses[-1][1],
                     comp.mean[2], comp.mean[3]]
        comp.var[0][0] = 0.1
        comp.var[0][1] = 0
        comp.var[1][0] = 0
        comp.var[1][1] = 0.1

    # 5. update the belief with robber dynamics
    for comp in belief:
        comp.var[2][2] += 0.03
        comp.var[3][3] += 0.03

    # Distance cutoff: how many standard deviations from the cop a
    # Gaussian may be and still receive the view-cone update.
    distCut = 2

    # View-cone update using LWIS.
    newerBelief = GM()
    for pose in copPoses:
        viewCone = Softmax()
        viewCone.buildTriView(pose, length=1, steepness=10)
        for idx in range(len(viewCone.weights)):
            row = viewCone.weights[idx]
            viewCone.weights[idx] = [0, 0, row[0], row[1]]

        # Reference point half a unit behind the pose heading.
        probe = [pose[0] - np.cos(pose[2]) * .5,
                 pose[1] - np.sin(pose[2]) * .5]

        for comp in belief:
            # The Mahalanobis distance says how many standard deviations
            # the probe is from the robber marginal; beyond distCut the
            # Gaussian is left alone.
            marginal = Gaussian()
            marginal.mean = [comp.mean[2], comp.mean[3]]
            marginal.var = [[comp.var[2][2], comp.var[2][3]],
                            [comp.var[3][2], comp.var[3][3]]]
            marginal.weight = comp.weight

            if marginal.mahalanobisDistance(probe) <= distCut:
                newerBelief.addG(viewCone.lwisUpdate(comp, 0, 500,
                                                     inverse=True))
            else:
                newerBelief.addG(comp)

        # Re-normalise after each view-cone update.
        newerBelief.normalizeWeights()

    # Update from responses.
    if responses is not None:
        for res in responses:
            roomNum, mod, clas, sign = res[0], res[1], res[2], res[3]

            if roomNum == 0:
                # apply to all, using class 0
                if sign == True:
                    newerBelief = mod.runVBND(newerBelief, 0)
                else:
                    summed = GM()
                    for j in range(1, mod.size):
                        summed.addGM(mod.runVBND(newerBelief, j))
                    newerBelief = summed
            else:
                print('ROOM NUM: {}'.format(roomNum))
                # apply to all rooms, using the stated class
                if sign == True:
                    newerBelief = mod.runVBND(newerBelief, clas)
                else:
                    summed = GM()
                    for j in range(1, mod.size):
                        if j != clas:
                            summed.addGM(mod.runVBND(newerBelief, j))
                    newerBelief = summed

            # Each response is a full Bayes update, so normalise each time.
            newerBelief.normalizeWeights()

    # Condense the belief.
    newerBelief.condense(15)
    print("*********************")
    print(newerBelief.size)
    print("*********************")

    # 3. recombine beliefs (nothing to recombine in this variant)
    newBelief = newerBelief
    newBelief.normalizeWeights()

    print("*********************")
    print(newBelief.size)
    print("*********************")
    return newBelief
def backup(als, modes, delA, delAVar, pz, r, maxMix, b):
    """Compute a backed-up alpha for belief ``b`` with softmax mode models.

    Step 1 builds ``newAls[i][j][k]`` for every alpha, action and
    observation.  For each mode class ``h`` the alpha is updated with
    ``modes.runVB``.  Every pair of (updated mixand, observation
    mixand) is then fused with the closed-form product of two scalar
    Gaussians, shifted by ``delA[h][j]`` and widened by ``delAVar``.

    Step 2 picks, per action, the best ``newAls`` entry per observation
    against ``b``, sums them, scales by a fixed discount of 0.9, adds
    ``r[a]``, and keeps the action whose mixture scores highest at
    ``b``.

    Args:
        als: current alpha mixtures.
        modes: softmax model exposing ``weights`` and ``runVB``.
        delA: displacements indexed ``delA[mode][action]``.
        delAVar: scalar variance added by the transition.
        pz: observation mixtures, one per observation.
        r: reward mixtures, one per action.
        maxMix: accepted for interface compatibility; not used here.
        b: belief the backup is evaluated at.

    Returns:
        A deep copy of the best mixture with ``action`` set.

    Note:
        The winner starts out as an empty list; if no candidate scores
        above -1e10 the final attribute assignment raises
        ``AttributeError``.
    """
    n_alphas = len(als)
    n_actions = len(delA[0])
    n_obs = len(pz)

    newAls = [[[0 for _ in range(n_obs)] for _ in range(n_actions)]
              for _ in range(n_alphas)]

    for i in range(n_alphas):
        for j in range(n_actions):
            for k in range(n_obs):
                stepped = GM()
                newAls[i][j][k] = stepped
                for h in range(len(modes.weights)):
                    posterior = modes.runVB(als[i], h)
                    for l in range(pz[k].size):
                        for p in range(posterior.size):
                            mixp = posterior.Gs[p]
                            mixl = pz[k].Gs[l]

                            # Weight of the product of the two Gaussians.
                            overlap = mvn.pdf(mixp.mean, mixl.mean,
                                              mixp.var + mixl.var)
                            weight = mixp.weight * mixl.weight * overlap

                            # Precision-weighted fusion (scalar case).
                            fused_var = (mixp.var**-1 + mixl.var**-1)**-1
                            fused_mean = fused_var * (
                                mixp.var**-1 * mixp.mean
                                + mixl.var**-1 * mixl.mean)

                            stepped.addG(Gaussian(fused_mean - delA[h][j],
                                                  fused_var + delAVar,
                                                  weight))

    top_value = -10000000000
    top_action = 0
    top_mixture = []
    for a in range(n_actions):
        total = GM()
        for o in range(n_obs):
            dots = [continuousDot(newAls[idx][a][o], b)
                    for idx in range(len(newAls))]
            total.addGM(newAls[np.argmax(dots)][a][o])
        total.scalerMultiply(0.9)
        total.addGM(r[a])

        # Flatten any nested 1-D parameters.
        for g in total.Gs:
            if isinstance(g.mean, list):
                g.mean = g.mean[0]
            if isinstance(g.var, list):
                g.var = g.var[0][0]

        value = continuousDot(total, b)
        if value > top_value:
            top_action = a
            top_mixture = copy.deepcopy(total)
            top_value = value

    top_mixture.action = top_action
    return top_mixture
def MDPValueIteration(self, gen=True):
    """Generate or load the 4-D MDP value function ``self.ValueFunc``.

    With ``gen`` true, value iteration starts from a copy of the reward
    ``self.r`` with every weight set to -1000.  It runs until
    ``ValueFunc.comp`` reports no change from the previous iterate, or
    30 iterations have passed.  Each iteration shifts the value function
    by each of the first two action displacements, adds the reward, and
    keeps the candidate scoring highest against a uniform 5x5x5x5 grid
    mixture.  The winner is discounted and condensed to 20 mixands.
    The result is written to ``policies/MDP4DIntercept``.

    With ``gen`` false, the value function is read back from that file.

    Args:
        gen: whether to run value iteration (true) or load from disk.
    """
    policy_path = "policies/MDP4DIntercept"

    if not gen:
        loader = GM()
        self.ValueFunc = loader.readGMArray4D(policy_path)[0]
        return

    # Initialise the value function.
    self.ValueFunc = copy.deepcopy(self.r)
    for g in self.ValueFunc.Gs:
        g.weight = -1000

    # Seed for the convergence comparison; replaced by the previous
    # iterate on the first pass.
    comparision = GM()
    comparision.addG(
        Gaussian([1, 0, 0, 0],
                 [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
                 1))

    # Uniform grid mixture that candidates are scored against.
    uniform = GM()
    for i in range(5):
        for j in range(5):
            for k in range(5):
                for l in range(5):
                    uniform.addG(
                        Gaussian([i, j, k, l],
                                 [[4, 0, 0, 0], [0, 4, 0, 0],
                                  [0, 0, 4, 0], [0, 0, 0, 4]],
                                 1))

    # Iterate until convergence or the iteration cap.
    count = 0
    while (not self.ValueFunc.comp(comparision)) and count < 30:
        print(count)
        comparision = copy.deepcopy(self.ValueFunc)
        count += 1

        top_value = -10000000
        top_mixture = GM()
        for a in range(2):
            shifted = GM()
            for g in self.ValueFunc.Gs:
                new_mean = (np.matrix(g.mean)
                            - np.matrix(self.delA[a])).tolist()
                new_var = (np.matrix(g.var)
                           + np.matrix(self.delAVar)).tolist()
                shifted.addG(Gaussian(new_mean, new_var, g.weight))
            shifted.addGM(self.r)

            value = self.continuousDot(uniform, shifted)
            if value > top_value:
                top_value = value
                top_mixture = copy.deepcopy(shifted)

        top_mixture.scalerMultiply(self.discount)
        top_mixture = top_mixture.kmeansCondensationN(20)
        self.ValueFunc = copy.deepcopy(top_mixture)

    print("MDP Value Iteration Complete")
    self.ValueFunc.printGMArrayToFile([self.ValueFunc], policy_path)