Ejemplo n.º 1
0
    def backup(self, b, als1):
        """Point-based backup: build the best alpha function for belief ``b``.

        For each of the three actions, the alpha ``als1[j][a]`` that scores
        highest against ``b`` (via ``self.continuousDot``) is scaled by
        ``self.discount`` and offset by the reward mixture ``self.rew``. The
        candidate with the highest value at ``b`` wins.

        Args:
            b: belief at which the backup is performed.
            als1: pre-computed alphas, indexed as ``als1[j][a]``.

        Returns:
            A deep copy of the winning mixture, with its ``action`` attribute
            set to the index of the action that produced it.
        """
        reward = self.rew
        numActs = 3

        # Seed with -inf rather than a magic large negative so that any real
        # value can win the comparison.
        bestVal = float('-inf')
        bestAct = 0
        bestGM = None

        for a in range(numActs):
            # Score every alpha for this action, then keep the best one.
            scores = [self.continuousDot(alpha[a], b) for alpha in als1]

            suma = GM()
            suma.addGM(als1[np.argmax(scores)][a])
            # NOTE(review): the method name is spelled `scalarMultiply` here;
            # confirm it matches the GM class this file imports.
            suma.scalarMultiply(self.discount)
            suma.addGM(reward)

            tmp = self.continuousDot(suma, b)
            if tmp > bestVal:
                bestAct = a
                bestGM = deepcopy(suma)
                bestVal = tmp

        bestGM.action = bestAct

        return bestGM
Ejemplo n.º 2
0
def theArena(mix, kmeansFunc, numClusters=4, finalNum=5, verbose=False):
    """Condense ``mix`` by clustering its positive part and shrinking each cluster.

    Only the positive sub-mixture returned by ``separateAndNormalize`` is
    processed; the negative part is currently ignored.

    Args:
        mix: mixture to condense. A deep copy is what gets separated.
        kmeansFunc: clustering routine forwarded to ``cluster``.
        numClusters: number of intermediate clusters.
        finalNum: final number of mixands per cluster.
        verbose: when true, ``plotResults(mix, result)`` is called.

    Returns:
        A new GM holding the condensed positive clusters, rescaled by the
        positive normalisation factor.
    """
    # Work on a copy so the caller's mixture is never handed to the separator.
    workingMix = deepcopy(mix)

    # Separate: only the positive half is used below.
    posMix, _negMix, posNorm, _negNorm = separateAndNormalize(workingMix)

    # Cluster
    posClusters = cluster(posMix, kmeansFunc, k=numClusters)

    # Condense
    posCon = conComb(posClusters, finalNum, numClusters * finalNum, mix.size)

    # Recombine
    posCon.scalerMultiply(posNorm)
    result = GM()
    result.addGM(posCon)

    if verbose:
        plotResults(mix, result)
    return result
Ejemplo n.º 3
0
	def backupFactored(self,b):
		"""Point-based backup over factored (movement, question) actions.

		For every movement ``am`` and question ``aq``, the best pre-computed
		alpha ``self.preAls[j][am][aq][oq]`` is summed over both question
		answers ``oq``, scaled by ``self.discount`` and offset by
		``self.r[am]``. The pair with the highest value at ``b`` wins.

		Args:
			b: belief at which the backup is performed.

		Returns:
			A deep copy of the winning mixture, with ``action`` set to
			``[am, aq]``.
		"""
		R = self.r
		als1 = self.preAls

		# Seed with -inf rather than a magic large negative so that any real
		# value can win the comparison.
		bestVal = float('-inf')
		bestAct = [0,0]
		bestGM = None

		for am in range(len(self.delA)):
			# NOTE(review): 8 questions and 2 answers are hard-coded; confirm
			# they match the shape of self.preAls.
			for aq in range(8):
				suma = GM()
				for oq in range(2):
					scores = [self.continuousDot(als1[j][am][aq][oq],b) for j in range(len(als1))]
					suma.addGM(als1[np.argmax(scores)][am][aq][oq])
				suma.scalerMultiply(self.discount)
				suma.addGM(R[am])

				tmp = self.continuousDot(suma,b)
				if tmp > bestVal:
					bestAct = [am,aq]
					bestGM = copy.deepcopy(suma)
					bestVal = tmp

		bestGM.action = bestAct

		return bestGM
Ejemplo n.º 4
0
	def backup(self,b):
		"""Point-based backup: build the best alpha function for belief ``b``.

		For each action, the best pre-computed alpha ``self.preAls[j][a][o]``
		is summed over all observations, scaled by ``self.discount`` and
		offset by ``self.r[a]``. The action with the highest value at ``b``
		wins.

		Args:
			b: belief at which the backup is performed.

		Returns:
			A deep copy of the winning mixture, with ``action`` set to the
			index of the winning action.
		"""
		R = self.r
		pz = self.pz
		als1 = self.preAls

		# The observation count comes from `.size` when self.useSoft is set,
		# otherwise from len().
		obslen = pz.size if self.useSoft else len(pz)

		# Seed with -inf rather than a magic large negative so that any real
		# value can win the comparison.
		bestVal = float('-inf')
		bestAct = 0
		bestGM = None

		for a in range(len(self.delA)):
			suma = GM()
			for o in range(obslen):
				scores = [self.continuousDot(als1[j][a][o],b) for j in range(len(als1))]
				suma.addGM(als1[np.argmax(scores)][a][o])
			suma.scalerMultiply(self.discount)
			suma.addGM(R[a])

			tmp = self.continuousDot(suma,b)
			if tmp > bestVal:
				bestAct = a
				bestGM = copy.deepcopy(suma)
				bestVal = tmp

		bestGM.action = bestAct

		return bestGM
Ejemplo n.º 5
0
    def backup(self, b):
        """Point-based backup: build the best alpha function for belief ``b``.

        One alpha is produced per belief, hence one per call. For each action,
        the best pre-computed alpha ``self.preAls[j][a][o]`` is summed over
        all observations, scaled by ``self.discount`` and offset by the
        reward mixture ``self.r``. The action with the highest value at ``b``
        wins.

        Args:
            b: belief at which the backup is performed.

        Returns:
            The winning mixture (not copied), with ``action`` set to the
            index of the winning action.
        """
        reward = self.r
        numObs = len(self.pz)
        als1 = self.preAls

        # Seed with -inf rather than a magic large negative so that any real
        # value can win the comparison.
        bestVal = float('-inf')
        bestAct = 0
        bestGM = None

        for a in range(len(self.delA)):
            suma = GM()
            for o in range(numObs):
                scores = [
                    self.continuousDot(als1[j][a][o], b)
                    for j in range(len(als1))
                ]
                suma.addGM(als1[np.argmax(scores)][a][o])
            suma.scalerMultiply(self.discount)
            suma.addGM(reward)

            tmp = self.continuousDot(suma, b)
            if tmp > bestVal:
                bestAct = a
                # `suma` is rebuilt from scratch on every iteration, so the
                # reference can be kept without copying.
                bestGM = suma
                bestVal = tmp

        bestGM.action = bestAct

        return bestGM
def backup(als, modes, delA, delAVar, pz, r, maxMix, b):
    """Back up the alpha set ``als`` at belief ``b`` and return the best new alpha.

    Stage 1 builds ``newAls[i][j][k]`` for every alpha ``i``, action ``j`` and
    observation ``k``. For each mode, the alpha is multiplied by the
    observation mixture ``pz[k]``, its means are shifted by
    ``delA[mode][j]``, its variances are increased by ``delAVar``, and the
    result is multiplied by the mode mixture. The per-mode results are summed.

    Stage 2 picks, per action, the best ``newAls`` entry for each observation.
    The sum is scaled by 0.9, offset by ``r[a]`` and condensed to ``maxMix``
    mixands. The action with the highest value at ``b`` wins.

    The code reduces means and variances to scalars (``mean[l][0]``,
    ``var[l][0][0]``), so it only handles a one-dimensional state.

    Args:
        als: current alpha mixtures.
        modes: sequence of mode mixtures; its order matches ``delA``'s first index.
        delA: action offsets, indexed as ``delA[mode][action]``.
        delAVar: variance added by the transition.
        pz: sequence of observation mixtures.
        r: per-action reward mixtures.
        maxMix: number of mixands to condense each candidate to.
        b: belief at which the backup is performed.

    Returns:
        A deep copy of the winning mixture, with ``action`` set to the index
        of the winning action.
    """
    numActs = len(delA[0])
    numObs = len(pz)

    newAls = [[[GM() for _ in range(numObs)] for _ in range(numActs)]
              for _ in range(len(als))]

    for i, alpha in enumerate(als):
        for j in range(numActs):
            for k in range(numObs):
                # enumerate gives the mode index directly, instead of a
                # modes.index(h) search per mode.
                for modeIdx, h in enumerate(modes):
                    # Use the observation index k here. The old code indexed
                    # pz with the action index j, which made every
                    # observation's alpha identical.
                    # Rebuilt per mode on purpose: the lists returned by
                    # getMeans/getVars are modified in place below.
                    tmpGM = alpha.GMProduct(pz[k])

                    mean = tmpGM.getMeans()
                    for l in range(len(mean)):
                        mean[l][0] -= delA[modeIdx][j]
                        mean[l] = mean[l][0]

                    var = tmpGM.getVars()
                    for l in range(len(var)):
                        var[l][0][0] += delAVar
                        var[l] = var[l][0][0]

                    weights = tmpGM.getWeights()
                    tmpGM2 = GM()
                    for l in range(len(mean)):
                        tmpGM2.addG(Gaussian(mean[l], var[l], weights[l]))

                    newAls[i][j][k].addGM(tmpGM2.GMProduct(h))

    # Seed with -inf rather than a magic large negative so that any real
    # value can win the comparison.
    bestVal = float('-inf')
    bestAct = 0
    bestGM = None

    for a in range(numActs):
        suma = GM()
        for o in range(numObs):
            scores = [
                continuousDot(newAls[j][a][o], b)
                for j in range(len(newAls))
            ]
            suma.addGM(newAls[np.argmax(scores)][a][o])
        suma.scalerMultiply(0.9)
        suma.addGM(r[a])

        # Some mixands still carry list-valued parameters; reduce them to
        # scalars before condensing.
        for g in suma.Gs:
            if isinstance(g.mean, list):
                g.mean = g.mean[0]
                g.var = g.var[0][0]

        suma = suma.kmeansCondensationN(k=maxMix, lowInit=-20, highInit=20)

        tmp = continuousDot(suma, b)
        if tmp > bestVal:
            bestAct = a
            bestGM = copy.deepcopy(suma)
            bestVal = tmp

    bestGM.action = bestAct

    return bestGM
Ejemplo n.º 7
0
def conComb(mixtures, max_num_mixands, finalTotalDesired, startingSize):
    """Condense each mixture proportionally to its size and merge the results.

    Each mixture's condensation target is
    ``gm.size * finalTotalDesired / startingSize``, with a minimum of 1.

    Args:
        mixtures: iterable of mixtures exposing ``size``.
        max_num_mixands: accepted but not used by this implementation.
        finalTotalDesired: total number of mixands wanted across all mixtures.
        startingSize: size of the mixture the clusters were drawn from.

    Returns:
        A new GM containing every non-empty condensed mixture.
    """
    combined = GM()
    for gm in mixtures:
        # NOTE(review): the target is not rounded, so it may be fractional;
        # confirm `condense` accepts a non-integer target.
        share = (np.floor(gm.size) * finalTotalDesired) / startingSize
        target = max(1, share)
        condensed = deepcopy(condense(gm, target))
        try:
            if condensed.size > 0:
                combined.addGM(condensed)
        except AttributeError:
            # `condense` sometimes yields something without `.size`
            # (apparently an int); such results are discarded.
            pass
    return combined
Ejemplo n.º 8
0
def testMakeNear():
    """Visual check of a "near" region built from two oriented rectangle models.

    A uniform grid belief is fused with classes 1-4 of the inner rectangle
    model, and the result is then fused with class 0 of a rectangle that is
    ``nearness`` larger in each dimension. The three beliefs are drawn as
    stacked contour plots.
    """
    cent = [4, 4]
    orient = 0
    nearness = 2
    lengthIn, widthIn = 3, 2
    lengthOut, widthOut = lengthIn + nearness, widthIn + nearness

    pzIn = Softmax()
    pzOut = Softmax()
    pzIn.buildOrientedRecModel(cent, orient, lengthIn, widthIn, steepness=10)
    pzOut.buildOrientedRecModel(cent, orient, lengthOut, widthOut,
                                steepness=10)

    # Prior: unit-covariance Gaussians on the integer grid [0, 9] x [0, 9].
    prior = GM()
    for gx in range(10):
        for gy in range(10):
            prior.addG(Gaussian([gx, gy], [[1, 0], [0, 1]], 1))
    prior.normalizeWeights()

    # Fuse with classes 1..4 of the inner model.
    outsideInner = GM()
    for cls in range(1, 5):
        outsideInner.addGM(pzIn.runVBND(prior, cls))
    outsideInner.normalizeWeights()

    # Then fuse with class 0 of the outer model.
    near = GM()
    near.addGM(pzOut.runVBND(outsideInner, 0))
    near.normalizeWeights()

    fig, axarr = plt.subplots(3)
    for ax, belief in zip(axarr, (prior, outsideInner, near)):
        x, y, c = belief.plot2D(low=[0, 0], high=[10, 10], vis=False)
        ax.contourf(x, y, c)
    plt.show()
Ejemplo n.º 9
0
    def getMDPAction(self, x):
        """Return the index of the action with the highest value at ``x``.

        For each action, every mixand of ``self.ValueFunc`` has its mean
        shifted by ``self.delA[a]`` and its covariance increased by
        ``self.delAVar``. The reward mixture ``self.r`` is then added and the
        result is evaluated at ``x``.

        Args:
            x: point passed to ``pointEval``.

        Returns:
            Index into ``self.delA`` of the best action, or 0 when
            ``self.delA`` is empty.
        """
        # Seed with -inf rather than a magic large negative so that very low
        # values cannot silently fall back to action 0.
        bestVal = float('-inf')
        bestAct = 0
        for a in range(len(self.delA)):
            suma = GM()
            for g in self.ValueFunc.Gs:
                mean = (np.matrix(g.mean) - np.matrix(self.delA[a])).tolist()
                var = (np.matrix(g.var) + np.matrix(self.delAVar)).tolist()
                suma.addG(Gaussian(mean, var, g.weight))
            suma.addGM(self.r)

            tmpVal = suma.pointEval(x)
            if tmpVal > bestVal:
                bestVal = tmpVal
                bestAct = a
        return bestAct
def testMakeNear():
	"""Visual check of a "near" region built from two oriented rectangle models.

	A uniform grid belief is fused with classes 1-4 of the inner rectangle
	model, and the result is then fused with class 0 of a rectangle that is
	``nearness`` larger in each dimension. The three beliefs are drawn as
	stacked contour plots.
	"""
	cent = [3.5,3.5]
	orient = 0
	nearness = 2
	lengthIn, widthIn = 3, 2
	lengthOut, widthOut = lengthIn+nearness, widthIn+nearness

	pzIn = Softmax()
	pzOut = Softmax()
	pzIn.buildOrientedRecModel(cent,orient,lengthIn,widthIn,steepness=10)
	pzOut.buildOrientedRecModel(cent,orient,lengthOut,widthOut,steepness=10)

	# Prior: unit-covariance Gaussians on the integer grid [0, 10] x [0, 10].
	prior = GM()
	for gx in range(11):
		for gy in range(11):
			prior.addG(Gaussian([gx,gy],[[1,0],[0,1]],1))
	prior.normalizeWeights()

	# Fuse with classes 1..4 of the inner model.
	outsideInner = GM()
	for cls in range(1,5):
		outsideInner.addGM(pzIn.runVBND(prior,cls))
	outsideInner.normalizeWeights()

	# Then fuse with class 0 of the outer model.
	near = GM()
	near.addGM(pzOut.runVBND(outsideInner,0))
	near.normalizeWeights()

	fig,axarr = plt.subplots(3)
	for ax, belief in zip(axarr, (prior, outsideInner, near)):
		x,y,c = belief.plot2D(low=[0,0],high=[10,10],vis=False)
		ax.contourf(x,y,c)
	plt.show()
Ejemplo n.º 11
0
	def preComputeAlsSoftmaxFactored(self):
		"""Pre-compute the intermediate alphas for the factored backup.

		Fills ``self.preAls[j][am][aq][oq]`` for every alpha ``j`` in
		``self.Gamma``, movement ``am``, question ``aq`` (three of them) and
		answer ``oq`` (two of them). Each entry is the alpha fused with the
		softmax classes selected by the question/answer pair, with means
		shifted by ``self.delA[am]`` and covariances increased by
		``self.delAVar``.
		"""
		numQuestions = 3
		numAnswers = 2

		als1 = []
		for alpha in self.Gamma:
			perMove = []
			for delta in self.delA:
				perQuestion = []
				for aq in range(numQuestions):
					perAnswer = []
					for oq in range(numAnswers):
						# Pick the softmax classes this answer fuses with.
						if oq == 0:
							# Multimodal answer: every class in 1..4 except aq and 3.
							# NOTE(review): this excludes class `aq`, while the
							# other answer uses class aq+1 (or aq+2); confirm the
							# two are meant to differ.
							classes = [h for h in range(1,5) if h != aq and h != 3]
						else:
							classes = [aq+2] if aq == 2 else [aq+1]

						alObs = GM()
						for cls in classes:
							alObs.addGM(self.pz.runVBND(alpha,cls))

						# Apply the movement: shift the means, add the covariances.
						shifted = GM()
						for k in alObs.Gs:
							mean = (np.matrix(k.mean) - np.matrix(delta)).tolist()
							var = (np.matrix(k.var) + np.matrix(self.delAVar)).tolist()
							shifted.addG(Gaussian(mean,var,k.weight))
						perAnswer.append(shifted)
					perQuestion.append(perAnswer)
				perMove.append(perQuestion)
			als1.append(perMove)

		self.preAls = als1
    def backup(self, b):
        """Point-based backup: build the best alpha function for belief ``b``.

        For each action, the best pre-computed alpha ``self.preAls[j][a][o]``
        is summed over all observations, scaled by ``self.discount`` and
        offset by ``self.r[a]``. The action with the highest value at ``b``
        wins.

        Args:
            b: belief at which the backup is performed.

        Returns:
            A deep copy of the winning mixture, with ``action`` set to the
            index of the winning action.
        """
        R = self.r
        pz = self.pz
        als1 = self.preAls

        # The observation count comes from `.size` when self.useSoft is set,
        # otherwise from len().
        obslen = pz.size if self.useSoft else len(pz)

        # Seed with -inf rather than a magic large negative so that any real
        # value can win the comparison.
        bestVal = float('-inf')
        bestAct = 0
        bestGM = None

        for a in range(len(self.delA)):
            suma = GM()
            for o in range(obslen):
                scores = [
                    self.continuousDot(als1[j][a][o], b)
                    for j in range(len(als1))
                ]
                suma.addGM(als1[np.argmax(scores)][a][o])
            suma.scalerMultiply(self.discount)
            suma.addGM(R[a])

            tmp = self.continuousDot(suma, b)
            if tmp > bestVal:
                bestAct = a
                bestGM = copy.deepcopy(suma)
                bestVal = tmp

        bestGM.action = bestAct

        return bestGM
Ejemplo n.º 13
0
	def stateObsUpdate(self,name,relation,pos="Is"):
		"""Fuse a spatial-relation observation into ``self.belief``.

		Args:
			name: key into ``self.sketches``, or ``'You'`` to use a box of
				half-width 5 around ``self.copPose``.
			relation: key into ``self.spatialRealtions`` giving the softmax class.
			pos: ``"Is"`` fuses with that class; any other value fuses with
				every class in 0..4 except that one.
		"""
		if name != 'You':
			soft = self.sketches[name]
		else:
			# Build a square softmax model centred on the cop's position.
			cx = self.copPose[0]
			cy = self.copPose[1]
			corners = [[cx-5,cy-5],[cx+5,cy-5],[cx+5,cy+5],[cx-5,cy+5]]
			soft = Softmax()
			soft.buildPointsModel(corners,steepness=3)
		softClass = self.spatialRealtions[relation]

		if pos != "Is":
			# Negative statement: sum the updates for all the other classes.
			others = GM()
			for cls in range(5):
				if cls == softClass:
					continue
				others.addGM(soft.runVBND(self.belief,cls))
			others.normalizeWeights()
			self.belief = others
		else:
			self.belief = soft.runVBND(self.belief,softClass)
			self.belief.normalizeWeights()

		# Keep the mixture size bounded.
		if self.belief.size > self.MAX_BELIEF_SIZE:
			self.belief.condense(self.MAX_BELIEF_SIZE)
			self.belief.normalizeWeights()
Ejemplo n.º 14
0
def testInvertedSoftmaxModels():
    """Time and plot two ways of applying a "not class 0" softmax update.

    The first method sums the fused beliefs for classes 1-4. The second
    subtracts the normalised class-0 update from the prior, shifts all
    weights by the minimum weight and renormalises. The elapsed time of each
    method is printed, and the prior and both results are plotted.
    """
    # time.clock() was removed in Python 3.8. Use perf_counter when it
    # exists and fall back to clock only on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    b = GM()
    for mean in ([2, 2], [4, 2], [2, 4], [3, 3]):
        b.addG(Gaussian(mean, [[1, 0], [0, 1]], 1))
    b.normalizeWeights()

    b.plot2D()

    pz = Softmax()
    pz.buildOrientedRecModel([2, 2], 0, 1, 1, 5)

    # Method 1: sum the updates for every class other than 0.
    startTime = timer()
    b2 = GM()
    for i in range(1, 5):
        b2.addGM(pz.runVBND(b, i))
    print(timer() - startTime)
    b2.plot2D()

    # Method 2: prior minus the normalised class-0 update.
    startTime = timer()
    b3 = GM()
    b3.addGM(b)
    tmpB = pz.runVBND(b, 0)
    tmpB.normalizeWeights()
    tmpB.scalerMultiply(-1)

    b3.addGM(tmpB)

    # Shift every weight by the minimum weight before renormalising.
    mi = min(b3.getWeights())
    for g in b3.Gs:
        g.weight = g.weight - mi

    b3.normalizeWeights()
    print(timer() - startTime)
    b3.plot2D()
Ejemplo n.º 15
0
    def oldbeliefUpdate(self, belief, responses=None, copPoses=None):
        """Room-partitioned belief update (older variant).

        Steps:
          1. Split ``belief`` into one mixture per room, by the position held
             in mean components 2 and 3.
          2. Fuse every room mixture with classes 1-4 of the view cone built
             at the latest cop pose. The class-1 result is scaled by 0.8.
          3. Apply each queued response to every room mixture.
          4. Clamp means to the room bounds (+/- 0.01) and condense each
             mixture to 15 mixands.
          5. Recombine, weighting each room by its original weight sum.
          6. Overwrite the cop part of every mixand with ``copPoses[0]``.

        Args:
            belief: iterable of 4-D Gaussians (cop x, cop y, robber x, robber y).
            responses: optional iterable of ``(roomNum, model, class, sign)``.
            copPoses: sequence of cop poses. Despite the ``None`` default it
                must be non-empty, because ``copPoses[-1]`` is read
                unconditionally.

        Returns:
            The recombined, normalised belief.
        """
        print('UPDATING BELIEF')
        rooms = self.map_.rooms

        #1. partition means into separate GMs, 1 for each room
        allBels = []
        allBounds = []
        weightSums = []
        for room in rooms:
            info = rooms[room]
            min_x, min_y = info['min_x'], info['min_y']
            max_x, max_y = info['max_x'], info['max_y']
            allBounds.append([min_x, min_y, max_x, max_y])

            tmp = GM()
            tmpw = 0
            for g in belief:
                # Keep the mixand if its robber position is strictly inside the room.
                if (min_x < g.mean[2] < max_x and min_y < g.mean[3] < max_y):
                    tmp.addG(deepcopy(g))
                    tmpw += g.weight

            tmp.normalizeWeights()
            allBels.append(tmp)
            weightSums.append(tmpw)

        pose = copPoses[-1]

        # View cone over the robber dimensions only: the 2-D weights are
        # padded with zeros for the cop dimensions.
        viewCone = Softmax()
        viewCone.buildTriView(pose, length=1, steepness=10)
        for i in range(len(viewCone.weights)):
            viewCone.weights[i] = [
                0, 0, viewCone.weights[i][0], viewCone.weights[i][1]
            ]

        # Update all rooms with the view cone.
        for i in range(len(allBels)):
            newerBelief = GM()
            for j in range(1, 5):
                tmpBel = viewCone.runVBND(allBels[i], j)
                if j == 1:
                    tmpBel.scalerMultiply(.8)
                newerBelief.addGM(tmpBel)
            allBels[i] = newerBelief

        for bel in allBels:
            bel.normalizeWeights()

        print('allBels LENGTH: {}'.format(len(allBels)))

        #2. use queued observations to update every room's GM
        if responses is not None:
            for res in responses:
                roomNum = res[0]
                mod = res[1]
                clas = res[2]
                sign = res[3]

                # Room 0 always refers to class 0; any other room uses the
                # class carried by the response.
                if roomNum == 0:
                    target = 0
                else:
                    print('ROOM NUM: {}'.format(roomNum))
                    target = clas

                for i in range(len(allBels)):
                    if sign == True:
                        allBels[i] = mod.runVBND(allBels[i], target)
                    else:
                        # Negative response: sum over every other class,
                        # starting at class 1.
                        tmp = GM()
                        for j in range(1, mod.size):
                            if j != target:
                                tmp.addGM(mod.runVBND(allBels[i], j))
                        allBels[i] = tmp

        #2.5. Make sure all GMs stay within their room's bounds.
        # Pairing by position avoids list.index(), which is quadratic and
        # returns the wrong room when two mixtures compare equal.
        for gm, bounds in zip(allBels, allBounds):
            for g in gm:
                g.mean[2] = max(g.mean[2], bounds[0] - 0.01)
                g.mean[2] = min(g.mean[2], bounds[2] + 0.01)
                g.mean[3] = max(g.mean[3], bounds[1] - 0.01)
                g.mean[3] = min(g.mean[3], bounds[3] + 0.01)

        # Also condense each mixture.
        for bel in allBels:
            bel.condense(15)

        #3. recombine beliefs, restoring each room's original weight share
        newBelief = GM()
        for bel, roomWeight in zip(allBels, weightSums):
            bel.scalerMultiply(roomWeight)
            newBelief.addGM(bel)
        newBelief.normalizeWeights()

        #4. fix cop's position in belief
        # NOTE(review): this uses the first pose (copPoses[0]), while the view
        # cone above uses the latest one; confirm this is intended.
        for g in newBelief:
            g.mean = [copPoses[0][0], copPoses[0][1], g.mean[2], g.mean[3]]
            g.var[0][0] = 0.1
            g.var[0][1] = 0
            g.var[1][0] = 0
            g.var[1][1] = 0.1

        #5. add uncertainty for robber position (currently zero)
        for g in newBelief:
            g.var[2][2] += 0
            g.var[3][3] += 0

        # copPoses was already dereferenced above, so it cannot be None here.
        print("MAP COP POSE TO PLOT: {}".format(pose))
        self.makeBeliefMap(newBelief, pose)

        return newBelief
Ejemplo n.º 16
0
    def beliefUpdate(self, belief, responses=None, copPoses=None):
        """Update the joint cop/robber belief with view-cone and response data.

        Steps:
          1. Pin the cop part of every mixand to the latest pose and add 0.03
             to both robber variances. This modifies ``belief`` in place.
          2. For every pose in ``copPoses``, build a view cone. Mixands whose
             robber marginal lies within ``distCut`` Mahalanobis distance of
             a point 0.5 behind the pose go through ``lwisUpdate``; the
             others are added unchanged.
          3. Apply each response as a softmax update, normalising after each.
          4. Condense to 15 mixands and normalise.

        Args:
            belief: iterable of 4-D Gaussians (cop x, cop y, robber x, robber y).
            responses: optional iterable of ``(roomNum, model, class, sign)``.
            copPoses: sequence of cop poses. It is iterated unconditionally,
                so it must not be ``None`` despite the default.

        Returns:
            The updated, condensed and normalised belief.
        """
        # Cop position update and robber dynamics, done before any
        # observation update.
        for g in belief:
            g.mean = [copPoses[-1][0], copPoses[-1][1], g.mean[2], g.mean[3]]
            g.var[0][0] = 0.1
            g.var[0][1] = 0
            g.var[1][0] = 0
            g.var[1][1] = 0.1
            g.var[2][2] += 0.03
            g.var[3][3] += 0.03

        # Distance cutoff: how many standard deviations from the cop a
        # Gaussian may be and still receive the view-cone update.
        distCut = 2

        # NOTE(review): the output mixture accumulates one copy of the belief
        # per pose rather than chaining the updates; confirm this is intended
        # when several poses are supplied.
        updated = GM()
        for pose in copPoses:
            # View cone over the robber dimensions only: the 2-D weights are
            # padded with zeros for the cop dimensions.
            viewCone = Softmax()
            viewCone.buildTriView(pose, length=1, steepness=10)
            for idx, w in enumerate(viewCone.weights):
                viewCone.weights[idx] = [0, 0, w[0], w[1]]

            for g in belief:
                # Robber marginal of this mixand.
                robberPart = Gaussian()
                robberPart.mean = [g.mean[2], g.mean[3]]
                robberPart.var = [[g.var[2][2], g.var[2][3]],
                                  [g.var[3][2], g.var[3][3]]]
                robberPart.weight = g.weight

                # The Mahalanobis distance says how many standard deviations
                # the probe point is from the mean. Beyond the cutoff the
                # mixand is left alone.
                probe = [
                    pose[0] - np.cos(pose[2]) * .5,
                    pose[1] - np.sin(pose[2]) * .5
                ]
                if robberPart.mahalanobisDistance(probe) > distCut:
                    updated.addG(g)
                else:
                    updated.addG(viewCone.lwisUpdate(g, 0, 500, inverse=True))
            # Re-normalise after each view-cone update.
            updated.normalizeWeights()

        # Update from responses.
        if responses is not None:
            for res in responses:
                roomNum, mod, clas, sign = res[0], res[1], res[2], res[3]

                # Room 0 always refers to class 0; any other room uses the
                # class carried by the response.
                if roomNum == 0:
                    target = 0
                else:
                    print('ROOM NUM: {}'.format(roomNum))
                    target = clas

                if sign == True:
                    updated = mod.runVBND(updated, target)
                else:
                    # Negative response: sum over every other class,
                    # starting at class 1.
                    summed = GM()
                    for j in range(1, mod.size):
                        if j != target:
                            summed.addGM(mod.runVBND(updated, j))
                    updated = summed
                # Each response is a full Bayes update, so normalise every time.
                updated.normalizeWeights()

        # Condense the belief.
        updated.condense(15)

        print("*********************")
        print(updated.size)
        print("*********************")

        newBelief = updated
        newBelief.normalizeWeights()

        print("*********************")
        print(newBelief.size)
        print("*********************")

        return newBelief
def backup(als, modes, delA, delAVar, pz, r, maxMix, b):
    """Back up the alpha set ``als`` at belief ``b`` and return the best new alpha.

    Stage 1 builds ``newAls[i][j][k]`` for every alpha ``i``, action ``j`` and
    observation ``k``. For each softmax mode ``h``, ``modes.runVB(als[i], h)``
    is multiplied mixand by mixand with ``pz[k]``. Each product has its mean
    shifted by ``delA[h][j]`` and its variance increased by ``delAVar``.

    Stage 2 picks, per action, the best ``newAls`` entry for each observation.
    The sum is scaled by 0.9 and offset by ``r[a]``. The action with the
    highest value at ``b`` wins.

    Args:
        als: current alpha mixtures.
        modes: softmax model exposing ``weights`` and ``runVB``.
        delA: action offsets, indexed as ``delA[mode][action]``.
        delAVar: variance added by the transition.
        pz: sequence of observation mixtures.
        r: per-action reward mixtures.
        maxMix: accepted but not used (the condensation step is disabled).
        b: belief at which the backup is performed.

    Returns:
        A deep copy of the winning mixture, with ``action`` set to the index
        of the winning action.
    """
    numAlphas = len(als)
    numActs = len(delA[0])
    numObs = len(pz)

    newAls = [[[0 for _ in range(numObs)] for _ in range(numActs)]
              for _ in range(numAlphas)]

    for i in range(numAlphas):
        for j in range(numActs):
            for k in range(numObs):
                cell = GM()
                newAls[i][j][k] = cell

                for h in range(len(modes.weights)):
                    modeAlpha = modes.runVB(als[i], h)
                    for l in range(pz[k].size):
                        for p in range(modeAlpha.size):
                            mixp = modeAlpha.Gs[p]
                            mixl = pz[k].Gs[l]

                            # Product of two Gaussians: the weight picks up
                            # the overlap density.
                            weight = mixp.weight * mixl.weight * mvn.pdf(
                                mixp.mean, mixl.mean, mixp.var + mixl.var)

                            # Combined variance and mean of the product.
                            c2 = (mixp.var**-1 + mixl.var**-1)**-1
                            c1 = c2 * (mixp.var**-1 * mixp.mean +
                                       mixl.var**-1 * mixl.mean)

                            # Apply the transition for mode h and action j.
                            cell.addG(
                                Gaussian(c1 - delA[h][j], c2 + delAVar,
                                         weight))

    bestVal = -10000000000
    bestAct = 0
    bestGM = []

    for a in range(numActs):
        suma = GM()
        for o in range(numObs):
            scores = [
                continuousDot(newAls[j][a][o], b)
                for j in range(len(newAls))
            ]
            suma.addGM(newAls[np.argmax(scores)][a][o])
        suma.scalerMultiply(0.9)
        suma.addGM(r[a])

        # Reduce any list-valued parameters to scalars.
        for g in suma.Gs:
            if isinstance(g.mean, list):
                g.mean = g.mean[0]
            if isinstance(g.var, list):
                g.var = g.var[0][0]

        value = continuousDot(suma, b)
        if value > bestVal:
            bestAct = a
            bestGM = copy.deepcopy(suma)
            bestVal = value

    bestGM.action = bestAct

    return bestGM
Ejemplo n.º 18
0
    def MDPValueIteration(self, gen=True):
        """Compute, or load, the 4-D MDP value function into ``self.ValueFunc``.

        Args:
            gen: when true, run value iteration (at most 30 sweeps) and write
                the result to ``policies/MDP4DIntercept``. Otherwise read the
                value function back from that file.
        """
        policyPath = "policies/MDP4DIntercept"

        if not gen:
            self.ValueFunc = GM().readGMArray4D(policyPath)[0]
            return

        # Initialise the value function from the reward, with every weight
        # set to -1000.
        self.ValueFunc = copy.deepcopy(self.r)
        for g in self.ValueFunc.Gs:
            g.weight = -1000

        # Dummy mixture for the first convergence comparison.
        comparision = GM()
        comparision.addG(
            Gaussian([1, 0, 0, 0],
                     [[1 if row == col else 0 for col in range(4)]
                      for row in range(4)], 1))

        # Grid of broad Gaussians on 0..4 in each dimension; candidates are
        # scored against it.
        uniform = GM()
        for i in range(5):
            for j in range(5):
                for k in range(5):
                    for l in range(5):
                        cov = [[4 if row == col else 0 for col in range(4)]
                               for row in range(4)]
                        uniform.addG(Gaussian([i, j, k, l], cov, 1))

        count = 0

        # Iterate until convergence or 30 sweeps.
        while not self.ValueFunc.comp(comparision) and count < 30:
            print(count)
            comparision = copy.deepcopy(self.ValueFunc)
            count += 1

            maxVal = -10000000
            maxGM = GM()
            # NOTE(review): only the first two actions are considered; confirm
            # against len(self.delA).
            for a in range(2):
                suma = GM()
                for g in self.ValueFunc.Gs:
                    shiftedMean = (np.matrix(g.mean) -
                                   np.matrix(self.delA[a])).tolist()
                    widenedVar = (np.matrix(g.var) +
                                  np.matrix(self.delAVar)).tolist()
                    suma.addG(Gaussian(shiftedMean, widenedVar, g.weight))
                suma.addGM(self.r)

                tmpVal = self.continuousDot(uniform, suma)
                if tmpVal > maxVal:
                    maxVal = tmpVal
                    maxGM = copy.deepcopy(suma)

            maxGM.scalerMultiply(self.discount)
            maxGM = maxGM.kmeansCondensationN(20)
            self.ValueFunc = copy.deepcopy(maxGM)

        print("MDP Value Iteration Complete")
        self.ValueFunc.printGMArrayToFile([self.ValueFunc], policyPath)