def beliefUpdate(self,b,a,o):
		"""Build the updated belief mixture for action ``a`` and observation ``o``.

		Every component of ``self.pz[o]`` is paired with every component of
		``b``. The belief component is first shifted by ``self.delA[a]`` and
		inflated by ``self.delAVar``; the pair is then fused with the
		product-of-Gaussians formulas and weighted by the density of the
		shifted belief mean under the observation component.

		Args:
			b: mixture whose ``Gs`` components expose ``mean``, ``var`` and
				``weight``.
			a: index into ``self.delA``.
			o: index into ``self.pz``.

		Returns:
			The mixture produced by ``kmeansCondensationN(self.maxMix)``,
			with ``normalizeWeights()`` applied before and after condensing.
		"""
		btmp = GM()

		# The action shift and its noise are the same for every component pair.
		delA = np.matrix(self.delA[a]).T
		delAVar = np.matrix(self.delAVar)

		for obs in self.pz[o].Gs:
			si = np.matrix(obs.mean).T
			sigi = np.matrix(obs.var)
			sigiInv = sigi.I

			for bel in b.Gs:
				predMean = np.matrix(bel.mean).T + delA
				predVar = np.matrix(bel.var) + delAVar
				predVarInv = predVar.I

				# The covariance must be the sum of all three terms. The old
				# np.add(sigi, sigj, delAVar) used delAVar as the ufunc's
				# output buffer: it dropped the noise term from the sum and
				# overwrote delAVar for the lines that followed.
				weight = obs.weight*bel.weight
				weight = weight*mvn.pdf(predMean.T.tolist()[0],si.T.tolist()[0],sigi + predVar)

				var = (sigiInv + predVarInv).I
				mean = var*(sigiInv*si + predVarInv*predMean)

				# Gaussian receives plain Python lists / floats, as before.
				btmp.addG(Gaussian(mean.T.tolist()[0],var.tolist(),weight.tolist()))

		btmp.normalizeWeights()
		btmp = btmp.kmeansCondensationN(self.maxMix)
		btmp.normalizeWeights()
		return btmp
Example #2
0
    def beliefUpdate(self, b, a, o, maxMix=10):
        """Return the belief mixture after action ``a`` and observation ``o``.

        Each component of ``self.pz[o]`` is combined with each component of
        ``b``. The belief mean is shifted by ``self.delA[a]`` and its
        covariance grown by ``self.delAVar`` (via ``self.covAdd``) before the
        two Gaussians are fused.

        Args:
            b: mixture whose ``Gs`` components expose ``mean``, ``var`` and
                ``weight``.
            a: index into ``self.delA``.
            o: index into ``self.pz``.
            maxMix: argument forwarded to ``kmeansCondensationN``.

        Returns:
            The condensed mixture after ``normalizeWeights()``.
        """
        posterior = GM()

        for obsComp in self.pz[o].Gs:
            for belComp in b.Gs:
                # Belief mean after the action shift, kept as a 1 x d row.
                shifted = np.matrix(belComp.mean) + np.matrix(self.delA[a])

                # Density of the shifted mean under the observation component,
                # using the summed covariance of all three sources.
                totalCov = self.covAdd(
                    self.covAdd(obsComp.var, belComp.var), self.delAVar)
                likelihood = mvn.pdf(shifted.tolist(), obsComp.mean, totalCov)
                weight = obsComp.weight * belComp.weight * likelihood.tolist()

                # Fused covariance: inverse of the summed precisions.
                obsPrec = np.matrix(obsComp.var).I
                predPrec = np.matrix(
                    self.covAdd(belComp.var, self.delAVar)).I
                fusedCov = (obsPrec + predPrec).I.tolist()

                # Precision-weighted sum of the two means (d x 1 column).
                # covAdd is invoked again so the helper sees the same
                # sequence of calls as before.
                obsInfo = obsPrec * np.matrix(obsComp.mean).T
                predPrecAgain = np.matrix(
                    self.covAdd(belComp.var, self.delAVar)).I
                infoVec = obsInfo + predPrecAgain * shifted.T

                # fusedCov is a nested list here; the product with the matrix
                # still yields a matrix.
                fusedMean = (fusedCov * infoVec).T.tolist()[0]

                posterior.addG(Gaussian(fusedMean, fusedCov, weight))

        posterior = posterior.kmeansCondensationN(maxMix)
        posterior.normalizeWeights()

        return posterior
Example #3
0
    def beliefUpdate(self, b, a, o, mod):
        """Build the updated belief for action ``a`` and observation ``o``.

        All model quantities are read from ``mod`` (``mod.pz``, ``mod.delA``,
        ``mod.delAVar``); ``self`` is not consulted. Every component of
        ``mod.pz[o]`` is paired with every component of ``b``: the belief
        component is shifted by ``mod.delA[a]`` and inflated by
        ``mod.delAVar``, then fused with the observation component using the
        product-of-Gaussians formulas.

        Args:
            b: mixture whose ``Gs`` components expose ``mean``, ``var`` and
                ``weight``.
            a: index into ``mod.delA``.
            o: index into ``mod.pz``.
            mod: object providing ``pz``, ``delA`` and ``delAVar``.

        Returns:
            The mixture produced by ``kmeansCondensationN(1)``, with
            ``normalizeWeights()`` applied before and after condensing.
        """
        btmp = GM()

        # The action shift and its noise are the same for every component pair.
        delA = np.matrix(mod.delA[a]).T
        delAVar = np.matrix(mod.delAVar)

        for obs in mod.pz[o].Gs:
            si = np.matrix(obs.mean).T
            sigi = np.matrix(obs.var)
            sigi_inv = sigi.I

            for bel in b.Gs:
                pred_mean = np.matrix(bel.mean).T + delA
                pred_var = np.matrix(bel.var) + delAVar
                pred_var_inv = pred_var.I

                # The covariance must be the sum of all three terms. The old
                # np.add(sigi, sigj, delAVar) used delAVar as the ufunc's
                # output buffer: it dropped the noise term from the sum and
                # overwrote delAVar for the lines that followed.
                weight = obs.weight * bel.weight
                weight = weight * mvn.pdf(
                    pred_mean.T.tolist()[0],
                    si.T.tolist()[0], sigi + pred_var)

                var = (sigi_inv + pred_var_inv).I
                mean = var * (sigi_inv * si + pred_var_inv * pred_mean)

                # Gaussian receives plain Python lists / floats, as before.
                btmp.addG(
                    Gaussian(mean.T.tolist()[0], var.tolist(),
                             weight.tolist()))

        btmp.normalizeWeights()
        btmp = btmp.kmeansCondensationN(1)
        btmp.normalizeWeights()
        return btmp
def backup(als, modes, delA, delAVar, pz, r, maxMix, b):
    """Back up the alpha functions at belief ``b`` and return the best one.

    For every alpha ``als[i]``, action ``j`` and observation ``k`` an
    intermediate mixture is built: the alpha is multiplied with the
    observation mixture ``pz[k]``, each resulting component is shifted by
    ``-delA[mode][j]`` and widened by ``delAVar``, and the result is
    multiplied with the mode mixture. For each action, the intermediate
    mixture scoring highest against ``b`` (via ``continuousDot``) is chosen
    per observation, the choices are summed, scaled by 0.9, and ``r[a]`` is
    added. The action whose condensed mixture scores highest wins.

    Args:
        als: sequence of mixtures providing ``GMProduct``.
        modes: sequence of mixtures; ``delA`` is indexed by position in it.
        delA: ``delA[mode][action]`` shift applied to component means.
        delAVar: value added to each component variance.
        pz: sequence of observation mixtures, one per observation.
        r: per-action mixtures, added with ``addGM``.
        maxMix: ``k`` passed to ``kmeansCondensationN``.
        b: belief passed to ``continuousDot``.

    Returns:
        A deep copy of the winning mixture with ``.action`` set to the
        winning action index.

    Raises:
        ValueError: if no action yields a value that compares greater than
            negative infinity (e.g. there are no actions).
    """
    numActs = len(delA[0])
    numObs = len(pz)

    newAls = [[[None for _ in range(numObs)] for _ in range(numActs)]
              for _ in range(len(als))]

    for i in range(len(als)):
        for j in range(numActs):
            for k in range(numObs):
                stepped = GM()
                newAls[i][j][k] = stepped

                # enumerate gives each mode its own position even if two
                # modes compare equal (modes.index would return the first).
                for modeIdx, h in enumerate(modes):
                    # Index the observation model with the observation index
                    # k; the action index j was used here before, leaving k
                    # unused.
                    tmpGM = als[i].GMProduct(pz[k])

                    # Components are 1-D here: unwrap the nested mean/var
                    # containers into scalars while applying the shift.
                    means = [m[0] - delA[modeIdx][j]
                             for m in tmpGM.getMeans()]
                    variances = [v[0][0] + delAVar for v in tmpGM.getVars()]
                    weights = tmpGM.getWeights()

                    tmpGM2 = GM()
                    for mu, sig, w in zip(means, variances, weights):
                        tmpGM2.addG(Gaussian(mu, sig, w))

                    stepped.addGM(tmpGM2.GMProduct(h))

    bestVal = float('-inf')
    bestAct = 0
    bestGM = None

    for a in range(numActs):
        suma = GM()
        for o in range(numObs):
            scores = [
                continuousDot(newAls[j][a][o], b)
                for j in range(len(newAls))
            ]
            suma.addGM(newAls[np.argmax(scores)][a][o])
        suma.scalerMultiply(0.9)
        suma.addGM(r[a])

        # Unwrap means and variances independently: a component may have a
        # list mean but an already-scalar variance, or the reverse.
        for g in suma.Gs:
            if isinstance(g.mean, list):
                g.mean = g.mean[0]
            if isinstance(g.var, list):
                g.var = g.var[0][0]

        suma = suma.kmeansCondensationN(k=maxMix, lowInit=-20, highInit=20)

        tmp = continuousDot(suma, b)
        if tmp > bestVal:
            bestAct = a
            bestGM = copy.deepcopy(suma)
            bestVal = tmp

    if bestGM is None:
        raise ValueError("backup: no action produced a comparable value")

    bestGM.action = bestAct

    return bestGM
	def beliefUpdateSoftmax(self,b,a,o):
		"""Shift the belief by action ``a`` and fold in observation ``o``.

		Each component of ``b`` has ``self.delA[a]`` added to its mean and
		``self.delAVar`` added to its covariance. The shifted mixture is then
		handed to ``self.pz2.runVBND`` together with ``o``.
		NOTE(review): ``runVBND`` is presumably the softmax observation
		update - confirm against the ``pz2`` class.

		Args:
			b: mixture whose ``Gs`` components expose ``mean``, ``var`` and
				``weight``.
			a: index into ``self.delA``.
			o: observation argument forwarded to ``runVBND``.

		Returns:
			The result of ``runVBND`` condensed with
			``kmeansCondensationN(self.maxMix)`` and then normalized.
		"""
		predicted = GM()
		for j in b.Gs:
			mean = (np.matrix(j.mean) + np.matrix(self.delA[a])).tolist()[0]
			var = (np.matrix(j.var) + np.matrix(self.delAVar)).tolist()
			predicted.addG(Gaussian(mean,var,j.weight))

		btmp = self.pz2.runVBND(predicted,o)
		btmp = btmp.kmeansCondensationN(self.maxMix)
		btmp.normalizeWeights()

		return btmp
Example #6
0
def beliefUpdate(modes,delA,delAVar,pz,bels,a,o,cond = -1):
	"""Update a scalar-state belief mixture for action ``a`` and observation ``o``.

	For every combination of belief component, mode component and
	observation component the code:
	  1. multiplies the belief and mode components (``C1``, ``c1``),
	  2. shifts the mean by ``delA[mode][a]`` and adds ``delAVar`` to the
	     variance (``C2``, ``c2``),
	  3. multiplies the result with the observation component, weighting by
	     the density of the observation mean under the shifted Gaussian.

	The arithmetic uses ``1/var`` directly, so component variances are
	treated as scalars here.

	Args:
		modes: sequence of mixtures; ``delA`` is indexed by position in it.
		delA: ``delA[mode][action]`` mean shift.
		delAVar: variance added by the action.
		pz: observation mixtures, indexed by ``o``.
		bels: belief mixture to update.
		a: action index.
		o: observation index.
		cond: if not -1, the ``k`` passed to ``kmeansCondensationN``.

	Returns:
		The updated mixture, normalized before the optional condensation.
		The mixture is also printed through ``display()`` before returning.
	"""
	btmp = GM()

	for d in bels.Gs:
		# enumerate gives each mode its own position without the linear
		# modes.index(h) search on every innermost iteration.
		for modeIdx, h in enumerate(modes):
			for f in h.Gs:
				# Belief x mode product; independent of the observation
				# component, so computed once per (d, f) pair.
				C1 = 1/(1/f.var + 1/d.var)
				c1 = C1*((1/f.var)*f.mean + (1/d.var)*d.mean)

				# Apply the action: shift the mean, widen the variance.
				C2 = C1 + delAVar
				c2 = c1 + delA[modeIdx][a]

				for l in pz[o].Gs:
					weight = d.weight*f.weight*l.weight*mvn.pdf(l.mean,c2,l.var+C2)

					var = 1/((1/l.var)+(1/C2))
					mean = var*((1/l.var)*l.mean + (1/C2)*c2)

					btmp.addG(Gaussian(mean,var,weight))

	btmp.normalizeWeights()

	if(cond != -1):
		btmp = btmp.kmeansCondensationN(k=cond,lowInit = -20,highInit=20)

	# Walk the component list explicitly, as the rest of the file does, and
	# unwrap any variance that came back nested in lists.
	for g in btmp.Gs:
		while(isinstance(g.var,list)):
			g.var = g.var[0]

	btmp.display()

	return btmp
Example #7
0
def backup(als,modes,delA,delAVar,pz,r,maxMix,b):
	"""Back up the alpha functions at belief ``b`` and return the best one.

	For every alpha ``als[i]``, action ``j`` and observation ``k`` an
	intermediate mixture is built. For each mode index ``h``,
	``modes.runVB(als[i], h)`` is called, and every component of its result
	is multiplied with every component of ``pz[k]``. The product mean is
	shifted by ``-delA[h][j]`` and the product variance widened by
	``delAVar``. For each action, the intermediate mixture scoring highest
	against ``b`` (via ``continuousDot``) is chosen per observation, the
	choices are summed, scaled by 0.9, and ``r[a]`` is added. The action
	whose condensed mixture scores highest wins.

	The arithmetic uses ``var**-1`` directly, so component variances are
	treated as scalars here.

	Args:
		als: sequence of alpha mixtures.
		modes: object exposing ``weights`` and ``runVB``.
		delA: ``delA[mode][action]`` shift applied to component means.
		delAVar: value added to each component variance.
		pz: sequence of observation mixtures, one per observation.
		r: per-action mixtures, added with ``addGM``.
		maxMix: ``k`` passed to ``kmeansCondensationN``.
		b: belief passed to ``continuousDot``.

	Returns:
		A deep copy of the winning mixture with ``.action`` set to the
		winning action index.

	Raises:
		ValueError: if no action yields a value that compares greater than
			negative infinity (e.g. there are no actions).
	"""
	numActs = len(delA[0])
	numObs = len(pz)

	newAls = [[[0 for i in range(0,numObs)] for j in range(0,numActs)] for k in range(0,len(als))]

	for i in range(0,len(als)):
		for j in range(0,numActs):
			for k in range(0,numObs):
				stepped = GM()
				newAls[i][j][k] = stepped

				for h in range(0,len(modes.weights)):
					tmp1 = modes.runVB(als[i],h)
					for l in range(0,pz[k].size):
						mixl = pz[k].Gs[l]
						for p in range(0,tmp1.size):
							mixp = tmp1.Gs[p]

							# Weight of the product component.
							weight1 = mixp.weight*mixl.weight
							weight = weight1*mvn.pdf(mixp.mean,mixl.mean,mixp.var+mixl.var)

							# Product-of-Gaussians variance and mean.
							c2 = (mixp.var**-1 + mixl.var**-1)**-1
							c1 = c2*(mixp.var**-1 * mixp.mean + mixl.var**-1 * mixl.mean)

							# Apply the action: shift back, widen.
							mean = c1-delA[h][j]
							var = c2+delAVar

							stepped.addG(Gaussian(mean,var,weight))

	# Start from -inf and None so that any finite value is accepted and a
	# run with no winner fails with a clear message instead of setting an
	# attribute on a list.
	bestVal = float('-inf')
	bestAct = 0
	bestGM = None

	for a in range(0,numActs):
		suma = GM()
		for o in range(0,numObs):
			scores = [continuousDot(newAls[j][a][o],b) for j in range(0,len(newAls))]
			suma.addGM(newAls[np.argmax(scores)][a][o])
		suma.scalerMultiply(0.9)
		suma.addGM(r[a])

		# Unwrap list-valued means / variances into scalars.
		for g in suma.Gs:
			if(isinstance(g.mean,list)):
				g.mean = g.mean[0]
			if(isinstance(g.var,list)):
				g.var = g.var[0][0]

		suma = suma.kmeansCondensationN(k=maxMix,lowInit = -20,highInit=20)

		tmp = continuousDot(suma,b)
		if(tmp > bestVal):
			bestAct = a
			bestGM = copy.deepcopy(suma)
			bestVal = tmp

	if(bestGM is None):
		raise ValueError("backup: no action produced a comparable value")

	bestGM.action = bestAct

	return bestGM
Example #8
0
    def MDPValueIteration(self, gen=True):
        """Compute the value-function mixture, or load it from disk.

        With ``gen`` true, ``self.ValueFunc`` starts as a copy of ``self.r``
        with every weight set to -1000 and is iterated until
        ``ValueFunc.comp`` reports it equal to the previous iterate, or 30
        iterations have run. Each iteration, for actions 0 and 1, shifts
        every component mean by ``-self.delA[a]``, adds ``self.delAVar`` to
        its covariance and adds ``self.r``. It keeps the candidate scoring
        highest against a 5x5x5x5 grid mixture, scales it by
        ``self.discount`` and condenses it to 20 components. The result is
        written with ``printGMArrayToFile``.

        With ``gen`` false, the value function is read back with
        ``readGMArray4D`` from the same path.

        Args:
            gen: generate (True) or load (False) the value function.

        Returns:
            None; the result is stored in ``self.ValueFunc``.
        """
        import itertools

        policyPath = "policies/MDP4DIntercept"

        if not gen:
            self.ValueFunc = GM().readGMArray4D(policyPath)[0]
            return

        # Initialize the value function from the reward mixture.
        self.ValueFunc = copy.deepcopy(self.r)
        for g in self.ValueFunc.Gs:
            g.weight = -1000

        # Placeholder "previous iterate" for the first comparison.
        previous = GM()
        previous.addG(
            Gaussian(
                [1, 0, 0, 0],
                [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
                1))

        # Grid mixture used to score candidates; one component per integer
        # point in {0..4}^4, each with its own covariance list.
        uniform = GM()
        for point in itertools.product(range(0, 5), repeat=4):
            uniform.addG(
                Gaussian(list(point),
                         [[4, 0, 0, 0], [0, 4, 0, 0],
                          [0, 0, 4, 0], [0, 0, 0, 4]], 1))

        count = 0

        # Iterate until convergence or the iteration cap.
        while (not self.ValueFunc.comp(previous) and count < 30):
            print(count)
            previous = copy.deepcopy(self.ValueFunc)
            count += 1

            # -inf guarantees the first candidate is accepted, so maxGM can
            # never stay an empty mixture because scores were very negative.
            maxVal = float('-inf')
            maxGM = GM()
            for a in range(0, 2):
                suma = GM()
                for g in self.ValueFunc.Gs:
                    # [0] keeps the mean a flat list, matching every other
                    # Gaussian built in this method.
                    mean = (np.matrix(g.mean) -
                            np.matrix(self.delA[a])).tolist()[0]
                    var = (np.matrix(g.var) +
                           np.matrix(self.delAVar)).tolist()
                    suma.addG(Gaussian(mean, var, g.weight))
                suma.addGM(self.r)
                tmpVal = self.continuousDot(uniform, suma)
                if (tmpVal > maxVal):
                    maxVal = tmpVal
                    maxGM = copy.deepcopy(suma)

            maxGM.scalerMultiply(self.discount)
            maxGM = maxGM.kmeansCondensationN(20)
            self.ValueFunc = copy.deepcopy(maxGM)

        print("MDP Value Iteration Complete")
        self.ValueFunc.printGMArrayToFile([self.ValueFunc], policyPath)