Ejemplo n.º 1
0
	def backup(self,b):
		"""Build the best one-step backup mixture for ``b``.

		For every action index ``a`` in ``range(len(self.delA))`` a candidate
		mixture is assembled: for each observation index ``o`` the entry
		``self.preAls[j][a][o]`` whose ``self.continuousDot`` with ``b`` is
		largest is added, the sum is scaled by ``self.discount`` and
		``self.r[a]`` is added on top.  The candidate with the largest
		``self.continuousDot`` against ``b`` wins.

		Args:
			b: object passed as second argument to ``self.continuousDot``
				(presumably the belief mixture -- confirm against callers).

		Returns:
			A deep copy of the winning candidate, with its ``action``
			attribute set to the winning action index.

		Raises:
			ValueError: if no candidate was selected (``self.delA`` is empty,
				or no candidate value compared greater than ``-inf``).
		"""
		R = self.r
		pz = self.pz
		als1 = self.preAls

		# In soft mode the observation count is read from ``pz.size``,
		# otherwise from ``len(pz)``.
		obslen = pz.size if self.useSoft else len(pz)

		# Start from -inf instead of a large negative magic number so that
		# arbitrarily low (but valid) values can still be selected.
		bestVal = float("-inf")
		bestAct = 0
		bestGM = None

		for a in range(len(self.delA)):
			suma = GM()
			for o in range(obslen):
				# Keep the pre-computed entry that scores highest against b.
				scores = [self.continuousDot(al[a][o], b) for al in als1]
				suma.addGM(als1[np.argmax(scores)][a][o])
			suma.scalerMultiply(self.discount)
			suma.addGM(R[a])

			tmp = self.continuousDot(suma, b)
			if tmp > bestVal:
				bestAct = a
				bestGM = copy.deepcopy(suma)
				bestVal = tmp

		if bestGM is None:
			# Previously this fell through to setting ``.action`` on a list.
			raise ValueError(
				"backup: no action produced a comparable value "
				"(empty action set or non-finite values)")

		bestGM.action = bestAct

		return bestGM
Ejemplo n.º 2
0
	def backupFactored(self,b):
		"""Build the best one-step backup mixture for ``b`` over a factored action.

		Candidates are enumerated over the pair ``(am, aq)`` where ``am`` runs
		over ``range(len(self.delA))`` and ``aq`` over ``range(8)``.  For each
		pair, and for each ``oq`` in ``range(2)``, the entry
		``self.preAls[j][am][aq][oq]`` with the largest ``self.continuousDot``
		against ``b`` is added; the sum is scaled by ``self.discount`` and
		``self.r[am]`` is added.  The candidate with the largest
		``self.continuousDot`` against ``b`` wins.

		Args:
			b: object passed as second argument to ``self.continuousDot``
				(presumably the belief mixture -- confirm against callers).

		Returns:
			A deep copy of the winning candidate, with its ``action``
			attribute set to the list ``[am, aq]`` of the winning pair.

		Raises:
			ValueError: if no candidate was selected (``self.delA`` is empty,
				or no candidate value compared greater than ``-inf``).
		"""
		R = self.r
		als1 = self.preAls

		# Start from -inf instead of a large negative magic number so that
		# arbitrarily low (but valid) values can still be selected.
		bestVal = float("-inf")
		bestAct = [0,0]
		bestGM = None

		for am in range(len(self.delA)):
			# NOTE(review): 8 and 2 are hard-coded sizes of the second action
			# factor and of the observation factor -- confirm they match the
			# layout of self.preAls before generalizing.
			for aq in range(8):
				suma = GM()
				for oq in range(2):
					# Keep the pre-computed entry that scores highest against b.
					scores = [self.continuousDot(al[am][aq][oq], b) for al in als1]
					suma.addGM(als1[np.argmax(scores)][am][aq][oq])
				suma.scalerMultiply(self.discount)
				suma.addGM(R[am])

				tmp = self.continuousDot(suma, b)
				if tmp > bestVal:
					bestAct = [am,aq]
					bestGM = copy.deepcopy(suma)
					bestVal = tmp

		if bestGM is None:
			# Previously this fell through to setting ``.action`` on a list.
			raise ValueError(
				"backupFactored: no action produced a comparable value "
				"(empty action set or non-finite values)")

		bestGM.action = bestAct

		return bestGM
Ejemplo n.º 3
0
    def backup(self, b):
        """Build the best one-step backup mixture for ``b``.

        One candidate is produced per action index in
        ``range(len(self.delA))``: for each observation index ``o`` in
        ``range(len(self.pz))`` the entry ``self.preAls[j][a][o]`` with the
        largest ``self.continuousDot`` against ``b`` is added, the sum is
        scaled by ``self.discount`` and ``self.r`` (the same object for every
        action) is added.  The candidate with the largest
        ``self.continuousDot`` against ``b`` wins.

        Args:
            b: object passed as second argument to ``self.continuousDot``
                (presumably the belief mixture -- confirm against callers).

        Returns:
            The winning candidate mixture with its ``action`` attribute set
            to the winning action index.

        Raises:
            ValueError: if no candidate was selected (``self.delA`` is empty,
                or no candidate value compared greater than ``-inf``).
        """
        R = self.r
        pz = self.pz
        als1 = self.preAls

        # Start from -inf instead of a large negative magic number so that
        # arbitrarily low (but valid) values can still be selected.
        bestVal = float("-inf")
        bestAct = 0
        bestGM = None

        for a in range(len(self.delA)):
            suma = GM()
            for o in range(len(pz)):
                # Keep the pre-computed entry that scores highest against b.
                scores = [self.continuousDot(al[a][o], b) for al in als1]
                suma.addGM(als1[np.argmax(scores)][a][o])
            suma.scalerMultiply(self.discount)
            suma.addGM(R)

            tmp = self.continuousDot(suma, b)
            if tmp > bestVal:
                bestAct = a
                # ``suma`` is rebuilt from scratch on every iteration, so the
                # winner can be kept by reference.
                bestGM = suma
                bestVal = tmp

        if bestGM is None:
            # Previously this fell through to setting ``.action`` on a list.
            raise ValueError(
                "backup: no action produced a comparable value "
                "(empty action set or non-finite values)")

        bestGM.action = bestAct

        return bestGM
def backup(als, modes, delA, delAVar, pz, r, maxMix, b, discount=0.9):
    """Compute intermediate mixtures and return the best backup for ``b``.

    Stage 1 fills ``newAls[i][j][k]`` for every mixture ``als[i]``, action
    index ``j`` in ``range(len(delA[0]))`` and observation index ``k`` in
    ``range(len(pz))``.  For each mode ``h`` (position ``m`` in ``modes``)
    the product ``als[i].GMProduct(pz[k])`` has ``delA[m][j]`` subtracted
    from its means and ``delAVar`` added to its variances, is multiplied
    with ``h`` and accumulated.

    Stage 2 builds one candidate per action: for each observation the
    ``newAls`` entry scoring highest under ``continuousDot`` against ``b`` is
    added, the sum is scaled by ``discount``, ``r[a]`` is added and the result
    is condensed to ``maxMix`` components.  The candidate with the largest
    ``continuousDot`` against ``b`` wins.

    Args:
        als: sequence of mixtures exposing ``GMProduct``.
        modes: sequence of mixtures, one per entry of ``delA``.
        delA: ``delA[m][j]`` is the mean shift for mode ``m`` and action ``j``.
        delAVar: scalar added to every component variance.
        pz: sequence of mixtures, one per observation.
        r: sequence of mixtures, one per action.
        maxMix: ``k`` passed to ``kmeansCondensationN``.
        b: object passed as second argument to ``continuousDot`` (presumably
            the belief mixture -- confirm against callers).
        discount: factor applied to the summed mixtures before the reward is
            added; defaults to the previously hard-coded 0.9.

    Returns:
        A deep copy of the winning candidate with its ``action`` attribute
        set to the winning action index.

    Raises:
        ValueError: if no candidate was selected (no actions, or no candidate
            value compared greater than ``-inf``).
    """
    numActions = len(delA[0])
    numObs = len(pz)

    newAls = []
    for al in als:
        perAction = []
        for j in range(numActions):
            perObs = []
            for k in range(numObs):
                acc = GM()
                # enumerate gives the mode's own position, instead of the
                # first equal element that ``modes.index(h)`` would return.
                for m, h in enumerate(modes):
                    # The slot being filled belongs to observation k, so the
                    # observation mixture must be pz[k] (it was indexed with
                    # the action index before).
                    tmpGM = al.GMProduct(pz[k])
                    # Means arrive as [x] and variances as [[v]]; unwrap them
                    # to scalars while applying the action shift and noise.
                    means = [mu[0] - delA[m][j] for mu in tmpGM.getMeans()]
                    variances = [v[0][0] + delAVar for v in tmpGM.getVars()]
                    weights = tmpGM.getWeights()

                    tmpGM2 = GM()
                    for mu, v, w in zip(means, variances, weights):
                        tmpGM2.addG(Gaussian(mu, v, w))

                    acc.addGM(tmpGM2.GMProduct(h))
                perObs.append(acc)
            perAction.append(perObs)
        newAls.append(perAction)

    # Start from -inf instead of a large negative magic number so that
    # arbitrarily low (but valid) values can still be selected.
    bestVal = float("-inf")
    bestAct = 0
    bestGM = None

    for a in range(numActions):
        suma = GM()
        for o in range(numObs):
            scores = [continuousDot(entry[a][o], b) for entry in newAls]
            suma.addGM(newAls[np.argmax(scores)][a][o])
        suma.scalerMultiply(discount)
        suma.addGM(r[a])

        # Components coming from r[a] may still carry list-wrapped
        # parameters; flatten them before condensation.
        for g in suma.Gs:
            if isinstance(g.mean, list):
                g.mean = g.mean[0]
                g.var = g.var[0][0]

        suma = suma.kmeansCondensationN(k=maxMix, lowInit=-20, highInit=20)

        tmp = continuousDot(suma, b)
        if tmp > bestVal:
            bestAct = a
            bestGM = copy.deepcopy(suma)
            bestVal = tmp

    if bestGM is None:
        # Previously this fell through to setting ``.action`` on a list.
        raise ValueError(
            "backup: no action produced a comparable value "
            "(empty action set or non-finite values)")

    bestGM.action = bestAct

    return bestGM
    def backup(self, b):
        """Build the best one-step backup mixture for ``b``.

        For every action index ``a`` in ``range(len(self.delA))`` a candidate
        mixture is assembled: for each observation index ``o`` the entry
        ``self.preAls[j][a][o]`` whose ``self.continuousDot`` with ``b`` is
        largest is added, the sum is scaled by ``self.discount`` and
        ``self.r[a]`` is added on top.  The candidate with the largest
        ``self.continuousDot`` against ``b`` wins.

        Args:
            b: object passed as second argument to ``self.continuousDot``
                (presumably the belief mixture -- confirm against callers).

        Returns:
            A deep copy of the winning candidate, with its ``action``
            attribute set to the winning action index.

        Raises:
            ValueError: if no candidate was selected (``self.delA`` is empty,
                or no candidate value compared greater than ``-inf``).
        """
        R = self.r
        pz = self.pz
        als1 = self.preAls

        # In soft mode the observation count is read from ``pz.size``,
        # otherwise from ``len(pz)``.
        obslen = pz.size if self.useSoft else len(pz)

        # Start from -inf instead of a large negative magic number so that
        # arbitrarily low (but valid) values can still be selected.
        bestVal = float("-inf")
        bestAct = 0
        bestGM = None

        for a in range(len(self.delA)):
            suma = GM()
            for o in range(obslen):
                # Keep the pre-computed entry that scores highest against b.
                scores = [self.continuousDot(al[a][o], b) for al in als1]
                suma.addGM(als1[np.argmax(scores)][a][o])
            suma.scalerMultiply(self.discount)
            suma.addGM(R[a])

            tmp = self.continuousDot(suma, b)
            if tmp > bestVal:
                bestAct = a
                bestGM = copy.deepcopy(suma)
                bestVal = tmp

        if bestGM is None:
            # Previously this fell through to setting ``.action`` on a list.
            raise ValueError(
                "backup: no action produced a comparable value "
                "(empty action set or non-finite values)")

        bestGM.action = bestAct

        return bestGM
def backup(als, modes, delA, delAVar, pz, r, maxMix, b, discount=0.9):
    """Compute intermediate mixtures and return the best backup for ``b``.

    Stage 1 fills ``newAls[i][j][k]`` for every mixture ``als[i]``, action
    index ``j`` in ``range(len(delA[0]))`` and observation index ``k`` in
    ``range(len(pz))``.  For each mode index ``h`` in
    ``range(len(modes.weights))`` the mixture ``modes.runVB(als[i], h)`` is
    multiplied component-by-component with ``pz[k]`` using the closed-form
    product of two scalar Gaussians; ``delA[h][j]`` is then subtracted from
    the mean and ``delAVar`` added to the variance.

    Stage 2 builds one candidate per action: for each observation the
    ``newAls`` entry scoring highest under ``continuousDot`` against ``b`` is
    added, the sum is scaled by ``discount`` and ``r[a]`` is added.  The
    candidate with the largest ``continuousDot`` against ``b`` wins.

    Args:
        als: sequence of mixtures passed to ``modes.runVB``.
        modes: object exposing ``weights`` and ``runVB(mixture, index)``.
        delA: ``delA[h][j]`` is the mean shift for mode ``h`` and action ``j``.
        delAVar: scalar added to every component variance.
        pz: sequence of mixtures (with ``size`` and ``Gs``), one per observation.
        r: sequence of mixtures, one per action.
        maxMix: unused here; kept for interface compatibility (the
            condensation step that consumed it is disabled).
        b: object passed as second argument to ``continuousDot`` (presumably
            the belief mixture -- confirm against callers).
        discount: factor applied to the summed mixtures before the reward is
            added; defaults to the previously hard-coded 0.9.

    Returns:
        A deep copy of the winning candidate with its ``action`` attribute
        set to the winning action index.

    Raises:
        ValueError: if no candidate was selected (no actions, or no candidate
            value compared greater than ``-inf``).
    """
    numActions = len(delA[0])
    numObs = len(pz)

    newAls = [[[GM() for _ in range(numObs)] for _ in range(numActions)]
              for _ in range(len(als))]

    for i in range(len(als)):
        for j in range(numActions):
            for k in range(numObs):
                target = newAls[i][j][k]

                for h in range(len(modes.weights)):
                    # NOTE(review): runVB does not depend on j or k; it could
                    # be hoisted if it is confirmed to be side-effect free.
                    tmp1 = modes.runVB(als[i], h)
                    for l in range(pz[k].size):
                        mixl = pz[k].Gs[l]
                        for p in range(tmp1.size):
                            mixp = tmp1.Gs[p]

                            # Weight of the product of two Gaussians: product
                            # of the weights times the overlap density.
                            weight = (mixp.weight * mixl.weight) * mvn.pdf(
                                mixp.mean, mixl.mean, mixp.var + mixl.var)

                            # Precision-weighted combination (scalar case).
                            c2 = (mixp.var**-1 + mixl.var**-1)**-1
                            c1 = c2 * (mixp.var**-1 * mixp.mean +
                                       mixl.var**-1 * mixl.mean)

                            mean = c1 - delA[h][j]
                            var = c2 + delAVar

                            target.addG(Gaussian(mean, var, weight))

    # Start from -inf instead of a large negative magic number so that
    # arbitrarily low (but valid) values can still be selected.
    bestVal = float("-inf")
    bestAct = 0
    bestGM = None

    for a in range(numActions):
        suma = GM()
        for o in range(numObs):
            scores = [continuousDot(entry[a][o], b) for entry in newAls]
            suma.addGM(newAls[np.argmax(scores)][a][o])
        suma.scalerMultiply(discount)
        suma.addGM(r[a])

        # Components coming from r[a] may still carry list-wrapped
        # parameters; flatten them to scalars.
        for g in suma.Gs:
            if isinstance(g.mean, list):
                g.mean = g.mean[0]
            if isinstance(g.var, list):
                g.var = g.var[0][0]

        # Condensation to maxMix components is intentionally disabled here.

        tmp = continuousDot(suma, b)
        if tmp > bestVal:
            bestAct = a
            bestGM = copy.deepcopy(suma)
            bestVal = tmp

    if bestGM is None:
        # Previously this fell through to setting ``.action`` on a list.
        raise ValueError(
            "backup: no action produced a comparable value "
            "(empty action set or non-finite values)")

    bestGM.action = bestAct

    return bestGM
Ejemplo n.º 7
0
    def MDPValueIteration(self, gen=True):
        """Generate or load ``self.ValueFunc`` for the 4-D problem.

        When ``gen`` is falsy the value function is read back from
        ``policies/MDP4DIntercept`` via ``GM.readGMArray4D`` (first entry).

        When ``gen`` is truthy the value function is seeded with a deep copy
        of ``self.r`` whose component weights are all set to -1000, then
        refined for at most 30 sweeps or until ``self.ValueFunc.comp`` reports
        a match with the previous sweep.  Each sweep evaluates action indices
        0 and 1: every component is shifted by ``self.delA[a]`` and widened by
        ``self.delAVar``, ``self.r`` is added, and the candidate is scored with
        ``self.continuousDot`` against a 5x5x5x5 grid mixture.  The best
        candidate is scaled by ``self.discount`` and condensed to 20
        components.  The sweep counter is printed each sweep, a completion
        message at the end, and the result is written to
        ``policies/MDP4DIntercept``.

        Args:
            gen: truthy to run the iteration and save; falsy to load.
        """
        policy_path = "policies/MDP4DIntercept"

        if not gen:
            # Load path: nothing to compute, just read the stored mixture.
            reader = GM()
            self.ValueFunc = reader.readGMArray4D(policy_path)[0]
            return

        # Seed: reward-shaped mixture with every weight forced to -1000.
        self.ValueFunc = copy.deepcopy(self.r)
        for component in self.ValueFunc.Gs:
            component.weight = -1000

        # Dummy "previous" mixture so the first convergence check has
        # something to compare against.
        previous = GM()
        previous.addG(
            Gaussian(
                [1, 0, 0, 0],
                [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
                1))

        # Grid mixture: one unit-weight component per integer point of
        # {0..4}^4, each with its own covariance list.
        grid = GM()
        corners = [[i, j, k, l]
                   for i in range(0, 5)
                   for j in range(0, 5)
                   for k in range(0, 5)
                   for l in range(0, 5)]
        for corner in corners:
            grid.addG(
                Gaussian(corner,
                         [[4, 0, 0, 0], [0, 4, 0, 0],
                          [0, 0, 4, 0], [0, 0, 0, 4]], 1))

        sweeps = 0

        # Iterate until the comparison succeeds or 30 sweeps have run.
        while not (self.ValueFunc.comp(previous) or sweeps >= 30):
            print(sweeps)
            previous = copy.deepcopy(self.ValueFunc)
            sweeps += 1

            topScore = -10000000
            topGM = GM()
            for a in range(0, 2):
                candidate = GM()
                for component in self.ValueFunc.Gs:
                    shifted = np.matrix(component.mean) - np.matrix(self.delA[a])
                    widened = np.matrix(component.var) + np.matrix(self.delAVar)
                    candidate.addG(
                        Gaussian(shifted.tolist(), widened.tolist(),
                                 component.weight))
                candidate.addGM(self.r)

                score = self.continuousDot(grid, candidate)
                if score > topScore:
                    topScore = score
                    topGM = copy.deepcopy(candidate)

            topGM.scalerMultiply(self.discount)
            topGM = topGM.kmeansCondensationN(20)
            self.ValueFunc = copy.deepcopy(topGM)

        print("MDP Value Iteration Complete")
        self.ValueFunc.printGMArrayToFile([self.ValueFunc], policy_path)