Code example #1
    def generate(self, s, a):
        #Generative model step: sample a successor state from a Gaussian centered
        #at s shifted by the action's delta, pick the most likely observation at
        #that state, and evaluate the reward density for action a at s.
        #sprime = np.random.choice([i for i in range(0,self.model.N)],p=self.model.px[a][s]);

        #tmpGM = GM((np.array(s) + np.array(self.model.delA)).T.tolist(),self.model.delAVar,1);
        tmpGM = GM()
        tmpGM.addG(
            Gaussian((np.array(s) + np.array(self.model.delA[a])).tolist(),
                     self.model.delAVar, 1))

        sprime = tmpGM.sample(1)[0]
        ztrial = [0] * len(self.model.pz)
        for i in range(0, len(self.model.pz)):
            ztrial[i] = self.model.pz[i].pointEval(sprime)
        z = ztrial.index(max(ztrial))
        reward = self.model.r[a].pointEval(s)
        '''
		if(a == 0 and s > 13):
			reward = 10; 
		elif(a==1 and s<13):
			reward = 10; 
		elif(a == 2 and s==13):
			reward = 100;
		else:
			reward = -10; 
		'''

        return [sprime, z, reward]
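The step above is the solver's black-box generative model: it samples a successor state from a Gaussian centered at s shifted by the chosen action's delta, picks the observation model with the highest density at that sample, and evaluates the reward density for the action at s. Below is a minimal stand-alone sketch of the same step using only numpy; the gaussian_pdf helper and the model arguments (delA, delAVar, observation and reward parameters) are illustrative stand-ins for the GM/Gaussian classes, not part of the original project.

import numpy as np

def gaussian_pdf(x, mean, cov):
    #Multivariate normal density, standing in for GM.pointEval here.
    d = np.asarray(x, dtype=float) - np.asarray(mean, dtype=float)
    cov = np.atleast_2d(cov)
    norm = np.sqrt(np.linalg.det(2 * np.pi * cov))
    return float(np.exp(-0.5 * d @ np.linalg.solve(cov, d)) / norm)

def generate_step(s, a, delA, delAVar, obs_means, obs_var, r_means, r_var):
    #Successor state: s shifted by the action delta, plus Gaussian noise.
    sprime = np.random.multivariate_normal(np.asarray(s) + np.asarray(delA[a]), delAVar)
    #Observation: index of the densest observation model at sprime.
    z = int(np.argmax([gaussian_pdf(sprime, m, obs_var) for m in obs_means]))
    #Reward: density of the action's reward model evaluated at s.
    reward = gaussian_pdf(s, r_means[a], r_var)
    return sprime.tolist(), z, reward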
Code example #2
File: ContinuousHMM.py Project: clburks9/GeneralHMM
    def simulate(self, steps, initState=None):
        #Draw a discrete state path from the transition table Tprob and one
        #continuous observation from each new state's Gaussian emission model.

        if (initState is None):
            initState = self.states[0]

        states = []
        obs = []

        states.append(initState)

        for step in range(0, steps):
            #get new state
            keys, vals = zip(*self.Tprob[states[-1]].items())
            states.append(np.random.choice(keys, p=vals))

            newGM = GM()
            newGM.addG(self.Oprob[self.states.index(states[-1])])
            obs.append(newGM.sample(1)[0])

        return states, obs
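simulate walks the chain for the requested number of steps: each iteration draws the next discrete state from the transition table Tprob and then one continuous observation from that state's Gaussian emission model. The following self-contained sketch reproduces the same loop with plain numpy; the two-state model at the bottom is made up for illustration and is not taken from the GeneralHMM project.

import numpy as np

def simulate(Tprob, emissions, initState, steps):
    #Tprob: dict mapping state -> {next state: probability}
    #emissions: dict mapping state -> (mean, covariance)
    states, obs = [initState], []
    for _ in range(steps):
        keys, vals = zip(*Tprob[states[-1]].items())
        states.append(np.random.choice(keys, p=vals))
        mean, cov = emissions[states[-1]]
        obs.append(np.random.multivariate_normal(mean, cov))
    return states, obs

Tprob = {'A': {'A': 0.9, 'B': 0.1}, 'B': {'A': 0.2, 'B': 0.8}}
emissions = {'A': ([0.0, 0.0], np.eye(2)), 'B': ([3.0, 3.0], np.eye(2))}
states, obs = simulate(Tprob, emissions, 'A', 10)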
Code example #3
    def getRolloutReward(self, s, d=1):
        #Random-policy rollout to depth d: accumulate discounted reward densities
        #and propagate the state through the action-shift Gaussian.
        reward = 0
        for i in range(0, d):
            a = np.random.randint(0, self.model.acts)
            '''
			if(s < 13):
				a = 1; 
			elif(s>13):
				a = 0; 
			else:
				a = 2; 
			'''

            reward += self.model.discount * self.model.r[a].pointEval(s)
            #s = np.random.choice([i for i in range(0,self.model.N)],p=self.model.px[a][s]);
            tmpGM = GM()
            tmpGM.addG(
                Gaussian((np.array(s) + np.array(self.model.delA[a])).tolist(),
                         self.model.delAVar, 1))

            s = tmpGM.sample(1)[0]
        return reward
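The rollout above picks a uniformly random action at each of d steps, adds the discounted reward density at the current state, and resamples the state from the same action-shift Gaussian used in generate. Note that the discount is applied as a constant factor every step; a conventional MCTS rollout usually discounts geometrically, as in the hedged sketch below, where reward_fn and step_fn are placeholder callables rather than parts of the original model.

import numpy as np

def rollout_reward(s, depth, num_actions, reward_fn, step_fn, discount):
    #Random-policy rollout with geometric discounting: sum_i discount**i * r_i.
    total = 0.0
    for i in range(depth):
        a = np.random.randint(0, num_actions)
        total += (discount ** i) * reward_fn(s, a)
        s = step_fn(s, a)
    return total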
Code example #4
    def lwisUpdate(self, prior, softClass, numSamples, inverse=False):
        #Runs a likelihood-weighted importance sampling update on a given Gaussian
        q = GM()
        q.addG(Gaussian(prior.mean, prior.var, 1))

        p = GM()
        p.addG(prior)

        x = q.sample(numSamples)

        w = np.zeros(numSamples)
        for i in range(0, numSamples):
            if (not inverse):
                w[i] = p.pointEval(x[i]) * self.pointEvalND(
                    softClass, x[i]) / q.pointEval(x[i])
            else:
                w[i] = p.pointEval(x[i]) * (
                    1 - self.pointEvalND(softClass, x[i])) / q.pointEval(x[i])

        suma = sum(w)
        for i in range(0, len(w)):
            w[i] = w[i] / suma

        muHat = np.zeros(len(prior.mean))
        for i in range(0, numSamples):
            muHat = muHat + np.dot(x[i], w[i])

        varHat = np.zeros(shape=(len(prior.mean), len(prior.mean)))
        for i in range(0, numSamples):
            xi = np.asarray(x[i])
            varHat = varHat + w[i] * np.outer(xi, xi)
        varHat = varHat - np.outer(muHat, muHat)

        muHat = muHat.tolist()
        varHat = varHat.tolist()
        if (len(prior.mean) == 1):
            muHat = muHat[0]
        if (len(prior.var) == 1):
            varHat = varHat[0][0]

        #Calculate Weights
        #sample a bunch from the prior
        tmp = GM()
        tmp.addG(Gaussian(prior.mean, prior.var, 1))
        tmpSamps = tmp.sample(500)

        #Find the likelihood at each sampled point
        probs = np.zeros(500).tolist()
        for i in range(0, 500):
            if (not inverse):
                probs[i] = self.pointEvalND(softClass, tmpSamps[i])
            else:
                probs[i] = 1 - self.pointEvalND(softClass, tmpSamps[i])
        #Find the average likelihood, which is the weight factor
        sumSamp = sum(probs) / 500

        #Multiply the sampled weight factor by the previous weight
        #or add in log space
        logSamps = np.log(sumSamp)
        logWeight = np.log(prior.weight) + logSamps
        #Extract final weight
        weight = np.exp(logWeight)

        post = Gaussian(muHat, varHat, weight)

        return post
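lwisUpdate approximates the product of a Gaussian prior and a soft-class likelihood with a single Gaussian: samples are drawn from a unit-weight copy of the prior, so after normalisation the importance weights reduce to the (optionally inverted) likelihood at each sample; the weighted samples give the posterior mean and covariance, and the new mixture weight is the prior weight times the average likelihood over 500 fresh prior samples, combined in log space. The sketch below is a compact one-dimensional version of the same update; the logistic likelihood is an illustrative stand-in for pointEvalND and is not the project's softmax model.

import numpy as np

def lwis_update_1d(mu, var, likelihood, numSamples=1000):
    #Proposal q equals the prior, so the importance weight reduces to the likelihood.
    x = np.random.normal(mu, np.sqrt(var), numSamples)
    w = likelihood(x)
    w = w / w.sum()
    muHat = np.sum(w * x)                      #weighted posterior mean
    varHat = np.sum(w * x**2) - muHat**2       #weighted posterior variance
    weight = likelihood(np.random.normal(mu, np.sqrt(var), 500)).mean()
    return muHat, varHat, weight

muHat, varHat, weight = lwis_update_1d(0.0, 4.0, lambda x: 1.0 / (1.0 + np.exp(-x)))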
Code example #5
def testMCTSSim2D():

    trails = 10
    trailLength = 100
    allReward = np.zeros(shape=(trails, trailLength)).tolist()

    random = False

    for count in range(0, trails):
        '''
		if(trails == 1):
			fig,ax = plt.subplots();
		'''

        totalReward = 0

        a = OnlineSolver()
        x1 = np.random.randint(-5, 5)
        x2 = np.random.randint(-5, 5)
        x = [x1, x2]
        b = GM()
        b.addG(Gaussian(x, [[1, 0], [0, 1]], 1))
        for step in range(0, trailLength):
            '''
			if(trails == 1):
				ax.cla(); 
				ax.plot(b,linewidth=4); 
				ax.scatter(x,.4,s=150,c='r'); 
				ax.set_ylim([0,.5]); 
				ax.set_title('POMCP Belief'); 
				plt.pause(0.1); 
			'''

            if (random):
                act = np.random.randint(0, 5)
            else:
                [act, u] = a.MCTS(b, 2)
            totalReward += a.model.r[act].pointEval(x)
            #x = np.random.choice([i for i in range(0,a.model.N)],p=a.model.px[act][x]);
            tmpGM = GM()
            tmpGM.addG(
                Gaussian((np.array(x) + np.array(a.model.delA[act])).tolist(),
                         a.model.delAVar, 1))

            x = tmpGM.sample(1)[0]

            ztrial = [0] * len(a.model.pz)
            for i in range(0, len(a.model.pz)):
                ztrial[i] = a.model.pz[i].pointEval(x)
            z = ztrial.index(max(ztrial))
            b = a.beliefUpdate(b, act, z, a.model)

            if (not random):
                #RenderTreeGraph(a.T).to_picture('tree2.png');
                a.T = [
                    node for node in PreOrderIter(
                        a.T,
                        filter_=lambda n: n.name == a.T.name + str(act) + str(z))
                ][0]

                a.T.parent = None
                #print(a.T);
                #RenderTreeGraph(a.T).to_picture('tree1.png');

            allReward[count][step] = totalReward

        print(allReward[count][-1])

    averageAllReward = [0] * trailLength
    for i in range(0, trails):
        for j in range(0, trailLength):
            averageAllReward[j] += allReward[i][j] / trails
    allSigma = [0] * trailLength

    for i in range(0, trailLength):
        suma = 0
        for j in range(0, trails):
            suma += (allReward[j][i] - averageAllReward[i])**2
        allSigma[i] = np.sqrt(suma / trails)
    UpperBound = [0] * trailLength
    LowerBound = [0] * trailLength

    for i in range(0, trailLength):
        UpperBound[i] = averageAllReward[i] + allSigma[i]
        LowerBound[i] = averageAllReward[i] - allSigma[i]

    x = [i for i in range(0, trailLength)]
    plt.figure()
    plt.plot(x, averageAllReward, 'g')
    plt.plot(x, UpperBound, 'g--')
    plt.plot(x, LowerBound, 'g--')
    plt.fill_between(x, LowerBound, UpperBound, color='g', alpha=0.25)

    plt.xlabel('Time Step')
    plt.ylabel('Accumulated Reward')
    plt.title('Average Accumulated Rewards over Time for: ' + str(trails) +
              ' simulations')

    plt.show()
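The averaging and one-sigma band at the end of the test can be written more compactly with vectorised numpy. The sketch below assumes allReward is already a trails x trailLength array; the random cumulative sums are placeholder data, not results from the solver.

import numpy as np
import matplotlib.pyplot as plt

allReward = np.random.rand(10, 100).cumsum(axis=1)   #placeholder data, shape (trails, trailLength)
mean = allReward.mean(axis=0)
sigma = allReward.std(axis=0)
x = np.arange(allReward.shape[1])
plt.plot(x, mean, 'g')
plt.plot(x, mean + sigma, 'g--')
plt.plot(x, mean - sigma, 'g--')
plt.fill_between(x, mean - sigma, mean + sigma, color='g', alpha=0.25)
plt.xlabel('Time Step')
plt.ylabel('Accumulated Reward')
plt.title('Average Accumulated Rewards over Time')
plt.show()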