コード例 #1
0
ファイル: policyIteration2.py プロジェクト: nickjmeyer/stdmMf
    def calcR(system):
        """Build the flat table of expected one-step rewards.

        For every infection index ``s`` and every valid treatment index
        ``aInd`` the function calls ``system.infCmb(cmb=s)`` and
        ``system.trtCmb(cmb=aCmb)`` (presumably putting ``system`` into that
        configuration -- it is therefore not left untouched), reads the
        per-node values from ``system.model.transProbs(system)`` and sums
        ``systems.reward(s, aCmb, sp, numNodes)`` weighted by the probability
        of each successor index ``sp``.

        Returns:
            A list of length ``(1 << numNodes) * numValidTrt``; entry
            ``s * numValidTrt + aInd`` holds the weighted sum for that pair.
        """
        nNodes = system.network.numNodes
        nTrt = agents.Agent.numTrt(system)
        nValid = agents.Agent.numValidTrt(nNodes, nTrt)
        nStates = 1 << nNodes

        expected = [0.0] * (nStates * nValid)
        for s in range(nStates):
            system.infCmb(cmb=s)
            for aInd in range(nValid):
                aCmb = util.ind2Combo(aInd, nNodes, nTrt)
                system.trtCmb(cmb=aCmb)
                probs = system.model.transProbs(system)

                weighted = []
                for sp in range(nStates):
                    # Bit i of ``flipped`` is set when node i differs
                    # between s and sp.
                    flipped = s ^ sp
                    factors = [
                        probs[i] if flipped & (1 << i) else 1.0 - probs[i]
                        for i in range(nNodes)
                    ]
                    # Factors are multiplied from largest to smallest.
                    likelihood = reduce(operator.mul,
                                        sorted(factors, reverse=True))
                    weighted.append(
                        systems.reward(s, aCmb, sp, nNodes) * likelihood)

                # Terms are added in ascending order.
                expected[s * nValid + aInd] = sum(sorted(weighted))

        return expected
コード例 #2
0
ファイル: policyIteration2.py プロジェクト: nickjmeyer/stdmMf
    def calcP(system):
        """Build the table of successor probabilities per (state, treatment).

        For every infection index ``s`` and valid treatment index ``aInd``
        the function calls ``system.infCmb(cmb=s)`` and
        ``system.trtCmb(cmb=aCmb)`` (so ``system`` is presumably modified),
        reads ``system.model.transProbs(system)`` and, for each successor
        index ``sp``, multiplies ``probs[i]`` for nodes whose bit differs
        between ``s`` and ``sp`` with ``1.0 - probs[i]`` for the others.

        Returns:
            A list with ``(1 << numNodes) * numValidTrt`` rows, each of
            length ``1 << numNodes``; row ``s * numValidTrt + aInd`` holds the
            products for every ``sp``.
        """
        nNodes = system.network.numNodes
        nTrt = agents.Agent.numTrt(system)
        nValid = agents.Agent.numValidTrt(nNodes, nTrt)
        nStates = 1 << nNodes

        # Rows are appended in the order s-major / aInd-minor, which is
        # exactly the order of the flat index s * nValid + aInd.
        table = []
        for s in range(nStates):
            system.infCmb(cmb=s)
            for aInd in range(nValid):
                aCmb = util.ind2Combo(aInd, nNodes, nTrt)
                system.trtCmb(cmb=aCmb)
                probs = system.model.transProbs(system)

                row = []
                for sp in range(nStates):
                    flipped = s ^ sp
                    factors = []
                    for i in range(nNodes):
                        if (flipped >> i) & 1:
                            factors.append(probs[i])
                        else:
                            factors.append(1.0 - probs[i])

                    # Factors are multiplied from largest to smallest.
                    factors.sort(reverse=True)
                    row.append(reduce(operator.mul, factors))

                table.append(row)

        return table
コード例 #3
0
ファイル: qSarsa.py プロジェクト: nickjmeyer/stdmMf
    def iter(self,nIter = 1):
        """Run ``nIter`` learning steps, updating ``self.q`` in place.

        Each step:
          1. calls ``self.system.turnClock()`` and uses its return value as
             the reward term of the update;
          2. reads the new infection index from ``self.system.infCmb()``;
          3. picks the next treatment index: with probability governed by
             ``self.eps`` a uniformly random one, otherwise one of the
             indices maximising ``self.q`` for the new state (ties broken at
             random);
          4. applies that treatment to ``self.system``;
          5. bumps the trace ``self.e`` and the counter ``self.c`` of the
             previous (state, treatment) pair and updates every entry of
             ``self.q`` in proportion to its trace, after which each trace is
             multiplied by ``self.gamma * self.lam``.

        This looks like a SARSA(lambda)-style update with eligibility
        traces -- NOTE(review): confirm against the intended algorithm.
        """
        nStates = 1 << self.numNodes

        for _ in range(nIter):
            reward = self.system.turnClock()
            nextState = self.system.infCmb()

            greedy = np.random.rand() > self.eps
            if greedy:
                nextRow = self.q[nextState]
                best = max(nextRow)
                ties = [a for a, qa in enumerate(nextRow) if qa == best]
                nextAction = np.random.choice(ties)
            else:
                nextAction = np.random.choice(range(self.numValidTrt))

            combo = util.ind2Combo(nextAction, self.numNodes, self.numTrt)
            self.system.trtCmb(cmb=combo)

            # Temporal-difference error for the previous pair.
            tdError = (reward + self.gamma * self.q[nextState][nextAction]
                       - self.q[self.st][self.at])

            self.e[self.st][self.at] += 1
            self.c[self.st][self.at] += 1

            decay = self.gamma * self.lam
            for s in range(nStates):
                qRow, eRow, cRow = self.q[s], self.e[s], self.c[s]
                for a in range(self.numValidTrt):
                    # Step size shrinks as the pair's count and trace grow.
                    stepSize = 1.0 / (1.0 + cRow[a] + eRow[a])
                    qRow[a] += stepSize * tdError * eRow[a]
                    eRow[a] *= decay

            self.st, self.at = nextState, nextAction
            self.iters += 1
コード例 #4
0
ファイル: agents.py プロジェクト: nickjmeyer/stdmMf
    def applyTrt(self,system):
        """Wipe the current treatments and apply one drawn from ``self.pol``.

        ``self.pol`` is indexed with the value returned by
        ``system.infCmb()``; one element of that entry is drawn with
        ``np.random.choice``, converted with ``util.ind2Combo`` and handed to
        ``system.trtCmb``.
        """
        system.wipeTrt()
        nNodes = system.network.numNodes
        nTrt = Agent.numTrt(system)

        candidates = self.pol[system.infCmb()]
        chosen = np.random.choice(candidates)
        combo = util.ind2Combo(chosen, nNodes, nTrt)
        system.trtCmb(cmb=combo)
コード例 #5
0
ファイル: qFnApprox.py プロジェクト: nickjmeyer/stdmMf
    def getQ(self,system):
        """Tabulate ``self.qHatSA`` over every state / treatment index pair.

        For each infection index ``s`` and valid treatment index ``a`` the
        function calls ``system.infCmb(cmb=s)`` and ``system.trtCmb(cmb=...)``
        (so ``system`` is presumably left in the last configuration visited)
        and stores ``self.qHatSA(system)``.

        Returns:
            A list of ``1 << numNodes`` rows, each with ``numValidTrt``
            entries, where ``result[s][a]`` is the value for that pair.
        """
        nNodes = system.network.numNodes
        nTrt = agents.Agent.numTrt(system)
        nValid = agents.Agent.numValidTrt(nNodes,nTrt)

        table = []
        for s in range(1 << nNodes):
            system.infCmb(cmb=s)
            row = []
            for a in range(nValid):
                system.trtCmb(cmb=util.ind2Combo(a,nNodes,nTrt))
                row.append(self.qHatSA(system))
            table.append(row)
        return table
コード例 #6
0
ファイル: agents.py プロジェクト: nickjmeyer/stdmMf
    def getTrtOptions(self,system):
        """Return every treatment combination that maximises ``self.q``.

        The row of ``self.q`` is selected with the index returned by
        ``system.infCmb()``; each treatment index whose value equals the row
        maximum is converted with ``util.ind2Combo``.

        Returns:
            A list of the converted combinations, in increasing order of
            treatment index.
        """
        numTrt = Agent.numTrt(system)
        numNodes = system.network.numNodes

        ## not used below; the call is retained as in the original
        numValidTrt = Agent.numValidTrt(numNodes,numTrt)

        ## row of Q values for the current infection index
        qRow = self.q[system.infCmb()]
        best = max(qRow)

        ## collect all treatment combinations that attain the maximum
        combos = []
        for trtInd, qVal in enumerate(qRow):
            if qVal == best:
                combos.append(util.ind2Combo(trtInd,numNodes,numTrt))

        return combos
コード例 #7
0
ファイル: valueIteration.py プロジェクト: nickjmeyer/stdmMf
    def calcPAndR(system,trtInd):
        """Compute the transition matrix and expected reward for one treatment.

        The treatment combination for ``trtInd`` is applied once via
        ``system.trtCmb``; then for every infection index ``s`` the function
        calls ``system.infCmb(cmb=s)`` and reads
        ``system.model.transProbs(system)``.  The probability of moving to
        ``sp`` is accumulated in log space: each node contributes
        ``log(probs[i])`` if its bit differs between ``s`` and ``sp`` and
        ``log(1.0 - probs[i])`` otherwise, with ``-30`` substituted whenever
        the factor is below ``1e-13``.

        Returns:
            ``(P, R)`` where ``P`` is a ``(1 << numNodes, 1 << numNodes)``
            array with ``P[s, sp]`` the probability described above, and
            ``R`` is a length ``1 << numNodes`` array with
            ``R[s] = sum_sp P[s, sp] * reward(s, trtCmb, sp, numNodes)``.
        """
        numNodes = system.network.numNodes
        numTrt = Agent.numTrt(system)
        # Not used below; the call is retained as in the original.
        numValidTrt = Agent.numValidTrt(numNodes,numTrt)

        nStates = 1 << numNodes
        P = np.zeros((nStates,nStates))
        R = np.zeros((nStates,))

        trtCmb = util.ind2Combo(trtInd,numNodes,numTrt)
        system.trtCmb(cmb = trtCmb)

        for s in range(nStates):
            system.infCmb(cmb = s)
            probs = system.model.transProbs(system)

            for sp in range(nStates):
                changes = s ^ sp
                logProb = 0.0
                for i in range(numNodes):
                    if changes & (1 << i):
                        factor = probs[i]
                    else:
                        factor = 1.0 - probs[i]

                    # Clamp the log of (near-)zero factors to -30.
                    if factor < 1e-13:
                        logProb += -30
                    else:
                        logProb += np.log(factor)

                transProb = np.exp(logProb)
                P[s,sp] = transProb
                R[s] += transProb * reward(s,trtCmb,sp,numNodes)

        return P,R
コード例 #8
0
ファイル: qSarsa.py プロジェクト: nickjmeyer/stdmMf
    def __init__(self,system, gamma = 0.9, lam = 0.9, eps = 0.05):
        """Set up the learner on a private copy of ``system``.

        Args:
            system: the system to learn on; it is copied with ``dc``
                (presumably ``copy.deepcopy`` -- confirm against the file's
                imports) and the copy is stored as ``self.system``.
            gamma: stored as ``self.gamma``.
            lam: stored as ``self.lam``.
            eps: stored as ``self.eps``.

        The tables ``self.q``, ``self.e`` and ``self.c`` are created with
        ``1 << numNodes`` rows of ``numValidTrt`` zeros.  The copied system
        is then passed through ``cleanse()``, ``wipeTrt()`` and ``start()``,
        its infection index is recorded as ``self.st``, and a uniformly
        random treatment index ``self.at`` is drawn and applied.
        """
        self.system = dc(system)
        self.numTrt = agents.Agent.numTrt(system)
        self.numNodes = system.network.numNodes
        self.numValidTrt = agents.Agent.numValidTrt(self.numNodes,self.numTrt)

        self.gamma = gamma
        self.lam = lam
        self.eps = eps

        self.iters = 0

        nStates = 1 << self.numNodes

        def zeroTable():
            # One independent row of zeros per state.
            return [[0]*self.numValidTrt for _ in range(nStates)]

        self.q = zeroTable()
        self.e = zeroTable()
        self.c = zeroTable()

        self.system.cleanse()
        self.system.wipeTrt()
        self.system.start()

        self.st = self.system.infCmb()
        self.at = np.random.choice(range(self.numValidTrt))
        startCombo = util.ind2Combo(self.at,self.numNodes,self.numTrt)
        self.system.trtCmb(cmb=startCombo)