def calcR(system):
    """Tabulate the expected one-step reward of every (state, treatment) pair.

    States are the integers ``0 .. 2**numNodes - 1``.  Each one is pushed
    into ``system`` with ``system.infCmb(cmb=s)`` (presumably one infection
    bit per node -- confirm against ``infCmb``).  For every treatment index
    the combination produced by ``util.ind2Combo`` is applied through
    ``system.trtCmb`` and the per-node probabilities are read from
    ``system.model.transProbs(system)``.

    The probability of reaching ``sp`` from ``s`` is the product over nodes
    of ``probs[i]`` when bit ``i`` differs between ``s`` and ``sp`` and of
    ``1 - probs[i]`` otherwise.

    Note that ``system`` is driven through ``infCmb`` / ``trtCmb`` calls and
    is not restored afterwards.

    Returns:
        A flat list of length ``2**numNodes * numValidTrt``; the entry at
        ``s * numValidTrt + aInd`` is the sum over ``sp`` of
        ``systems.reward(s, aCmb, sp, numNodes)`` times that probability.
    """
    numNodes = system.network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)
    numStates = 1 << numNodes

    r = [0.0] * (numStates * numValidTrt)
    for s in range(numStates):
        system.infCmb(cmb=s)
        for aInd in range(numValidTrt):
            aCmb = util.ind2Combo(aInd, numNodes, numTrt)
            system.trtCmb(cmb=aCmb)
            probs = system.model.transProbs(system)

            weighted = []
            for sp in range(numStates):
                flipped = s ^ sp
                factors = [
                    probs[node] if flipped & (1 << node) else 1.0 - probs[node]
                    for node in range(numNodes)
                ]
                # Multiply from the largest factor down; the fixed order is
                # presumably there to keep floating-point rounding stable.
                factors.sort(reverse=True)
                jointProb = reduce(operator.mul, factors)
                weighted.append(
                    systems.reward(s, aCmb, sp, numNodes) * jointProb
                )

            # Add the terms in ascending order, again fixing rounding order.
            weighted.sort()
            r[s * numValidTrt + aInd] = sum(weighted)
    return r
def calcP(system):
    """Tabulate the transition probabilities of every (state, treatment) pair.

    States are the integers ``0 .. 2**numNodes - 1``.  Each one is pushed
    into ``system`` with ``system.infCmb(cmb=s)``, each treatment index is
    turned into a combination by ``util.ind2Combo`` and applied through
    ``system.trtCmb``, and the per-node probabilities are then read from
    ``system.model.transProbs(system)``.

    The probability of reaching ``sp`` from ``s`` is the product over nodes
    of ``probs[i]`` when bit ``i`` differs between ``s`` and ``sp`` and of
    ``1 - probs[i]`` otherwise.

    Note that ``system`` is driven through ``infCmb`` / ``trtCmb`` calls and
    is not restored afterwards.

    Returns:
        A list of ``2**numNodes * numValidTrt`` rows, each of length
        ``2**numNodes``; row ``s * numValidTrt + aInd`` holds the
        probabilities of every next state ``sp``.
    """
    numNodes = system.network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)
    numStates = 1 << numNodes

    p = [[0.0] * numStates for _ in range(numStates * numValidTrt)]
    for s in range(numStates):
        system.infCmb(cmb=s)
        for aInd in range(numValidTrt):
            aCmb = util.ind2Combo(aInd, numNodes, numTrt)
            system.trtCmb(cmb=aCmb)
            probs = system.model.transProbs(system)

            row = p[s * numValidTrt + aInd]
            for sp in range(numStates):
                flipped = s ^ sp
                factors = [
                    probs[node] if flipped & (1 << node) else 1.0 - probs[node]
                    for node in range(numNodes)
                ]
                # Multiply from the largest factor down; the fixed order is
                # presumably there to keep floating-point rounding stable.
                factors.sort(reverse=True)
                row[sp] = reduce(operator.mul, factors)
    return p
def iter(self, nIter=1):
    """Advance the learner by ``nIter`` on-line update steps.

    Each step:

    1. calls ``self.system.turnClock()`` and uses its return value as the
       reward, then reads the new state index from ``self.system.infCmb()``;
    2. picks the next treatment index: with probability ``1 - eps`` a
       uniformly random one among those maximising ``self.q`` for the new
       state, otherwise a uniformly random valid index;
    3. applies that treatment to ``self.system``;
    4. forms ``delta = reward + gamma * q[next] - q[current]``, bumps the
       trace ``e`` and the visit count ``c`` of the current pair, and
       updates every entry of ``q`` with step size
       ``1 / (1 + c + e)`` before decaying each trace by ``gamma * lam``.

    This has the shape of a Sarsa(lambda) update with accumulating traces.
    ``self.st``, ``self.at`` and ``self.iters`` are updated at the end of
    every step.

    Args:
        nIter: number of steps to run.
    """
    for _ in range(nIter):
        reward = self.system.turnClock()
        nextState = self.system.infCmb()

        # Epsilon-greedy choice of the next treatment index.
        if np.random.rand() > self.eps:
            qRow = self.q[nextState]
            best = max(qRow)
            greedy = [a for a, qa in enumerate(qRow) if qa == best]
            nextAction = np.random.choice(greedy)
        else:
            nextAction = np.random.choice(range(self.numValidTrt))

        combo = util.ind2Combo(nextAction, self.numNodes, self.numTrt)
        self.system.trtCmb(cmb=combo)

        # Temporal-difference error for the pair visited on the last step.
        delta = (reward + self.gamma * self.q[nextState][nextAction]
                 - self.q[self.st][self.at])

        self.e[self.st][self.at] += 1
        self.c[self.st][self.at] += 1

        # Sweep the whole table: update q, then decay the trace.
        for s in range(1 << self.numNodes):
            qRow, eRow, cRow = self.q[s], self.e[s], self.c[s]
            for a in range(self.numValidTrt):
                alpha = 1.0 / (1.0 + cRow[a] + eRow[a])
                qRow[a] += alpha * delta * eRow[a]
                eRow[a] *= self.gamma * self.lam

        self.st = nextState
        self.at = nextAction
        self.iters += 1
def applyTrt(self, system):
    """Apply a treatment drawn from the stored policy to ``system``.

    Existing treatments are cleared with ``system.wipeTrt()``.  A treatment
    index is then drawn uniformly (``np.random.choice``) from
    ``self.pol[system.infCmb()]``, converted to a combination with
    ``util.ind2Combo`` and applied through ``system.trtCmb``.

    Args:
        system: the system to treat; it is modified through the calls above.
    """
    system.wipeTrt()

    nodeCount = system.network.numNodes
    trtCount = Agent.numTrt(system)

    stateInd = system.infCmb()
    chosen = np.random.choice(self.pol[stateInd])

    system.trtCmb(cmb=util.ind2Combo(chosen, nodeCount, trtCount))
def getQ(self, system):
    """Evaluate ``self.qHatSA`` for every (state, treatment) pair.

    For each state index ``s`` in ``0 .. 2**numNodes - 1`` the state is set
    with ``system.infCmb(cmb=s)``; for each valid treatment index the
    combination from ``util.ind2Combo`` is applied with ``system.trtCmb``
    and ``self.qHatSA(system)`` is recorded.

    Note that ``system`` is driven through ``infCmb`` / ``trtCmb`` calls and
    is not restored afterwards.

    Returns:
        A list of ``2**numNodes`` rows, each of length ``numValidTrt``, with
        ``result[s][a]`` holding the value returned by ``qHatSA``.
    """
    numNodes = system.network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)

    table = []
    for s in range(1 << numNodes):
        system.infCmb(cmb=s)
        row = []
        for a in range(numValidTrt):
            system.trtCmb(cmb=util.ind2Combo(a, numNodes, numTrt))
            row.append(self.qHatSA(system))
        table.append(row)
    return table
def getTrtOptions(self, system):
    """Return every treatment combination that maximises Q in the current state.

    The current state index is read from ``system.infCmb()`` and used to
    select a row of ``self.q``.  Every treatment index whose value equals
    the row maximum (exact ``==`` comparison) is converted to a combination
    with ``util.ind2Combo``.

    Returns:
        A list of combinations, in increasing treatment-index order.
    """
    numTrt = Agent.numTrt(system)
    numNodes = system.network.numNodes
    # NOTE(review): the result is not used below; the call is retained so
    # behaviour is unchanged -- confirm it can be dropped.
    numValidTrt = Agent.numValidTrt(numNodes, numTrt)

    # Row of Q-values for the current infection state.
    qRow = self.q[system.infCmb()]
    best = max(qRow)

    options = []
    for trtInd, qVal in enumerate(qRow):
        if qVal == best:
            options.append(util.ind2Combo(trtInd, numNodes, numTrt))
    return options
def calcPAndR(system, trtInd):
    """Build the transition matrix and expected rewards for one treatment.

    The treatment combination for ``trtInd`` (via ``util.ind2Combo``) is
    applied once with ``system.trtCmb``.  For every state index ``s`` the
    state is set with ``system.infCmb(cmb=s)`` and the per-node
    probabilities are read from ``system.model.transProbs(system)``.

    The probability of reaching ``sp`` is accumulated in log space: node
    ``i`` contributes ``log(probs[i])`` when bit ``i`` differs between ``s``
    and ``sp`` and ``log(1 - probs[i])`` otherwise.  A contribution whose
    argument is below ``1e-13`` is replaced by ``-30``.

    Note that ``system`` is driven through ``trtCmb`` / ``infCmb`` calls and
    is not restored afterwards.

    Args:
        system: the system to evaluate.
        trtInd: treatment index passed to ``util.ind2Combo``.

    Returns:
        ``(P, R)`` where ``P`` is a ``2**numNodes x 2**numNodes`` array with
        ``P[s, sp]`` the probability above, and ``R`` is a length
        ``2**numNodes`` array with ``R[s]`` the sum over ``sp`` of
        ``P[s, sp] * reward(s, trtCmb, sp, numNodes)``.
    """
    numNodes = system.network.numNodes
    numTrt = Agent.numTrt(system)
    # NOTE(review): the result is not used below; the call is retained so
    # behaviour is unchanged -- confirm it can be dropped.
    numValidTrt = Agent.numValidTrt(numNodes, numTrt)

    numStates = 1 << numNodes
    P = np.zeros((numStates, numStates))
    R = np.zeros((numStates,))

    trtCmb = util.ind2Combo(trtInd, numNodes, numTrt)
    system.trtCmb(cmb=trtCmb)

    for s in range(numStates):
        system.infCmb(cmb=s)
        probs = system.model.transProbs(system)

        for sp in range(numStates):
            changes = s ^ sp

            logProb = 0.0
            for node in range(numNodes):
                if changes & (1 << node):
                    nodeProb = probs[node]
                else:
                    nodeProb = 1.0 - probs[node]

                # Floor tiny probabilities instead of taking log of ~0.
                if nodeProb < 1e-13:
                    logProb += -30
                else:
                    logProb += np.log(nodeProb)

            transProb = np.exp(logProb)
            P[s, sp] = transProb
            R[s] += transProb * reward(s, trtCmb, sp, numNodes)

    return P, R
def __init__(self, system, gamma=0.9, lam=0.9, eps=0.05):
    """Set up the learner on a private copy of ``system``.

    ``system`` is copied with ``dc`` and the copy is stored as
    ``self.system``; the sizes ``numTrt``, ``numNodes`` and ``numValidTrt``
    are taken from the original.  Three tables with ``2**numNodes`` rows and
    ``numValidTrt`` columns are created, all zero: ``q``, ``e`` and ``c``.

    The copied system is then reset through ``cleanse()``, ``wipeTrt()`` and
    ``start()``; its state index becomes ``self.st``, a uniformly random
    valid treatment index becomes ``self.at``, and that treatment is applied
    to the copy.

    Args:
        system: the system to learn on (copied, not stored directly).
        gamma: stored as ``self.gamma``.
        lam: stored as ``self.lam``.
        eps: stored as ``self.eps``.
    """
    self.system = dc(system)
    self.numTrt = agents.Agent.numTrt(system)
    self.numNodes = system.network.numNodes
    self.numValidTrt = agents.Agent.numValidTrt(self.numNodes, self.numTrt)

    self.iters = 0

    numStates = 1 << self.numNodes

    def zeroTable():
        # One independent row per state so rows never alias each other.
        return [[0] * self.numValidTrt for _ in range(numStates)]

    self.q = zeroTable()
    self.e = zeroTable()
    self.c = zeroTable()

    self.gamma = gamma
    self.lam = lam
    self.eps = eps

    # Bring the private copy to a fresh starting configuration.
    self.system.cleanse()
    self.system.wipeTrt()
    self.system.start()

    self.st = self.system.infCmb()
    self.at = np.random.choice(range(self.numValidTrt))

    firstCombo = util.ind2Combo(self.at, self.numNodes, self.numTrt)
    self.system.trtCmb(cmb=firstCombo)