def calcR(system):
    """Build the flat table of expected rewards for every (state, action) pair.

    States are the integers in ``range(1 << numNodes)`` and are applied to
    ``system`` with ``system.infCmb``.  Actions are the indices in
    ``range(numValidTrt)``; each one is decoded with ``util.ind2Combo`` and
    applied with ``system.trtCmb``.  For every pair, the per-node values
    returned by ``system.model.transProbs(system)`` are combined into a
    weight for each candidate next state ``sp``: node ``i`` contributes
    ``probs[i]`` when bit ``i`` of ``s ^ sp`` is set and ``1.0 - probs[i]``
    otherwise.  The table entry is the sum over ``sp`` of
    ``systems.reward(s, aCmb, sp, numNodes)`` times that weight.

    NOTE(review): ``probs[i]`` is presumably the probability that node ``i``
    changes infection status in one step -- confirm against the model.

    Args:
        system: Object exposing ``network.numNodes``, ``infCmb``, ``trtCmb``
            and ``model.transProbs``.  It is modified through ``infCmb`` and
            ``trtCmb``; no copy is taken here.

    Returns:
        A list of length ``(1 << numNodes) * numValidTrt`` in which the entry
        for state ``s`` and action index ``aInd`` is stored at
        ``s * numValidTrt + aInd``.
    """
    numNodes = system.network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)
    numStates = 1 << numNodes

    r = [0.0] * (numStates * numValidTrt)
    for s in range(numStates):
        system.infCmb(cmb=s)
        for aInd in range(numValidTrt):
            aCmb = util.ind2Combo(aInd, numNodes, numTrt)
            system.trtCmb(cmb=aCmb)
            probs = system.model.transProbs(system)

            weighted = []
            for sp in range(numStates):
                # Bits set in `flipped` mark nodes whose bit differs
                # between s and sp.
                flipped = s ^ sp
                factors = [
                    probs[node] if flipped & (1 << node)
                    else 1.0 - probs[node]
                    for node in range(numNodes)
                ]
                # Multiply largest-first; presumably this fixes the
                # floating-point evaluation order -- keep it as is.
                factors.sort(reverse=True)
                transProb = reduce(operator.mul, factors)
                weighted.append(
                    systems.reward(s, aCmb, sp, numNodes) * transProb)

            # Sum smallest-first, for the same reason as above.
            weighted.sort()
            r[s * numValidTrt + aInd] = sum(weighted)
    return r
def bellRes(iters, system, agent, q, gamma=0.9):
    """Return the mean of ``q0 - r - gamma * q1`` over the entries of ``iters``.

    The first entry ``sa`` of ``iters`` is applied to a copy of ``system``
    (``infCmb(cmb=sa[0])`` then ``trtCmb(cmb=sa[1])``) and ``q`` is evaluated
    there to give ``q0``.  For each later entry ``sap`` the copy is set to
    ``sap[0]``, its treatments are wiped, ``agent.applyTrt`` chooses new
    ones, and ``q`` is evaluated again to give ``q1``.  The reward is
    ``systems.reward(sa[0], sa[1], sap[0], numNodes)``.

    Args:
        iters: Iterable of indexable entries; element 0 is passed to
            ``infCmb`` and element 1 of the first entry to ``trtCmb``.
        system: Object exposing ``network.numNodes``, ``infCmb``, ``trtCmb``
            and ``wipeTrt``.  All work is done on ``dc(system)``
            (``dc`` is presumably ``copy.deepcopy`` -- confirm against the
            file's imports).
        agent: Object whose ``applyTrt(system)`` is called on that copy.
        q: Callable taking the system and returning a number.
        gamma: Multiplier applied to ``q1``.

    Returns:
        The accumulated residual divided by the number of entries after the
        first.

    Raises:
        StopIteration: If ``iters`` is empty.
        ZeroDivisionError: If ``iters`` holds exactly one entry, because
            there is then nothing to average.
    """
    system = dc(system)
    numNodes = system.network.numNodes

    iters = iter(iters)

    # Builtin next() works on Python 2.6+ and Python 3; the .next() method
    # exists only on Python 2 iterators.
    sa = next(iters)
    system.infCmb(cmb=sa[0])
    system.trtCmb(cmb=sa[1])
    q0 = q(system)

    br = 0.0
    nIters = 0
    for sap in iters:
        system.infCmb(cmb=sap[0])
        system.wipeTrt()

        r = systems.reward(sa[0], sa[1], sap[0], numNodes)

        agent.applyTrt(system)
        q1 = q(system)

        # NOTE(review): sa and q0 are never advanced to sap here, so every
        # entry is compared against the first one.  That is right if the
        # later entries are alternative successors of the same first entry,
        # and wrong if iters is a trajectory -- confirm with the callers.
        br += q0 - r - gamma * q1
        nIters += 1

    br /= float(nIters)
    return br
def addIter(self, newIters, system, gamma=0.9, addToCvGroups=False):
    """Fold a sequence of entries into the running sums ``self.X``/``self.Y``.

    The entries of ``newIters`` are walked pairwise.  For each consecutive
    pair ``(sa, sap)`` the features ``f0`` at ``sa`` and ``f1`` at
    ``sap[0]`` (after ``self.agent.applyTrt``) are combined as
    ``fd = f0 - gamma * f1``, and the sums are updated with
    ``X += f0 * fd.T`` and ``Y += f0 * r``, where
    ``r = systems.reward(sa[0], sa[1], sap[0], numNodes)``.  The very first
    sample (``self.nIters == 0``) initialises ``X`` and ``Y`` instead of
    adding to them.

    NOTE(review): assumes the feature objects are column matrices for which
    ``*`` is a matrix product and ``.T`` a transpose -- confirm.

    When ``addToCvGroups`` is true, each sample is also assigned a fold by
    ``self.getGroup()``.  It is added to that fold's test sums
    (``cvXtest``/``cvYtest``) and to the training sums
    (``cvXtrain``/``cvYtrain``) of every other fold.

    Args:
        newIters: Iterable of indexable entries; element 0 is passed to
            ``infCmb`` and element 1 to ``trtCmb``.
        system: Object exposing ``network.numNodes``, ``infCmb``, ``trtCmb``
            and ``wipeTrt``.  All work is done on ``dc(system)``
            (``dc`` is presumably ``copy.deepcopy`` -- confirm against the
            file's imports).
        gamma: Multiplier applied to ``f1``.
        addToCvGroups: Whether to update the per-fold sums as well.

    Raises:
        StopIteration: If ``newIters`` is empty.
    """
    def _accumulate(store, idx, term):
        # Start the running sum in slot idx, or add to the existing one.
        if store[idx] is None:
            store[idx] = term
        else:
            store[idx] += term

    system = dc(system)
    numNodes = system.network.numNodes

    iters = iter(newIters)

    # Builtin next() works on Python 2.6+ and Python 3; the .next() method
    # exists only on Python 2 iterators.
    sa = next(iters)
    system.infCmb(cmb=sa[0])
    system.trtCmb(cmb=sa[1])
    f0 = self.features.getFeatures(system)

    for sap in iters:
        system.infCmb(cmb=sap[0])
        system.wipeTrt()

        r = systems.reward(sa[0], sa[1], sap[0], numNodes)

        self.agent.applyTrt(system)
        f1 = self.features.getFeatures(system)

        fd = f0 - gamma * f1

        if self.nIters:
            self.X += f0 * fd.T
            self.Y += f0 * r
        else:
            self.X = f0 * fd.T
            self.Y = f0 * r

        if addToCvGroups:
            group = self.getGroup()
            for i in range(self.nGroups):
                # Index by the fold i, not by `group`: the sample is test
                # data for its own fold and training data for all others.
                # A fresh product is built for every store so that later
                # in-place += updates never share an object.
                if i == group:
                    _accumulate(self.cvXtest, i, f0 * fd.T)
                    _accumulate(self.cvYtest, i, f0 * r)
                else:
                    _accumulate(self.cvXtrain, i, f0 * fd.T)
                    _accumulate(self.cvYtrain, i, f0 * r)

                # Every fold changed, so reset its cached entries
                # (presumably products derived from the sums above; None
                # marks them stale -- confirm).
                self.cvXtXtest[i] = None
                self.cvXtYtest[i] = None
                self.cvXtXtrain[i] = None
                self.cvXtYtrain[i] = None

        self.nIters += 1

        # The successor becomes the current entry for the next pair.
        sa = sap
        system.trtCmb(cmb=sa[1])
        f0 = self.features.getFeatures(system)