def unitTest(cls): print "Testing QAgent" from system import System from networks import genGridNetwork from model import PJ s = System(genGridNetwork((4,3)),PJ()) s.cleanse() s.infections[2] = True s.infections[4] = True s.infections[7] = True numTrt = Agent.numTrt(s) numNodes = s.network.numNodes assert numNodes == 12, ("numNodes is not 12") assert numTrt == 3, ("numTrt is not 3") maxInfInd = 1 << numNodes maxTrtInd = Agent.numValidTrt(numNodes,numTrt) q = [[0]*maxTrtInd for i in range(maxInfInd)] for i in range(maxInfInd): for t in range(maxTrtInd): q[i][t] = t trtNodes = [9,10,11] a = cls.fromQ(q) a.applyTrt(s) if s.nTrt > len(trtNodes): raise ValueError("QAgent gave too many treatments ") elif s.nTrt < len(trtNodes): raise ValueError("QAgent gave too few treatments") else: for i in trtNodes: if not s.treatments[i]: raise ValueError("QAgent did not treat node " + i)
def unitTest(cls): print "Testing ValueIteration" np.random.seed(0) from system import System from networks import genGridNetwork from model import PJ system = System(genGridNetwork((3,3)),PJ()) numNodes = system.network.numNodes numTrt = Agent.numTrt(system) numValidTrt = Agent.numValidTrt(numNodes,numTrt) v = ValueIteration.solve(dc(system)) q = PolicyIteration.solve(dc(system)) q = util.unflattenQ(q,numNodes,numValidTrt) vChk = [max(i) for i in q] for i in zip(v,vChk): print "% 12.6f % 10.6f" % i
def unitTest(cls): print "Testing System" np.random.seed(0) random.seed(0) system = System(genGridNetwork((2,2)),PJ()) numNodes = system.network.numNodes reps = 50000 tol = math.sqrt(0.5**2/float(reps))*2.05 for s in range(1 << numNodes): print "Checking state % 4d" % s system.infCmb(cmb=s) probs = system.model.transProbs(system) pastInf = dc(system.infections) avgChange = [0]*numNodes for r in range(reps): system.infCmb(cmb=s) system.turnClock(probs=probs) for n in range(numNodes): avgChange[n] += system.infections[n] != pastInf[n] avgChange = [float(i)/float(reps) for i in avgChange] diff = sum(abs(i - j) for i,j in zip(probs,avgChange)) diff /= float(numNodes) if diff > tol: for i,j in zip(probs,avgChange): print "true: % 8.6f, sim: % 8.6f" % (i,j) raise ValueError(("Simulation doesn't match model probabilities" + " for state %d with absolute total" + " difference of %f") % (s,diff))
def run():
    """Driver comparing a fitted Q-function approximation against
    reference Q-values (exact policy iteration, SARSA) on a 2x2 grid.

    NOTE(review): as written this function references names (`q`,
    `qAgent`, `la`) that are not defined in this scope and will raise
    NameError -- flagged inline below; confirm the intended objects.
    """
    from networks import genGridNetwork
    from model import PJ
    system = System(genGridNetwork((2,2)),PJ())
    # scratch copy used to enumerate (state, action) pairs below
    sCopy = dc(system)
    numNodes = system.network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes,numTrt)
    gamma = 0.9
    # optimal policy via exact policy iteration
    optPol = unflattenQ(PolicyIteration2.solve(system,gamma=gamma),
                        numNodes,numValidTrt)
    optAgent = agents.PolAgent.fromPol(optPol)
    # epsilon-greedy wrapper used to generate exploratory data
    eqAgent = agents.EpsAgent.fromAgent(optAgent,0.1)
    qsarsa = QSarsa(system,gamma=0.9,lam=0.5,eps=0.2)
    qsarsa.iter(nIter=1000000)
    qfa = QFnApprox(Features2(),eqAgent)
    qfa.startCvGroups(10)
    # NOTE(review): `q` is undefined here -- NameError; presumably the
    # exact Q-matrix was intended.  `avgQ` is also never used afterwards.
    avgQ = [abs(qsa) for qs in q for qsa in qs]
    avgQ = sum(avgQ)/float(len(avgQ))
    R = 1000   # number of fit/evaluate rounds
    I = 10000  # simulation iterations per round
    for r in range(R):
        qfa.iterAndSolve(system,eqAgent,nIter=I,gamma=gamma,
                         addToCvGroups=True,ridgePen=True)
        print "% 9d" % (r*I + I)
        # mean absolute gap between fitted and reference Q-values
        diff = 0.0
        cnt = 0
        for s in range(1 << numNodes):
            sCopy.infCmb(cmb=s)
            for a in range(numValidTrt):
                c = ind2Combo(a,numNodes,numTrt)
                sCopy.trtCmb(cmb=c)
                qHat = qfa.qHat(sCopy)
                if qHat is None:
                    # approximation not fitted yet; skip this report
                    break
                # NOTE(review): `qAgent` is undefined in this scope --
                # `optAgent` looks like the intended object; confirm.
                diff += abs(qHat - qAgent.q[s][a])
                cnt += 1
            if qHat is None:
                break
        if qHat is not None:
            print "    qRes: % 6.3f" % (diff/float(cnt),)
            print " bellRes: % 6.3f" % bellRes(system.history, system,
                                               qAgent, qfa.qHat, gamma)
            # NOTE(review): `la` is undefined in this scope -- perhaps a
            # lookahead/exact solver object; verify against full file.
            laQfn = lambda system : (
                la.q[system.infCmb()][combo2Ind(system.trtCmb(),
                                                numNodes,numTrt)])
            print " trueRes: % 6.3f" % bellRes(system.history, system,
                                               qAgent, laQfn, gamma)
            sarsaQfn = lambda system : (
                qsarsa.q[system.infCmb()][combo2Ind(system.trtCmb(),
                                                    numNodes,numTrt)])
            print "sarsaRes: % 6.4f" % bellRes(system.history, system,
                                               qAgent, sarsaQfn, gamma)
gamma = 0.9 ## function approx agent qfa = QFnApprox(features.Features3(),agents.MyopicAgent()) qfa.startCvGroups(10) ## epsilon greedy agent eps = 0.1 epsAgent = agents.EpsAgent.fromAgent(agents.MyopicAgent(),eps) ## estimate policies itersPerRep = 1000 for i in range(1000): ## function approx qfa.iterAndSolve(dc(system),epsAgent,nIter=itersPerRep,gamma=gamma, addToCvGroups=True,ridgePen=True) np.savetxt(dataDump.dd.open("beta"+str((i+1)*itersPerRep)+".csv","w"), qfa.beta,delimiter=",") print "Saved beta with % 10d iters" % ((i+1)*itersPerRep) dataDump.dd.clean(force=True) if __name__ == "__main__": main(genGridNetwork((10,10)))
results = compareAgents(system, compAgents, 1000, 100, 1.0) resSort = [] for a in results: resSort.append((results[a]["mean"], results[a]["stdev"], a)) resSort = sorted(resSort, reverse=True) print "% 24s % 16s % 16s" % ("Agent", "Mean", "Stdev") for r in resSort: print "% 24s % 16.7f % 16.7f" % (r[2], r[0], r[1]) resFile = "results_" + network.kind + ".txt" dataDump.dd("Agent,Mean,Stdev\n", resFile, "w") for res in resSort: dataDump.dd(",".join(map(str, (res[2], res[0], res[1]))) + "\n", resFile, "a") if __name__ == "__main__": shapes = [(2, 1), (3, 1), (2, 2), (4, 1), (2, 3), (6, 1), (3, 3)] multipliers = [5, 10, 25, 50] for shape in shapes: numNodes = shape[0] * shape[1] numTrt = int(0.25 * numNodes) nCombos = (1 << numNodes) * sympy.binomial(numNodes, numTrt) nIters = [m * nCombos for m in multipliers] main(genGridNetwork(shape), nIters)
def unitTest(cls): print "Testing PolicyIteration" np.random.seed(0) from networks import genGridNetwork from model import PJ from copy import deepcopy from runners import vPiS system = systems.System(genGridNetwork((2, 2)), PJ()) numNodes = system.network.numNodes p = np.array(cls.calcP(system)) r = np.array(cls.calcR(system)) one = np.ones((p.shape[1],)) pRowsum = np.dot(p, one) ## check numerical ranges on p tol = 1e-8 if np.amin(p) < 0: raise ValueError("P has negative values") if np.amax(p) > 1.0: raise ValueError("P has values greater than 1") if abs(max(pRowsum) - 1) > tol or abs(min(pRowsum) - 1) > tol: raise ValueError("Not all row sums for P are 1.0") numTrt = agents.Agent.numTrt(system) numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt) q = util.unflattenQ(np.random.randint(numNodes, size=(1 << numNodes) * numValidTrt), numNodes, numValidTrt) pi = cls.piForPolicy(util.q2Policy(q), system) one = np.ones((pi.shape[1],)) piRowSum = pi * one ## check numerical ranges on pi if pi.max() < 0: raise ValueError("Pi has some negative values") if pi.min() > 0: raise ValueError("Pi has values greater than 1") if abs(np.amin(piRowSum) - 1) > tol or abs(np.amax(piRowSum) - 1): raise ValueError("Rows of pi do not sum to 1") ## make sure random agent estimates worse Q-values gamma = 0.9 randPol = [range(numValidTrt) for i in range(1 << numNodes)] vRa = PolicyIteration2.vForPolicy(randPol, system, gamma=gamma).tolist() polOpt = PolicyIteration2.solve(deepcopy(system), gamma=gamma) vOpt = PolicyIteration2.vForPolicy(polOpt, system, gamma=gamma).tolist() cnt = sum(i > j for i, j in zip(vRa, vOpt)) if cnt > 0: raise ValueError("Random Agent does better " + "than optimal V-function %d times" % cnt) ## check that gamma = 0 results in a v-function equal to ## expected immediate rewards gamma = 0.0 polOpt = PolicyIteration2.solve(deepcopy(system), gamma=gamma) vOpt = PolicyIteration2.vForPolicy(polOpt, system, gamma=gamma) pi = cls.piForPolicy(polOpt, system) if 
np.linalg.norm(pi.dot(r) - vOpt, 2) > 1e-10: raise ValueError("Gamma = 0 did not result in expected " + "immediate rewards") ## check analytical values with simulated values gamma = 0.5 polOpt = PolicyIteration2.solve(deepcopy(system), gamma=gamma) vOpt = PolicyIteration2.vForPolicy(polOpt, system, gamma=gamma) agentOpt = agents.PolAgent.fromPol(polOpt) diff = 0.0 for s in range(1 << numNodes): print "Checking state % 4d" % s val = vPiS(s, system, agentOpt, gamma=gamma, finalT=10, reps=1000) diff += abs(vOpt[s] - val) diff /= float(1 << numNodes) # print "diff from sim: % 10.6f" % diff if diff > 0.05: raise ValueError("V values differ from sim by %f" % diff)
elif t[0] == "a": val *= system.treatments[t[1]] else: raise ValueError("Should be 's' or 'a'") if not val: break if val: data.append(val) rows.append(ind) ind += 1 cols = [0] * len(data) return sps.csr_matrix((data,(rows,cols)),shape=(ind,1)) if __name__ == "__main__": import systems import networks import model system = systems.System(networks.genGridNetwork((2,2)),model.PJ()) f3 = Features3() for i in f3.getFeatures(system): print i print len(f3.getFeatures(system))
for a in range(self.numValidTrt): alpha = 1.0 / (1.0 + self.c[s][a] + self.e[s][a]) self.q[s][a] += alpha * delta * self.e[s][a] self.e[s][a] *= self.gamma * self.lam self.st = stp1 self.at = atp1 self.iters += 1 if __name__ == "__main__": from networks import genGridNetwork from model import PJ system = System(genGridNetwork((2,2)),PJ()) numNodes = system.network.numNodes numTrt = agents.Agent.numTrt(system) numValidTrt = agents.Agent.numValidTrt(numNodes,numTrt) gamma = 0.9 optQ = PolicyIteration.solve(system,gamma=gamma) optQ = optQ.tolist() qsarsa = QSarsa(system,epsAgent=agents.RandomAgent(), gamma=0.9,lam=0.5,eps=0.1) R = 1000