## NOTE: the imports below are inferred from usage in this file.  The
## project-local homes of PolicyIteration2, QSarsa, QFnApprox, unflattenQ,
## ind2Combo, combo2Ind, bellRes, and compareAgents are not shown here and
## are assumed to be importable from the project's own modules.
from copy import deepcopy as dc
import random

import numpy as np

import agents
import dataDump
import features
import systems
from model import PJ
from networks import genGridNetwork


def run():
    system = systems.System(genGridNetwork((2, 2)), PJ())
    sCopy = dc(system)

    numNodes = system.network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)

    gamma = 0.9

    ## exact Q-values from policy iteration.  NOTE: the original code
    ## referenced undefined names `q`, `qAgent`, and `la`; all three are
    ## assumed here to refer to this exact solution.
    q = unflattenQ(PolicyIteration2.solve(system, gamma=gamma),
                   numNodes, numValidTrt)
    optAgent = agents.PolAgent.fromPol(q)
    qAgent = agents.QAgent.fromQ(q)
    eqAgent = agents.EpsAgent.fromAgent(optAgent, 0.1)

    ## tabular SARSA(lambda) baseline
    qsarsa = QSarsa(system, gamma=gamma, lam=0.5, eps=0.2)
    qsarsa.iter(nIter=1000000)

    ## Q-function approximation with cross-validation groups
    qfa = QFnApprox(features.Features2(), eqAgent)
    qfa.startCvGroups(10)

    ## average magnitude of the exact Q-values (for scale reference)
    avgQ = [abs(qsa) for qs in q for qsa in qs]
    avgQ = sum(avgQ) / float(len(avgQ))

    R = 1000   # number of rounds
    I = 10000  # iterations per round
    for r in range(R):
        qfa.iterAndSolve(system, eqAgent, nIter=I, gamma=gamma,
                         addToCvGroups=True, ridgePen=True)
        print "% 9d" % (r * I + I)

        ## mean absolute deviation of qHat from the exact Q-values over
        ## all (state, action) pairs
        diff = 0.0
        cnt = 0
        for s in range(1 << numNodes):
            sCopy.infCmb(cmb=s)
            for a in range(numValidTrt):
                c = ind2Combo(a, numNodes, numTrt)
                sCopy.trtCmb(cmb=c)
                qHat = qfa.qHat(sCopy)
                if qHat is None:
                    break
                diff += abs(qHat - qAgent.q[s][a])
                cnt += 1
            if qHat is None:
                break

        if qHat is not None:
            print "    qRes: % 6.3f" % (diff / float(cnt),)

        ## Bellman residuals of the approximation, the exact Q-values,
        ## and the SARSA estimates on the observed history
        print " bellRes: % 6.3f" % bellRes(system.history, system,
                                           qAgent, qfa.qHat, gamma)
        ## `la` was undefined in the original; the "trueRes" label suggests
        ## the exact Q-values, so qAgent.q is used here
        laQfn = lambda system: (
            qAgent.q[system.infCmb()][combo2Ind(system.trtCmb(),
                                                numNodes, numTrt)])
        print " trueRes: % 6.3f" % bellRes(system.history, system,
                                           qAgent, laQfn, gamma)
        sarsaQfn = lambda system: (
            qsarsa.q[system.infCmb()][combo2Ind(system.trtCmb(),
                                                numNodes, numTrt)])
        print "sarsaRes: % 6.4f" % bellRes(system.history, system,
                                           qAgent, sarsaQfn, gamma)
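
## The ind2Combo/combo2Ind helpers used above are project utilities that are
## not defined in this file.  Below is a minimal, illustrative sketch of the
## assumed behavior -- mapping between an action index and a treatment
## bitmask with exactly numTrt of numNodes bits set -- under the assumption
## that actions are indexed in increasing bitmask order.  The *Sketch names
## are hypothetical stand-ins, not the project's implementations.
from itertools import combinations

def _allTrtCombosSketch(numNodes, numTrt):
    ## every bitmask with exactly numTrt of numNodes bits set,
    ## in increasing order
    masks = []
    for nodes in combinations(range(numNodes), numTrt):
        mask = 0
        for n in nodes:
            mask |= 1 << n
        masks.append(mask)
    return sorted(masks)

def ind2ComboSketch(ind, numNodes, numTrt):
    ## action index -> treatment bitmask
    return _allTrtCombosSketch(numNodes, numTrt)[ind]

def combo2IndSketch(cmb, numNodes, numTrt):
    ## treatment bitmask -> action index
    return _allTrtCombosSketch(numNodes, numTrt).index(cmb)
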
def main(network, nIters):
    print "network: " + network.kind

    ## reset
    np.random.seed(0)
    random.seed(0)

    ## setup system
    model = PJ()
    system = systems.System(dc(network), dc(model))

    numNodes = network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)

    gamma = 0.9

    ## no treatment, random, myopic, and proximal agents
    compAgents = {}
    compAgents["noTrt"] = agents.NoTrtAgent()
    compAgents["random"] = agents.RandomAgent()
    compAgents["myopic"] = agents.MyopicAgent()
    compAgents["proximal"] = agents.ProximalAgent()

    ## optimal agent
    optPol = PolicyIteration2.solve(dc(system), gamma=gamma)
    optAgent = agents.PolAgent.fromPol(optPol)
    compAgents["optimal"] = optAgent

    eps = 0.1

    ## sarsa agent
    qSarsa = QSarsa(dc(system), gamma=gamma, lam=0.5, eps=eps)

    ## function approx agent
    # qfa = QFnApprox(features.Features2(), dc(optAgent))
    qfa = QFnApprox(features.Features3(), agents.MyopicAgent())
    qfa.startCvGroups(10)

    ## epsilon greedy agent
    # epsAgent = agents.EpsAgent.fromAgent(optAgent, eps)
    epsAgent = agents.EpsAgent.fromAgent(agents.MyopicAgent(), eps)

    ## estimate policies
    past = 0
    for nIt in nIters:
        print "iters: % 8d" % nIt

        ## sarsa
        qSarsa.iter(nIt - past)
        name = "qSarsa" + str(nIt)
        compAgents[name] = agents.QAgent.fromQ(qSarsa.q)

        ## function approx
        qfa.iterAndSolve(dc(system), epsAgent, nIter=nIt - past,
                         gamma=gamma, addToCvGroups=True, ridgePen=True)
        name = "qFnApprox" + str(nIt)
        compAgents[name] = agents.QAgent.fromQ(qfa.getQ(dc(system)))

        ## step-wise greedy
        name = "swGreedy" + str(nIt)
        compAgents[name] = agents.SwGreedyAgent.fromFn(dc(qfa.qHatSA))

        past = nIt

    ## evaluate and rank agents by mean value
    results = compareAgents(system, compAgents, 1000, 100, 1.0)
    resSort = []
    for a in results:
        resSort.append((results[a]["mean"], results[a]["stdev"], a))
    resSort = sorted(resSort, reverse=True)

    print "% 24s % 16s % 16s" % ("Agent", "Mean", "Stdev")
    for r in resSort:
        print "% 24s % 16.7f % 16.7f" % (r[2], r[0], r[1])

    ## write results to csv
    resFile = "results_" + network.kind + ".txt"
    dataDump.dd("Agent,Mean,Stdev\n", resFile, "w")
    for res in resSort:
        dataDump.dd(",".join(map(str, (res[2], res[0], res[1]))) + "\n",
                    resFile, "a")
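
## A minimal driver sketch.  genGridNetwork is imported at the top of this
## file; the (2, 2) grid and the iteration schedule here are illustrative
## assumptions, not the project's actual entry point.
if __name__ == "__main__":
    main(genGridNetwork((2, 2)), nIters=[1000, 10000, 100000])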