def main(network):
    """Repeatedly refit the function-approximation Q estimate and dump beta.

    Steps, in order:
      * print the network kind and seed both ``np.random`` and ``random``
        with 0;
      * build a ``systems.System`` from copies (via ``dc``) of ``network``
        and a fresh ``PJ()`` model;
      * build a ``QFnApprox`` from ``features.Features3()`` and
        ``agents.MyopicAgent()`` and call ``startCvGroups(10)`` on it;
      * run 1000 rounds of ``qfa.iterAndSolve`` with ``nIter=1000`` each,
        writing ``qfa.beta`` as CSV to ``beta<total iters>.csv`` after every
        round and printing a progress line;
      * call ``dataDump.dd.clean(force=True)``.

    Args:
        network: object providing ``kind`` (concatenated to a string) and
            ``numNodes``; a copy of it is handed to ``systems.System``.

    Returns:
        None.

    NOTE(review): this source also contains a two-argument
    ``main(network, nIters)``; if both live in one module the later
    definition shadows this one -- confirm which is intended.
    """
    print("network: " + network.kind)

    ## reset: fixed seeds for both random number generators
    np.random.seed(0)
    random.seed(0)

    ## setup system
    pjModel = PJ()
    system = systems.System(dc(network), dc(pjModel))

    ## NOTE(review): the three values below are never read again in this
    ## function; the calls are kept as-is in case they have side effects.
    nodeCount = network.numNodes
    trtCount = agents.Agent.numTrt(system)
    validTrtCount = agents.Agent.numValidTrt(nodeCount, trtCount)

    gamma = 0.9

    ## function approx agent
    feats = features.Features3()
    baseAgent = agents.MyopicAgent()
    qfa = QFnApprox(feats, baseAgent)
    qfa.startCvGroups(10)

    ## epsilon greedy agent
    eps = 0.1
    epsAgent = agents.EpsAgent.fromAgent(agents.MyopicAgent(), eps)

    ## estimate policies: numReps rounds of itersPerRep iterations each
    itersPerRep = 1000
    numReps = 1000
    for rep in range(1, numReps + 1):
        ## cumulative iteration count once this round has finished
        totalIters = rep * itersPerRep

        ## function approx
        qfa.iterAndSolve(dc(system), epsAgent,
                         nIter=itersPerRep,
                         gamma=gamma,
                         addToCvGroups=True,
                         ridgePen=True)

        ## the handle from dd.open is intentionally not bound to a name, so
        ## its lifetime is exactly that of the savetxt call
        betaName = "beta" + str(totalIters) + ".csv"
        np.savetxt(dataDump.dd.open(betaName, "w"), qfa.beta, delimiter=",")

        print("Saved beta with % 10d iters" % totalIters)

    ## NOTE(review): placed after the loop when un-collapsing the original
    ## one-line source -- confirm it was not meant to run every round.
    dataDump.dd.clean(force=True)
def main(network, nIters):
    """Train several agents on ``network`` and report their compared results.

    Steps, in order:
      * print the network kind and seed both ``np.random`` and ``random``
        with 0;
      * build a ``systems.System`` from copies (via ``dc``) of ``network``
        and a fresh ``PJ()`` model;
      * register the fixed agents under the keys ``"noTrt"``, ``"random"``,
        ``"myopic"``, ``"proximal"`` and ``"optimal"`` (the last one built
        from ``PolicyIteration2.solve``);
      * for every value in ``nIters`` advance the ``QSarsa`` learner and the
        ``QFnApprox`` estimator by ``nIt - past`` iterations and register
        three agents keyed ``"qSarsa<nIt>"``, ``"qFnApprox<nIt>"`` and
        ``"swGreedy<nIt>"``;
      * pass all agents to ``compareAgents``, sort the results in descending
        order of ``(mean, stdev, name)``, print them as a table and write
        them as CSV lines to ``results_<network.kind>.txt`` through
        ``dataDump.dd``.

    Args:
        network: object providing ``kind`` (concatenated to a string) and
            ``numNodes``; a copy of it is handed to ``systems.System``.
        nIters: iterable of iteration counts. Each value is treated as a
            cumulative total, since only the difference to the previous
            value is run (presumably the values are increasing -- confirm
            with the caller).

    Returns:
        None.

    NOTE(review): this source also contains a one-argument
    ``main(network)``; if both live in one module this definition shadows
    it -- confirm which is intended.
    """
    print("network: " + network.kind)

    ## reset: fixed seeds for both random number generators
    np.random.seed(0)
    random.seed(0)

    ## setup system
    pjModel = PJ()
    system = systems.System(dc(network), dc(pjModel))

    ## NOTE(review): the three values below are never read again in this
    ## function; the calls are kept as-is in case they have side effects.
    nodeCount = network.numNodes
    trtCount = agents.Agent.numTrt(system)
    validTrtCount = agents.Agent.numValidTrt(nodeCount, trtCount)

    gamma = 0.9

    ## no treatment and random agents (plus myopic and proximal)
    compAgents = {}
    compAgents["noTrt"] = agents.NoTrtAgent()
    compAgents["random"] = agents.RandomAgent()
    compAgents["myopic"] = agents.MyopicAgent()
    compAgents["proximal"] = agents.ProximalAgent()

    ## optimal agent
    optAgent = agents.PolAgent.fromPol(
        PolicyIteration2.solve(dc(system), gamma=gamma))
    compAgents["optimal"] = optAgent

    eps = 0.1

    ## sarsa agent
    qSarsa = QSarsa(dc(system), gamma=gamma, lam=0.5, eps=eps)

    ## function approx agent
    ## (earlier variant: QFnApprox(features.Features2(), dc(optAgent)))
    feats = features.Features3()
    baseAgent = agents.MyopicAgent()
    qfa = QFnApprox(feats, baseAgent)
    qfa.startCvGroups(10)

    ## epsilon greedy agent
    ## (earlier variant: agents.EpsAgent.fromAgent(optAgent, eps))
    epsAgent = agents.EpsAgent.fromAgent(agents.MyopicAgent(), eps)

    ## estimate policies
    past = 0
    for nIt in nIters:
        print("iters: % 8d" % nIt)

        ## only the iterations not yet run are added in this round
        extraIters = nIt - past
        suffix = str(nIt)

        ## sarsa
        qSarsa.iter(extraIters)
        compAgents["qSarsa" + suffix] = agents.QAgent.fromQ(qSarsa.q)

        ## function approx
        qfa.iterAndSolve(dc(system), epsAgent,
                         nIter=extraIters,
                         gamma=gamma,
                         addToCvGroups=True,
                         ridgePen=True)
        compAgents["qFnApprox" + suffix] = agents.QAgent.fromQ(
            qfa.getQ(dc(system)))

        ## step-wise greedy
        compAgents["swGreedy" + suffix] = agents.SwGreedyAgent.fromFn(
            dc(qfa.qHatSA))

        past = nIt

    ## NOTE(review): the meaning of the numeric arguments (1000, 100, 1.0)
    ## is defined by compareAgents, which is not visible here.
    results = compareAgents(system, compAgents, 1000, 100, 1.0)

    ## rank by (mean, stdev, name), largest first
    ranked = [(results[label]["mean"], results[label]["stdev"], label)
              for label in results]
    ranked.sort(reverse=True)

    print("% 24s % 16s % 16s" % ("Agent", "Mean", "Stdev"))
    for mean, stdev, label in ranked:
        print("% 24s % 16.7f % 16.7f" % (label, mean, stdev))

    ## header is written with mode "w", every row afterwards with mode "a"
    resFile = "results_" + network.kind + ".txt"
    dataDump.dd("Agent,Mean,Stdev\n", resFile, "w")
    for mean, stdev, label in ranked:
        row = ",".join([str(label), str(mean), str(stdev)])
        dataDump.dd(row + "\n", resFile, "a")