Example #1
0
def main(network):
    """Fit a Q-function approximation on ``network`` and dump coefficients.

    Runs 1000 sweeps of iterate-and-solve on an epsilon-greedy behavior
    policy, saving the fitted beta vector to CSV after every sweep.

    :param network: project network object; must expose ``kind`` and
        ``numNodes`` (exact type declared elsewhere in the project).
    """
    # print-as-function: valid in both Python 2 and 3 for a single argument
    print("network: " + network.kind)

    ## reset RNGs so repeated runs on the same network are reproducible
    np.random.seed(0)
    random.seed(0)

    ## setup system
    model = PJ()
    system = systems.System(dc(network), dc(model))
    numNodes = network.numNodes
    numTrt = agents.Agent.numTrt(system)
    # NOTE(review): numValidTrt is never read below; kept in case
    # agents.Agent.numValidTrt has required side effects -- confirm.
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)

    gamma = 0.9  # discount factor

    ## function approx agent (Features3 basis, myopic behavior policy)
    qfa = QFnApprox(features.Features3(), agents.MyopicAgent())
    qfa.startCvGroups(10)

    ## epsilon greedy agent wrapped around the myopic agent
    eps = 0.1
    epsAgent = agents.EpsAgent.fromAgent(agents.MyopicAgent(), eps)

    ## estimate policies: 1000 reps of itersPerRep iterations each
    itersPerRep = 1000
    for i in range(1000):
        ## function approx
        qfa.iterAndSolve(dc(system), epsAgent, nIter=itersPerRep, gamma=gamma,
                         addToCvGroups=True, ridgePen=True)

        # hoisted: the cumulative iteration count is used both in the
        # output file name and in the progress message
        totalIters = (i + 1) * itersPerRep
        np.savetxt(dataDump.dd.open("beta" + str(totalIters) + ".csv", "w"),
                   qfa.beta, delimiter=",")
        print("Saved beta with % 10d iters" % totalIters)

    dataDump.dd.clean(force=True)
Example #2
0
def main(network, nIters):
    """Compare control agents on ``network`` at increasing training budgets.

    Builds a dictionary of baseline, optimal, SARSA, function-approximation,
    and step-wise-greedy agents; trains the learned agents incrementally at
    each cumulative budget in ``nIters``; then evaluates all agents with
    ``compareAgents`` and prints/saves the results sorted best-first by mean.

    :param network: project network object; must expose ``kind`` and
        ``numNodes`` (exact type declared elsewhere in the project).
    :param nIters: iterable of cumulative iteration counts (checkpoints).
    """
    # print-as-function: valid in both Python 2 and 3 for a single argument
    print("network: " + network.kind)

    ## reset RNGs so repeated runs on the same network are reproducible
    np.random.seed(0)
    random.seed(0)

    ## setup system
    model = PJ()
    system = systems.System(dc(network), dc(model))
    numNodes = network.numNodes
    numTrt = agents.Agent.numTrt(system)
    # NOTE(review): numValidTrt is never read below; kept in case
    # agents.Agent.numValidTrt has required side effects -- confirm.
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)

    gamma = 0.9  # discount factor

    ## baseline comparison agents
    compAgents = {}
    compAgents["noTrt"] = agents.NoTrtAgent()
    compAgents["random"] = agents.RandomAgent()
    compAgents["myopic"] = agents.MyopicAgent()
    compAgents["proximal"] = agents.ProximalAgent()

    ## optimal agent via exact policy iteration
    optPol = PolicyIteration2.solve(dc(system), gamma=gamma)
    optAgent = agents.PolAgent.fromPol(optPol)
    compAgents["optimal"] = optAgent

    eps = 0.1
    ## sarsa agent
    qSarsa = QSarsa(dc(system), gamma=gamma, lam=0.5, eps=eps)

    ## function approx agent (Features3 basis, myopic behavior policy)
    qfa = QFnApprox(features.Features3(), agents.MyopicAgent())
    qfa.startCvGroups(10)

    ## epsilon greedy agent (behavior policy for function approx training)
    epsAgent = agents.EpsAgent.fromAgent(agents.MyopicAgent(), eps)

    ## train incrementally: nIters holds cumulative totals, so only the
    ## difference from the previous checkpoint is run on each pass
    past = 0
    for nIt in nIters:
        print("iters: % 8d" % nIt)
        ## sarsa
        qSarsa.iter(nIt - past)
        name = "qSarsa" + str(nIt)
        compAgents[name] = agents.QAgent.fromQ(qSarsa.q)

        ## function approx
        qfa.iterAndSolve(dc(system), epsAgent, nIter=nIt - past, gamma=gamma,
                         addToCvGroups=True, ridgePen=True)
        name = "qFnApprox" + str(nIt)
        compAgents[name] = agents.QAgent.fromQ(qfa.getQ(dc(system)))

        ## step-wise greedy
        name = "swGreedy" + str(nIt)
        compAgents[name] = agents.SwGreedyAgent.fromFn(dc(qfa.qHatSA))

        past = nIt

    results = compareAgents(system, compAgents, 1000, 100, 1.0)

    ## sort agents best-first by mean value (ties broken by stdev, name)
    resSort = sorted(
        ((results[a]["mean"], results[a]["stdev"], a) for a in results),
        reverse=True)

    print("% 24s  % 16s  % 16s" % ("Agent", "Mean", "Stdev"))
    for mean, stdev, agentName in resSort:
        print("% 24s  % 16.7f  % 16.7f" % (agentName, mean, stdev))

    ## dump CSV of the same sorted results
    resFile = "results_" + network.kind + ".txt"
    dataDump.dd("Agent,Mean,Stdev\n", resFile, "w")
    for mean, stdev, agentName in resSort:
        dataDump.dd(",".join(map(str, (agentName, mean, stdev))) + "\n",
                    resFile, "a")