Example #1
0
def main(network):
    """Fit a Q-function approximation on ``network`` and dump coefficients.

    Runs 1000 sweeps of iterate-and-solve on an epsilon-greedy behavior
    policy, saving the fitted beta vector to CSV after every sweep.

    :param network: project network object; must expose ``kind`` and
        ``numNodes`` (exact type declared elsewhere in the project).
    """
    # print-as-function: valid in both Python 2 and 3 for a single argument
    print("network: " + network.kind)

    ## reset RNGs so repeated runs on the same network are reproducible
    np.random.seed(0)
    random.seed(0)

    ## setup system
    model = PJ()
    system = systems.System(dc(network), dc(model))
    numNodes = network.numNodes
    numTrt = agents.Agent.numTrt(system)
    # NOTE(review): numValidTrt is never read below; kept in case
    # agents.Agent.numValidTrt has required side effects -- confirm.
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)

    gamma = 0.9  # discount factor

    ## function approx agent (Features3 basis, myopic behavior policy)
    qfa = QFnApprox(features.Features3(), agents.MyopicAgent())
    qfa.startCvGroups(10)

    ## epsilon greedy agent wrapped around the myopic agent
    eps = 0.1
    epsAgent = agents.EpsAgent.fromAgent(agents.MyopicAgent(), eps)

    ## estimate policies: 1000 reps of itersPerRep iterations each
    itersPerRep = 1000
    for i in range(1000):
        ## function approx
        qfa.iterAndSolve(dc(system), epsAgent, nIter=itersPerRep, gamma=gamma,
                         addToCvGroups=True, ridgePen=True)

        # hoisted: the cumulative iteration count is used both in the
        # output file name and in the progress message
        totalIters = (i + 1) * itersPerRep
        np.savetxt(dataDump.dd.open("beta" + str(totalIters) + ".csv", "w"),
                   qfa.beta, delimiter=",")
        print("Saved beta with % 10d iters" % totalIters)

    dataDump.dd.clean(force=True)
Example #2
0
def main(network, nIters):
    """Compare control agents on ``network`` at increasing training budgets.

    Builds a dictionary of baseline, optimal, SARSA, function-approximation,
    and step-wise-greedy agents; trains the learned agents incrementally at
    each cumulative budget in ``nIters``; then evaluates all agents with
    ``compareAgents`` and prints/saves the results sorted best-first by mean.

    :param network: project network object; must expose ``kind`` and
        ``numNodes`` (exact type declared elsewhere in the project).
    :param nIters: iterable of cumulative iteration counts (checkpoints).
    """
    # print-as-function: valid in both Python 2 and 3 for a single argument
    print("network: " + network.kind)

    ## reset RNGs so repeated runs on the same network are reproducible
    np.random.seed(0)
    random.seed(0)

    ## setup system
    model = PJ()
    system = systems.System(dc(network), dc(model))
    numNodes = network.numNodes
    numTrt = agents.Agent.numTrt(system)
    # NOTE(review): numValidTrt is never read below; kept in case
    # agents.Agent.numValidTrt has required side effects -- confirm.
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)

    gamma = 0.9  # discount factor

    ## baseline comparison agents
    compAgents = {}
    compAgents["noTrt"] = agents.NoTrtAgent()
    compAgents["random"] = agents.RandomAgent()
    compAgents["myopic"] = agents.MyopicAgent()
    compAgents["proximal"] = agents.ProximalAgent()

    ## optimal agent via exact policy iteration
    optPol = PolicyIteration2.solve(dc(system), gamma=gamma)
    optAgent = agents.PolAgent.fromPol(optPol)
    compAgents["optimal"] = optAgent

    eps = 0.1
    ## sarsa agent
    qSarsa = QSarsa(dc(system), gamma=gamma, lam=0.5, eps=eps)

    ## function approx agent (Features3 basis, myopic behavior policy)
    qfa = QFnApprox(features.Features3(), agents.MyopicAgent())
    qfa.startCvGroups(10)

    ## epsilon greedy agent (behavior policy for function approx training)
    epsAgent = agents.EpsAgent.fromAgent(agents.MyopicAgent(), eps)

    ## train incrementally: nIters holds cumulative totals, so only the
    ## difference from the previous checkpoint is run on each pass
    past = 0
    for nIt in nIters:
        print("iters: % 8d" % nIt)
        ## sarsa
        qSarsa.iter(nIt - past)
        name = "qSarsa" + str(nIt)
        compAgents[name] = agents.QAgent.fromQ(qSarsa.q)

        ## function approx
        qfa.iterAndSolve(dc(system), epsAgent, nIter=nIt - past, gamma=gamma,
                         addToCvGroups=True, ridgePen=True)
        name = "qFnApprox" + str(nIt)
        compAgents[name] = agents.QAgent.fromQ(qfa.getQ(dc(system)))

        ## step-wise greedy
        name = "swGreedy" + str(nIt)
        compAgents[name] = agents.SwGreedyAgent.fromFn(dc(qfa.qHatSA))

        past = nIt

    results = compareAgents(system, compAgents, 1000, 100, 1.0)

    ## sort agents best-first by mean value (ties broken by stdev, name)
    resSort = sorted(
        ((results[a]["mean"], results[a]["stdev"], a) for a in results),
        reverse=True)

    print("% 24s  % 16s  % 16s" % ("Agent", "Mean", "Stdev"))
    for mean, stdev, agentName in resSort:
        print("% 24s  % 16.7f  % 16.7f" % (agentName, mean, stdev))

    ## dump CSV of the same sorted results
    resFile = "results_" + network.kind + ".txt"
    dataDump.dd("Agent,Mean,Stdev\n", resFile, "w")
    for mean, stdev, agentName in resSort:
        dataDump.dd(",".join(map(str, (agentName, mean, stdev))) + "\n",
                    resFile, "a")