Python QSarsa Examples

Programming Language: Python

Namespace/Package Name: qSarsa

Class/Type: QSarsa

Examples at hotexamples.com: 2

Python QSarsa - 2 examples found. These are the top rated real world Python examples of qSarsa.QSarsa extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

iter(2)

Example #1

Show file

File: qFnApprox.py Project: nickjmeyer/stdmMf

def _meanAbsQDiff(qfa, sCopy, qAgent, numNodes, numTrt, numValidTrt):
    """Average ``abs(qfa.qHat(sCopy) - qAgent.q[s][a])`` over every (s, a) pair.

    ``sCopy`` is mutated: for each ``s`` in ``range(1 << numNodes)`` it is set
    with ``infCmb(cmb=s)`` and, for each ``a`` in ``range(numValidTrt)``, with
    ``trtCmb(cmb=ind2Combo(a, numNodes, numTrt))`` before being scored.

    Returns:
        The mean absolute difference, or ``None`` when ``qfa.qHat`` returned
        ``None`` for some pair (the sweep stops there) or when there were no
        pairs to score at all.
    """
    total = 0.0
    count = 0
    for s in range(1 << numNodes):
        sCopy.infCmb(cmb=s)
        for a in range(numValidTrt):
            sCopy.trtCmb(cmb=ind2Combo(a, numNodes, numTrt))
            qHat = qfa.qHat(sCopy)
            if qHat is None:
                # No estimate available yet; abandon the whole sweep.
                return None
            total += abs(qHat - qAgent.q[s][a])
            count += 1
    if count == 0:
        # Nothing was scored, so there is no mean to report.
        return None
    return total / float(count)


def run():
    """Fit a QFnApprox on a 2x2 grid network and print residuals each round.

    Builds a ``System`` from ``genGridNetwork((2,2))`` and ``PJ()``, derives an
    epsilon agent (0.1) from the ``PolicyIteration2`` policy, runs ``QSarsa``
    for 1,000,000 iterations, then repeats 1000 rounds of
    ``qfa.iterAndSolve(..., nIter=10000, ...)``.  After each round it prints
    the cumulative iteration count and, when ``qfa.qHat`` yields a value for
    every (state, action) pair, the mean absolute gap to ``qAgent.q`` plus
    three ``bellRes`` values (for ``qfa.qHat``, ``la.q`` and ``qsarsa.q``).

    NOTE(review): ``qAgent`` and ``la`` are never bound inside this function;
    they must come from module scope -- confirm they exist there.
    """
    from networks import genGridNetwork
    from model import PJ

    system = System(genGridNetwork((2,2)),PJ())
    sCopy = dc(system)

    numNodes = system.network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes,numTrt)

    gamma = 0.9

    optPol = unflattenQ(PolicyIteration2.solve(system,gamma=gamma),
                        numNodes,numValidTrt)
    optAgent = agents.PolAgent.fromPol(optPol)
    eqAgent = agents.EpsAgent.fromAgent(optAgent,0.1)

    # Reuse the shared discount factor instead of repeating the literal.
    qsarsa = QSarsa(system,gamma=gamma,lam=0.5,eps=0.2)
    qsarsa.iter(nIter=1000000)

    qfa = QFnApprox(Features2(),eqAgent)
    qfa.startCvGroups(10)

    # Table-lookup Q functions handed to bellRes.  They only close over
    # loop-invariant names, so they are defined once rather than per round.
    # The parameter intentionally keeps the name ``system`` (shadowing the
    # outer variable) so the callable's signature is unchanged.
    def laQfn(system):
        # NOTE(review): ``la`` is not bound in run(); see the docstring.
        return la.q[system.infCmb()][combo2Ind(system.trtCmb(),
                                               numNodes,numTrt)]

    def sarsaQfn(system):
        return qsarsa.q[system.infCmb()][combo2Ind(system.trtCmb(),
                                                   numNodes,numTrt)]

    nRounds = 1000
    itersPerRound = 10000
    for r in range(nRounds):
        qfa.iterAndSolve(system,eqAgent,nIter=itersPerRound,gamma=gamma,
                         addToCvGroups=True,ridgePen=True)
        print("% 9d" % ((r + 1)*itersPerRound))

        qRes = _meanAbsQDiff(qfa,sCopy,qAgent,numNodes,numTrt,numValidTrt)
        if qRes is None:
            # The approximation produced no estimate; skip this round's report.
            continue

        print("    qRes: % 6.3f" % (qRes,))

        # qfa.qHat is looked up here, every round, in case iterAndSolve
        # replaces it between rounds.
        reports = (
            (" bellRes: % 6.3f", qfa.qHat),
            (" trueRes: % 6.3f", laQfn),
            ("sarsaRes: % 6.4f", sarsaQfn),
        )
        for fmt, qFn in reports:
            print(fmt % bellRes(system.history,
                                system,
                                qAgent,
                                qFn,
                                gamma))

Example #2

Show file

File: main.py Project: nickjmeyer/stdmMf

def main(network, nIters):
    """Train Q-based agents on ``network``, compare them, and dump the results.

    Seeds ``np.random`` and ``random`` with 0, builds a ``systems.System``
    from deep copies of ``network`` and a fresh ``PJ`` model, registers a set
    of baseline agents plus the ``PolicyIteration2`` agent, then for every
    entry of ``nIters`` advances ``QSarsa`` and ``QFnApprox`` and registers
    the resulting agents under names suffixed with that entry.  Finally it
    calls ``compareAgents``, prints a table sorted by descending mean, and
    writes the same rows as CSV to ``results_<network.kind>.txt``.

    Args:
        network: object exposing ``kind`` (concatenated into strings) and
            ``numNodes``.
        nIters: iterable of cumulative iteration counts.  Each step trains for
            ``nIt - past`` further iterations, so the values are presumably
            expected in increasing order -- TODO confirm with callers.
    """
    print("network: " + network.kind)

    ## reset
    np.random.seed(0)
    random.seed(0)

    ## setup system
    model = PJ()
    system = systems.System(dc(network), dc(model))
    numNodes = network.numNodes
    numTrt = agents.Agent.numTrt(system)
    # NOTE(review): numValidTrt is never read below; kept because it is not
    # visible here whether the call has side effects.
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)

    gamma = 0.9

    ## no treatment and random agents
    compAgents = {}
    compAgents["noTrt"] = agents.NoTrtAgent()
    compAgents["random"] = agents.RandomAgent()
    compAgents["myopic"] = agents.MyopicAgent()
    compAgents["proximal"] = agents.ProximalAgent()

    ## optimal agent
    optPol = PolicyIteration2.solve(dc(system), gamma=gamma)
    optAgent = agents.PolAgent.fromPol(optPol)
    compAgents["optimal"] = optAgent

    eps = 0.1
    ## sarsa agent
    qSarsa = QSarsa(dc(system), gamma=gamma, lam=0.5, eps=eps)

    ## function approx agent
    # qfa = QFnApprox(features.Features2(),dc(optAgent))
    qfa = QFnApprox(features.Features3(), agents.MyopicAgent())
    qfa.startCvGroups(10)

    ## epsilon greedy agent
    # epsAgent = agents.EpsAgent.fromAgent(optAgent,eps)
    epsAgent = agents.EpsAgent.fromAgent(agents.MyopicAgent(), eps)

    ## estimate policies
    past = 0
    for nIt in nIters:
        print("iters: % 8d" % nIt)
        # Only the iterations beyond what has already been run.
        step = nIt - past

        ## sarsa
        qSarsa.iter(step)
        compAgents["qSarsa" + str(nIt)] = agents.QAgent.fromQ(qSarsa.q)

        ## function approx
        qfa.iterAndSolve(dc(system), epsAgent, nIter=step, gamma=gamma,
                         addToCvGroups=True, ridgePen=True)
        compAgents["qFnApprox" + str(nIt)] = agents.QAgent.fromQ(
            qfa.getQ(dc(system)))

        ## step-wise greedy
        compAgents["swGreedy" + str(nIt)] = agents.SwGreedyAgent.fromFn(
            dc(qfa.qHatSA))

        past = nIt

    # NOTE(review): the meaning of the positional 1000, 100, 1.0 arguments is
    # defined by compareAgents and is not visible here.
    results = compareAgents(system, compAgents, 1000, 100, 1.0)

    # (mean, stdev, name) tuples, highest mean first; ties fall back to stdev
    # and then name through ordinary tuple comparison.
    resSort = sorted(
        ((results[a]["mean"], results[a]["stdev"], a) for a in results),
        reverse=True)

    print("% 24s  % 16s  % 16s" % ("Agent", "Mean", "Stdev"))
    for mean, stdev, name in resSort:
        print("% 24s  % 16.7f  % 16.7f" % (name, mean, stdev))

    resFile = "results_" + network.kind + ".txt"
    # The header call uses mode "w" and the row calls use mode "a".
    dataDump.dd("Agent,Mean,Stdev\n", resFile, "w")
    for mean, stdev, name in resSort:
        dataDump.dd(",".join(map(str, (name, mean, stdev))) + "\n",
                    resFile, "a")