## NOTE: the imports below are inferred from usage in this file.  The
## project-local homes of PolicyIteration2, QSarsa, QFnApprox, unflattenQ,
## ind2Combo, combo2Ind, bellRes, and compareAgents are not shown here and
## are assumed to be importable from the project's own modules.
from copy import deepcopy as dc
import random

import numpy as np

import agents
import dataDump
import features
import systems
from model import PJ
from networks import genGridNetwork


def run():
    system = systems.System(genGridNetwork((2, 2)), PJ())
    sCopy = dc(system)

    numNodes = system.network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)

    gamma = 0.9

    ## exact Q-values from policy iteration.  NOTE: the original code
    ## referenced undefined names `q`, `qAgent`, and `la`; all three are
    ## assumed here to refer to this exact solution.
    q = unflattenQ(PolicyIteration2.solve(system, gamma=gamma),
                   numNodes, numValidTrt)
    optAgent = agents.PolAgent.fromPol(q)
    qAgent = agents.QAgent.fromQ(q)
    eqAgent = agents.EpsAgent.fromAgent(optAgent, 0.1)

    ## tabular SARSA(lambda) baseline
    qsarsa = QSarsa(system, gamma=gamma, lam=0.5, eps=0.2)
    qsarsa.iter(nIter=1000000)

    ## Q-function approximation with cross-validation groups
    qfa = QFnApprox(features.Features2(), eqAgent)
    qfa.startCvGroups(10)

    ## average magnitude of the exact Q-values (for scale reference)
    avgQ = [abs(qsa) for qs in q for qsa in qs]
    avgQ = sum(avgQ) / float(len(avgQ))

    R = 1000   # number of rounds
    I = 10000  # iterations per round
    for r in range(R):
        qfa.iterAndSolve(system, eqAgent, nIter=I, gamma=gamma,
                         addToCvGroups=True, ridgePen=True)
        print "% 9d" % (r * I + I)

        ## mean absolute deviation of qHat from the exact Q-values over
        ## all (state, action) pairs
        diff = 0.0
        cnt = 0
        for s in range(1 << numNodes):
            sCopy.infCmb(cmb=s)
            for a in range(numValidTrt):
                c = ind2Combo(a, numNodes, numTrt)
                sCopy.trtCmb(cmb=c)
                qHat = qfa.qHat(sCopy)
                if qHat is None:
                    break
                diff += abs(qHat - qAgent.q[s][a])
                cnt += 1
            if qHat is None:
                break

        if qHat is not None:
            print "    qRes: % 6.3f" % (diff / float(cnt),)

        ## Bellman residuals of the approximation, the exact Q-values,
        ## and the SARSA estimates on the observed history
        print " bellRes: % 6.3f" % bellRes(system.history, system,
                                           qAgent, qfa.qHat, gamma)
        ## `la` was undefined in the original; the "trueRes" label suggests
        ## the exact Q-values, so qAgent.q is used here
        laQfn = lambda system: (
            qAgent.q[system.infCmb()][combo2Ind(system.trtCmb(),
                                                numNodes, numTrt)])
        print " trueRes: % 6.3f" % bellRes(system.history, system,
                                           qAgent, laQfn, gamma)
        sarsaQfn = lambda system: (
            qsarsa.q[system.infCmb()][combo2Ind(system.trtCmb(),
                                                numNodes, numTrt)])
        print "sarsaRes: % 6.4f" % bellRes(system.history, system,
                                           qAgent, sarsaQfn, gamma)
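
## The ind2Combo/combo2Ind helpers used above are project utilities that are
## not defined in this file.  Below is a minimal, illustrative sketch of the
## assumed behavior -- mapping between an action index and a treatment
## bitmask with exactly numTrt of numNodes bits set -- under the assumption
## that actions are indexed in increasing bitmask order.  The *Sketch names
## are hypothetical stand-ins, not the project's implementations.
from itertools import combinations

def _allTrtCombosSketch(numNodes, numTrt):
    ## every bitmask with exactly numTrt of numNodes bits set,
    ## in increasing order
    masks = []
    for nodes in combinations(range(numNodes), numTrt):
        mask = 0
        for n in nodes:
            mask |= 1 << n
        masks.append(mask)
    return sorted(masks)

def ind2ComboSketch(ind, numNodes, numTrt):
    ## action index -> treatment bitmask
    return _allTrtCombosSketch(numNodes, numTrt)[ind]

def combo2IndSketch(cmb, numNodes, numTrt):
    ## treatment bitmask -> action index
    return _allTrtCombosSketch(numNodes, numTrt).index(cmb)
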
def main(network, nIters):
    print "network: " + network.kind

    ## reset
    np.random.seed(0)
    random.seed(0)

    ## setup system
    model = PJ()
    system = systems.System(dc(network), dc(model))

    numNodes = network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)

    gamma = 0.9

    ## no treatment, random, myopic, and proximal agents
    compAgents = {}
    compAgents["noTrt"] = agents.NoTrtAgent()
    compAgents["random"] = agents.RandomAgent()
    compAgents["myopic"] = agents.MyopicAgent()
    compAgents["proximal"] = agents.ProximalAgent()

    ## optimal agent
    optPol = PolicyIteration2.solve(dc(system), gamma=gamma)
    optAgent = agents.PolAgent.fromPol(optPol)
    compAgents["optimal"] = optAgent

    eps = 0.1

    ## sarsa agent
    qSarsa = QSarsa(dc(system), gamma=gamma, lam=0.5, eps=eps)

    ## function approx agent
    # qfa = QFnApprox(features.Features2(), dc(optAgent))
    qfa = QFnApprox(features.Features3(), agents.MyopicAgent())
    qfa.startCvGroups(10)

    ## epsilon greedy agent
    # epsAgent = agents.EpsAgent.fromAgent(optAgent, eps)
    epsAgent = agents.EpsAgent.fromAgent(agents.MyopicAgent(), eps)

    ## estimate policies
    past = 0
    for nIt in nIters:
        print "iters: % 8d" % nIt

        ## sarsa
        qSarsa.iter(nIt - past)
        name = "qSarsa" + str(nIt)
        compAgents[name] = agents.QAgent.fromQ(qSarsa.q)

        ## function approx
        qfa.iterAndSolve(dc(system), epsAgent, nIter=nIt - past,
                         gamma=gamma, addToCvGroups=True, ridgePen=True)
        name = "qFnApprox" + str(nIt)
        compAgents[name] = agents.QAgent.fromQ(qfa.getQ(dc(system)))

        ## step-wise greedy
        name = "swGreedy" + str(nIt)
        compAgents[name] = agents.SwGreedyAgent.fromFn(dc(qfa.qHatSA))

        past = nIt

    ## evaluate and rank agents by mean value
    results = compareAgents(system, compAgents, 1000, 100, 1.0)
    resSort = []
    for a in results:
        resSort.append((results[a]["mean"], results[a]["stdev"], a))
    resSort = sorted(resSort, reverse=True)

    print "% 24s % 16s % 16s" % ("Agent", "Mean", "Stdev")
    for r in resSort:
        print "% 24s % 16.7f % 16.7f" % (r[2], r[0], r[1])

    ## write results to csv
    resFile = "results_" + network.kind + ".txt"
    dataDump.dd("Agent,Mean,Stdev\n", resFile, "w")
    for res in resSort:
        dataDump.dd(",".join(map(str, (res[2], res[0], res[1]))) + "\n",
                    resFile, "a")
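
## A minimal driver sketch.  genGridNetwork is imported at the top of this
## file; the (2, 2) grid and the iteration schedule here are illustrative
## assumptions, not the project's actual entry point.
if __name__ == "__main__":
    main(genGridNetwork((2, 2)), nIters=[1000, 10000, 100000])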