Example #1
                                   "Value Iteration {}".format(nIter))
        if vi.latestDelta < 1e-6 and flag:
            simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                                   "Value Iteration {}".format(nIter))
            flag = False
            # dumpPolicyMap(MapPrinter.printPolicyMap(allStates, p, gen.getMap()),
            #               'Value {} Iter {} Policy Map.pkl'.format(world, nIter))
        # if vi.latestDelta < 1e-6:
        #     break
    print "\n\n\n"
    simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                           "Value Iteration {}".format(nIter))
    dumpCSV(nIter, timing['Value'][1:], rewards['Value'], steps['Value'],
            convergence['Value'], world, 'Value')
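
dumpCSV is not defined in any of these snippets. A minimal sketch of what such a helper could look like, modeled on the calls in this example and assuming it just writes the parallel result lists to one CSV file per algorithm (the signature, filename, and column layout are guesses, not the authors' code):

import csv

# Hypothetical sketch of the dumpCSV helper used above; everything here is an assumption.
def dumpCSV(iters, times, rewards, steps, convergence, world, method):
    # iters is sometimes passed as a list and sometimes as the final iteration count
    if isinstance(iters, int):
        iters = range(1, iters + 1)
    fname = '{} {}.csv'.format(world, method)
    with open(fname, 'wb') as f:  # 'wb' for the csv module under Python 2 / Jython
        writer = csv.writer(f)
        writer.writerow(['iteration', 'time', 'reward', 'steps', 'convergence'])
        for row in zip(iters, times, rewards, steps, convergence):
            writer.writerow(row)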

    pi = PolicyIteration(domain, rf, tf, discount, hashingFactory, 1e-3, 10, 1)
    pi.toggleUseCachedTransitionDynamics(False)
    print "//{} Policy Iteration Analysis//".format(world)
    timing['Policy'].append(0)
    flag = True
    for nIter in iterations:
        startTime = clock()
        p = pi.planFromState(initialState)
        #timing['Policy'].append((clock() - startTime) * 1000)
        timing['Policy'].append(timing['Policy'][-1] + clock() - startTime)
        policy = pi.getComputedPolicy()
        current_policy = {
            state: policy.getAction(state).toString()
            for state in allStates
        }
        convergence['Policy2'].append(pi.lastPIDelta)
        # evaluate the policy with evalTrials roll outs
        runEvals(initialState, p, rewards['Policy'], steps['Policy'])
        if nIter == 1:
            simpleValueFunctionVis(pi, p, initialState, domain, hashingFactory, "Policy Iteration {}".format(nIter))
        if nIter == MAX_ITERATIONS / 2:
            simpleValueFunctionVis(pi, p, initialState, domain, hashingFactory, "Policy Iteration {}".format(nIter))
    MapPrinter.printPolicyMap(vi.getAllStates(), p, gen.getMap())
    print "\n\n\n"
    simpleValueFunctionVis(pi, p, initialState, domain, hashingFactory, "Policy Iteration {}".format(nIter))
#     dumpCSV(iterations, timing['Value'], rewards['Value'], steps['Value'], convergence['Value'], world, 'Value')
  
#   Policy Iteration starts
    print "//Easy Policy Iteration Analysis//"
    for nIter in iterations:
        startTime = clock()
        # Use a negative delta so that policy iteration always runs the maximum
        # number of iterations, for comparison with the other algorithms.
        pi = PolicyIteration(domain, rf, tf, discount, hashingFactory, -1, 1, nIter)
        # run planning from our initial state
        pi.setDebugCode(0)
        p = pi.planFromState(initialState)
        timing['Policy'].append((clock() - startTime) * 1000)
        convergence['Policy'].append(pi.lastPIDelta)
        # evaluate the policy 
        runEvals(initialState, p, rewards['Policy'], steps['Policy'])
        if nIter == 1:
            simpleValueFunctionVis(pi, p, initialState, domain, hashingFactory, "Policy Iteration {}".format(nIter))
        if nIter == MAX_ITERATIONS / 2:
            simpleValueFunctionVis(pi, p, initialState, domain, hashingFactory, "Policy Iteration {}".format(nIter))
        policy = pi.getComputedPolicy()
        allStates = pi.getAllStates()
        current_policy = [[(action.ga, action.pSelection)
                           for action in policy.getActionDistributionForState(state)]
                          for state in allStates]
        policy_converged['Policy'].append(current_policy == last_policy)
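
The snippet ends before last_policy is refreshed for the next comparison. A minimal, self-contained sketch of the same convergence-tracking idea, using toy data rather than BURLAP objects:

# Toy sketch of the policy-convergence bookkeeping used above (assumed pattern, not the
# authors' code): a policy counts as converged once it matches the previous iteration's policy.
def track_convergence(policies):
    converged = []
    last_policy = None
    for current_policy in policies:
        converged.append(current_policy == last_policy)
        last_policy = current_policy
    return converged

# The policy stops changing after the second iteration:
print track_convergence([{'s0': 'north'}, {'s0': 'east'}, {'s0': 'east'}])  # [False, False, True]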
Example #3
        MapPrinter.printPolicyMap(vi.getAllStates(), p, gen.getMap())
        print "\n\n\n"
        simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                               Vname)
        #input('x')
        dumpCSV(iterations, timing[Vname], rewards[Vname], steps[Vname],
                convergence[Vname], world, Vname)

    discount = 0.99

    print "//Easy Policy Iteration Analysis//"
    for discount in [0.99, 0.90, 0.7, 0.4]:
        Pname = 'Policy gmma{:0.2f}'.format(discount)
        for nIter in iterations:
            startTime = clock()
            pi = PolicyIteration(domain, rf, tf, discount, hashingFactory, -1,
                                 1, nIter)
            # Use a negative delta so that policy iteration always runs the maximum
            # number of iterations, for comparison with the other algorithms.
            # run planning from our initial state
            pi.setDebugCode(0)
            p = pi.planFromState(initialState)
            timing[Pname].append((clock() - startTime) * 1000)
            convergence[Pname].append(pi.lastPIDelta)
            # evaluate the policy with one rollout and visualize the trajectory
            runEvals(initialState, p, rewards[Pname], steps[Pname])
            #ea = p.evaluateBehavior(initialState, rf, tf);
            #rewards['Policy'].append(calcRewardInEpisode(ea));
            #steps['Policy'].append(ea.numTimeSteps());
            if nIter == 1:
                simpleValueFunctionVis(pi, p, initialState, domain,
                                       hashingFactory,
                                       "Policy Iteration {}".format(nIter))
Example #4
        vi.setDebugCode(0)
        p = vi.planFromState(initialState)
        timing['Value'].append(clock() - startTime)
        iterations['Value'].append(vi.numIterations)
        # evaluate the policy with one rollout and visualize the trajectory
        runEvals(initialState, p, rewards['Value'], steps['Value'])

        MapPrinter.printPolicyMap(vi.getAllStates(), p, gen.getMap())
        print("\n\n")
        #   simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory, "Value Iteration")

        dumpCSV(iterations['Value'], timing['Value'], rewards['Value'],
                steps['Value'], [n], 'Value', n == 2)

        print("//Size Policy Iteration Analysis//")
        pi = PolicyIteration(domain, rf, tf, discount, hashingFactory, 1e-6,
                             20, MAX_ITERATIONS)
        # Small delta (1e-6) lets planning stop once the value function has
        # converged, rather than forcing the maximum number of iterations.
        pi.toggleUseCachedTransitionDynamics(False)
        # run planning from our initial state
        pi.setDebugCode(0)
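        # Seed policy iteration with the policy computed by value iteration above
        # (assuming setPolicyToEvaluate sets the initial policy to be evaluated)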
        pi.setPolicyToEvaluate(p)
        startTime = clock()
        p = pi.planFromState(initialState)
        timing['Policy'].append(clock() - startTime)
        iterations['Policy'].append(pi.totalPolicyIterations)
        # evaluate the policy with one rollout and visualize the trajectory
        runEvals(initialState, p, rewards['Policy'], steps['Policy'])
        MapPrinter.printPolicyMap(pi.getAllStates(), p, gen.getMap())
        print("\n\n")
        #simpleValueFunctionVis(pi, p, initialState, domain, hashingFactory, "Policy Iteration")
        dumpCSV(iterations['Policy'], timing['Policy'], rewards['Policy'],