Python Actor.ChooseActionByPolicy Examples

Programming Language: Python

Namespace/Package Name: Actor

Class/Type: Actor

Method/Function: ChooseActionByPolicy

Examples at hotexamples.com: 2

Python Actor.ChooseActionByPolicy - 2 examples found. These are the top-rated real-world Python examples of Actor.Actor.ChooseActionByPolicy, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Actor(30)

__init__(15)

train(15)

parameters(14)

choose_action(8)

learn(8)

predict(7)

train_fn(6)

eval(6)

state_dict(5)

load_state_dict(4)

load(4)

_loss_(3)

update(3)

setPos(2)

ChooseActionByPolicy(2)

save(2)

setcollaabb(2)

sethitaabb(2)

get_shortest_actor(2)

model(2)

build_net(2)

doAttack(1)

target_predict(1)

save_cache(1)

setLocation(1)

addtriumphs(1)

setUnqTags(1)

set_brain(1)

set_goal(1)

setaabb(1)

addedToWorld(1)

add_grad_to_graph(1)

addMovie(1)

stop(1)

take_damage_shortest_point(1)

target_actions(1)

target_predict_method(1)

run(1)

transfer_to_actor_model(1)

update_target(1)

update_actor_target(1)

updatePolicy(1)

act(1)

unpickleActors(1)

triumph(1)

train_p(1)

target_update_method(1)

action(1)

train_2(1)

Example #1

Show file

File: General_Actor_Critic.py Project: NikZy/IT3105-AiProg

def DoEpisodes(episodes,
               boardSize,
               maxRemovePegs,
               boardType,
               epsilon=0.5,
               learningRate=0.9,
               policyTable=None,
               valueTable=None):
    """Run actor-critic training episodes and write the resulting tables.

    For each episode a board is obtained from ``GetRandomizedBoard``, the
    actor picks actions via ``ChooseActionByPolicy`` and, after every move,
    the critic's TD error is used to update both the value and policy
    entries for each item in the world's game log. When all episodes are
    done the tables are handed to ``WriteTables``.

    Args:
        episodes: Number of episodes to run.
        boardSize: Forwarded to ``GetRandomizedBoard`` and ``WriteTables``.
        maxRemovePegs: Forwarded to ``GetRandomizedBoard``.
        boardType: Forwarded to ``GetRandomizedBoard`` and ``WriteTables``;
            also concatenated with ``str(boardSize)`` as the key passed to
            ``updateSolvableStates``.
        epsilon: Third constructor argument of ``Actor``.
        learningRate: Second constructor argument of ``Actor`` and ``Critic``.
        policyTable: Initial policy table for the actor. ``None`` (default)
            starts from a new empty dict.
        valueTable: Initial value table for the critic. ``None`` (default)
            starts from a new empty dict.

    Returns:
        None. Progress is printed once per episode.
    """
    # A literal `{}` default is evaluated once at definition time and would
    # be shared by every call that omits the argument, leaking learned
    # entries from one training run into the next.
    if policyTable is None:
        policyTable = {}
    if valueTable is None:
        valueTable = {}

    TotalError = 0
    # Starts at 1, so the mean-error division below can never divide by zero.
    stepsTaken = 1

    # NOTE(review): the meaning of the leading 0.9 is not visible here
    # (presumably a discount or eligibility-decay factor) - confirm in
    # Actor/Critic.
    actor = Actor(0.9, learningRate, epsilon, policyTable)
    critic = Critic(0.9, learningRate, valueTable)

    for i in range(episodes):
        world = GetRandomizedBoard(boardSize, maxRemovePegs, boardType)

        actor.resetEligibility()
        critic.resetEligibility()
        critic.tdError = 0
        reward = 0
        state = world.stateToHash()

        chosenAction = actor.ChooseActionByPolicy(world)

        while True:
            # Apply the current action, then look one step ahead so the
            # critic can compare the previous and the new state.
            reward = world.makeAction(chosenAction)
            nextAction = actor.ChooseActionByPolicy(world)
            nextState = world.stateToHash()

            actor.eligibility[state + str(chosenAction)] = 1
            critic.updateTDError(reward, state, nextState)
            critic.eligibility[state] = 1
            TotalError += abs(critic.tdError)

            # Propagate the TD error over every entry of the game log.
            for SAP in world.getGameLog():
                critic.updateValue(SAP)
                critic.decayEligibility(SAP)

                actor.updatePolicy(SAP, critic.tdError)
                actor.decayEligibility(SAP)

            # NOTE(review): 10 is presumably the reward for a solved board -
            # confirm against world.makeAction.
            if reward == 10:
                updateSolvableStates(boardType + str(boardSize),
                                     world.startRemoveLocations)

            # The episode ends only after the pass in which the applied
            # action was None, i.e. makeAction(None) has been called and its
            # reward has been learned from.
            if chosenAction is None:
                break
            chosenAction = nextAction
            state = nextState
            stepsTaken += 1

        print('Episode:', i, 'MeanError', TotalError / stepsTaken)

    WriteTables(critic.getValueTable(), actor.getPolicyTable(), boardType,
                boardSize)

Example #2

Show file

File: General_Actor_Critic.py Project: NikZy/IT3105-AiProg

def TestModel(boardSize, maxRemovePegs, boardType, index):
    """Play one board with the stored policy table and render it as a video.

    The policy table is loaded with ``ReadTables``, a board is obtained from
    ``GetSolvableBoard`` and actions chosen by the actor are applied until
    the actor returns ``None``. Each board state is passed to
    ``visualizer.VisualizePegs`` and ``visualizer.GenerateVideo`` is called
    at the end.

    Args:
        boardSize: Forwarded to ``ReadTables`` and ``GetSolvableBoard``.
        maxRemovePegs: Currently unused; kept so existing callers keep
            working.
        boardType: Forwarded to ``ReadTables`` and ``GetSolvableBoard``.
        index: Forwarded to ``GetSolvableBoard`` and
            ``visualizer.GenerateVideo``.

    Returns:
        The value returned by ``world.makeAction(None)`` once the actor has
        no further action to choose.
    """
    _, actorTable = ReadTables(boardType, boardSize)
    # NOTE(review): the third Actor argument is 0 here; presumably this is
    # epsilon and disables exploration - confirm in Actor.
    actor = Actor(0.9, 0.1, 0, actorTable)
    stepNumber = 0
    world = GetSolvableBoard(boardSize, boardType, index)
    chosenAction = actor.ChooseActionByPolicy(world)

    # Render the initial board before any action has been applied.
    visualizer.VisualizePegs(world.getState(), stepNumber)
    reward = 0
    while True:
        world.makeAction(chosenAction)
        visualizer.VisualizePegs(world.getState(), stepNumber, chosenAction)
        chosenAction = actor.ChooseActionByPolicy(world)
        # Identity check: None is a singleton, and `is` cannot be affected
        # by a custom __eq__ on the action type.
        if chosenAction is None:
            # Capture the board before the final makeAction(None) call, whose
            # return value is reported as the reward.
            endstate = str(world._boardState.state)
            reward = world.makeAction(chosenAction)
            print("EndState:", endstate, 'reward:', reward)
            break
        stepNumber += 1
    visualizer.GenerateVideo(stepNumber, index)
    return reward