Example #1
import numpy as np

# Monte Carlo control with an epsilon-greedy policy. Note that alpha is
# accepted but unused: Q is estimated by averaging the observed returns.
def MCon(episodes, initValue=15, epsilon=0.1, alpha=0.5, discount=0.9):
	# world object (the positions passed to the constructor are arbitrary; the state is reset each episode)
	world = World((0,0),(1,1))

	# initialize Q value table and Return list for every (s,a)-pair
	Q = {}
	R = {}
	for state in world.allStates():
		for move in world.moveList():
			Q[state,move] = initValue # optimistic initial value
			R[state,move] = [] # empty list; return = cumulative discounted reward
	steps = [0]*episodes # number of steps taken in each episode

	for i in range(episodes):
		iterations = 0
		# initialize world
		world.setState((-5,-5))
		stateActionPairs = {}
		# generate an episode using current policy
		while True:
			state = world.position
			# move the predator according to policy
			action = epsGreedyPolicy(state, world, Q, epsilon)
			world.move(action)
			if (state, action) not in stateActionPairs: # store the first occurrence
				stateActionPairs[(state,action)] = iterations # used later for discounting
			iterations += 1
			# check if predator caught the prey
			if world.stopState():
				break
			# move the prey (stochastically)
			world.performPreyMove()
		steps[i] = iterations # save the number of steps needed to catch the prey
		# update Q and R
		for pair in stateActionPairs.keys():
			firstReturn = 10.0*discount**(iterations-stateActionPairs[pair]) # reward is 0 until the terminal catch (+10), so the return is 10*gamma^(T-t)
			R[pair].append(firstReturn)
			Q[pair] = np.mean(R[pair])
		# policy improvement is implicit: epsGreedyPolicy acts greedily w.r.t. the updated Q
	return steps
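Both MCon above and the Qlearning function below call an epsGreedyPolicy helper that is not part of this listing. A minimal sketch of what it might look like, assuming World.moveList() returns the available actions and Q is keyed by (state, action) as above; the name and signature come from the call sites, while the body is an assumption:

import random

def epsGreedyPolicy(state, world, Q, epsilon):
	# Hypothetical sketch, not from the original code: with probability
	# epsilon pick a random move (exploration), otherwise pick one of the
	# highest-valued moves, breaking ties uniformly at random.
	moves = world.moveList()
	if random.random() < epsilon:
		return random.choice(moves)
	best = max(Q[state, move] for move in moves)
	return random.choice([move for move in moves if Q[state, move] == best])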
def Qlearning(episodes, policy, startState=(-5,-5), initValue=15, policyParam=0.1, alpha=0.4, discount=0.9):
	# world object (the positions passed to the constructor are arbitrary; the state is reset each episode)
	world = World((0,0),(1,1))

	# Q value table
	Q = {}
	for state in world.allStates():
		for move in world.moveList():
			Q[state,move] = initValue

	steps = [0]*episodes

	for i in range(episodes):
		iterations = 0
		# initialize world
		world.setState(startState)
		while True:
			state = world.position
			# move the predator according to the policy, which takes one parameter (epsilon for epsilon-greedy or tau for softmax)
			action = policy(state, world, Q, policyParam)
			world.move(action)
			iterations += 1
			# check if predator caught the prey
			if world.stopState():
				# the Q(s,a) update rule (the next state is absorbing, so its value is 0 and the reward is 10)
				Q[state,action] = Q[state,action] + alpha * (10 - Q[state,action])
				break
			# move the prey (stochastically)
			world.performPreyMove()
			newState = world.position
			# the maximum value the agent can have after another move
			maxQ = max([Q[newState,nextAction] for nextAction in world.moveList()])
			# the Q(s,a) update rule (note that the immediate reward is zero)
			Q[state,action] = Q[state,action] + alpha * ( discount*maxQ - Q[state,action])
		# save the number of steps the predator took
		steps[i] = iterations
	return steps
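A minimal usage sketch under the same assumptions (World and epsGreedyPolicy come from the surrounding project; the episode count and the averaging window are illustrative):

# Hypothetical driver: train both learners and compare the average
# episode length over the last 100 episodes.
mcSteps = MCon(1000)
qSteps = Qlearning(1000, epsGreedyPolicy, policyParam=0.1)
print(sum(mcSteps[-100:]) / 100.0, sum(qSteps[-100:]) / 100.0)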
Example #3
import random
import numpy as np

def MCoff(episodes, behaPolicy, matches=None, initValue=15, discount=0.9):
	# behaPolicy: dictionary with keys (state,action) and value P(action|state)
	if matches is None: # avoid sharing a mutable default list between calls
		matches = []

	world = World((0,0),(1,1))
	movelist = world.moveList()
	def policy(world):
		# sample an action for the current state from the behaviour policy
		return world.pickElementWithProbs([(move,behaPolicy[(world.position,move)]) for move in movelist])

	# initialize the Q table and the weighted importance-sampling accumulators for every (s,a)-pair
	Q = {}
	num = {}   # numerator: sum of w * return per (s,a)
	denom = {} # denominator: sum of importance weights w per (s,a)
	for state in world.allStates():
		for move in world.moveList():
			num[state,move] = 0.0
			denom[state,move] = 0.0
			Q[state,move] = float(initValue) # optimistic initial value
	steps = [0]*episodes # number of greedy-evaluation steps per episode
	for epi in range(episodes):
		# initialize world
		world.setState((-5,-5))
		
		episode = []
		while True:
			action = policy(world)
			episode.append((world.position, action))
			if action is None: # sanity check: the behaviour policy should always return a move
				print(action, world.position)
			world.move(action)
			if world.stopState():
				break
			world.performPreyMove()

		# record, for each pair scanned, its earliest forward index in the episode
		matchingHistory = {}
		# start index of the longest episode suffix whose actions are all greedy w.r.t. Q
		last = 0
		for i, (state, action) in enumerate(episode[::-1]): # walk the episode backwards
			actionValues = [(Q[state,maction],maction) for maction in world.moveList()]
			bestActions = [actionValues[j][1] for j in maxIndices(actionValues)]
			matchingHistory[(state, action)] = len(episode)-i-1 # forward index of this step
			if action not in bestActions:
				last = len(episode)-i # the greedy tail starts just after this deviating step
				break

		matches.append(len(episode)-last) # length of the greedy tail
		for (state, action) in matchingHistory:
			if matchingHistory[(state, action)] >= last-1:
				t = matchingHistory[(state, action)]
				# importance weight: the greedy target policy is deterministic,
				# so w is the product of 1/P(a_j|s_j) over the remaining steps
				w = np.prod([1.0/behaPolicy[episode[j]] for j in range(t, len(episode))])
				num[(state,action)]   += w * (10.0*discount**(len(episode)-t)) # return is gamma^(T-t)*10
				denom[(state,action)] += w
				Q[(state,action)] = num[(state,action)]/denom[(state,action)]

		# evaluate the current greedy policy from the start state
		world.setState((-5,-5))
		iterations = 0
		while True:
			iterations += 1
			state = world.position
			actionValues = [(Q[state,maction],maction) for maction in world.moveList()]
			bestAction = random.choice([actionValues[j][1] for j in maxIndices(actionValues)])
			world.move(bestAction)
			if world.stopState() or iterations > 2000: # cap rollouts that never terminate
				break
			world.performPreyMove()
		steps[epi] = iterations

	return steps
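MCoff additionally relies on a maxIndices helper and on a behaviour-policy dictionary. A plausible sketch of both, assuming maxIndices returns the indices of all maximal elements (so greedy ties can be broken at random) and using a uniform behaviour policy over the available moves; none of this is from the original listing:

def maxIndices(values):
	# Hypothetical helper: indices of every element equal to the maximum.
	best = max(values)
	return [i for i, v in enumerate(values) if v == best]

# Hypothetical uniform behaviour policy: P(action|state) = 1/|moves|.
world = World((0,0),(1,1))
moves = world.moveList()
uniformPolicy = {(s, m): 1.0/len(moves) for s in world.allStates() for m in moves}
matchLengths = []
steps = MCoff(1000, uniformPolicy, matches=matchLengths)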