Python World.allMoveList Exemples

Langage de programmation: Python

Espace de noms/Paquet : world

Class/Type: World

Méthode/Fonction: allMoveList

Exemples sur hotexamples.com : 4

Python World.allMoveList - 4 exemples trouvés. Ce sont les exemples réels les mieux notés de world.World.allMoveList extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

World(30)

stopState(7)

add(7)

__init__(6)

__str__(6)

moveList(5)

setState(5)

allStates(5)

buildLineSegmentTestWorld(4)

buildEllipse(4)

from_string(4)

singleMoveList(4)

perform_action(4)

allMoveList(4)

get_player(3)

performPreyMove(3)

addPlayer(3)

buildLineSegmentWorld(3)

nextPreyStates(2)

loadStartArea(2)

add_element(2)

init_schedules(2)

addEnemy(2)

from_snapshot(2)

add_garden(2)

loadSimulation(2)

addObject(2)

addCell(2)

apply_command(2)

to_s(2)

del_player(2)

__create__(2)

addArea(2)

buildAccelArrow(2)

get_location(2)

buildAccelSphere(2)

spawn_predator(2)

spawn_prey(2)

startSimulation(2)

buildGlobalGoal(2)

stopSimulation(2)

turn(2)

marking_fox(1)

marking_rabbit(1)

newGame(1)

moveEye(1)

move_snake(1)

offX(1)

next_gen(1)

occupied(1)

Méthodes fréquemment utilisées

World (30)

stopState (7)

add (7)

__init__ (6)

__str__ (6)

moveList (5)

setState (5)

allStates (5)

buildLineSegmentTestWorld (4)

buildEllipse (4)

Méthodes fréquemment utilisées

from_string (4)

singleMoveList (4)

perform_action (4)

allMoveList (4)

get_player (3)

performPreyMove (3)

addPlayer (3)

buildLineSegmentWorld (3)

nextPreyStates (2)

loadStartArea (2)

add_element (2)

init_schedules (2)

addEnemy (2)

from_snapshot (2)

add_garden (2)

loadSimulation (2)

addObject (2)

addCell (2)

apply_command (2)

to_s (2)

Méthodes fréquemment utilisées

add_element (2)

init_schedules (2)

addEnemy (2)

from_snapshot (2)

add_garden (2)

loadSimulation (2)

addObject (2)

addCell (2)

apply_command (2)

to_s (2)

del_player (2)

__create__ (2)

addArea (2)

buildAccelArrow (2)

get_location (2)

buildAccelSphere (2)

spawn_predator (2)

spawn_prey (2)

startSimulation (2)

buildGlobalGoal (2)

stopSimulation (2)

turn (2)

marking_fox (1)

marking_rabbit (1)

newGame (1)

moveEye (1)

move_snake (1)

offX (1)

next_gen (1)

occupied (1)

Méthodes fréquemment utilisées

del_player (2)

__create__ (2)

addArea (2)

buildAccelArrow (2)

get_location (2)

buildAccelSphere (2)

spawn_predator (2)

spawn_prey (2)

startSimulation (2)

buildGlobalGoal (2)

stopSimulation (2)

turn (2)

marking_fox (1)

marking_rabbit (1)

newGame (1)

moveEye (1)

move_snake (1)

offX (1)

next_gen (1)

occupied (1)

offY (1)

make_move (1)

AStar (1)

loop (1)

get_robots (1)

get_hash (1)

get_level (1)

get_local (1)

get_map_location (1)

get_max_x (1)

get_max_y (1)

get_player_pos (1)

get_players_from_location (1)

get_render_height (1)

get_render_width (1)

get_rnd_free_space (1)

get_xy (1)

load_state (1)

gravitizeAccelSphere (1)

initialize_screen (1)

Exemple #1

0

Afficher le fichier

Fichier : Qlearning.py Projet : HarrieO/Autonomous-Agents

def Qlearning(episodes, initialState, policy, alpha_pred=0.2, alpha_prey=0.5):
    """Train predator and prey with independent tabular Q-learning.

    Each episode builds a fresh ``World((5, 5), initialState)`` and lets both
    sides pick a move through ``policy`` until ``world.stopState()`` is true.

    Args:
        episodes: Number of episodes to run.
        initialState: Second argument handed to the ``World`` constructor.
        policy: Callable invoked as
            ``policy(state, moves, Q, policyParam, initValue)``; it returns
            the move to play.
        alpha_pred: Learning rate applied to the predator table.
        alpha_prey: Learning rate applied to the prey table.

    Returns:
        ``(steps, rewards)``: ``steps[i]`` is the number of moves played in
        episode ``i``; ``rewards[i]`` is 1 when the final ``reward[1]`` of the
        episode was positive and 0 otherwise.
    """
    initValue = 0
    policyParam = 0.2  # forwarded to ``policy`` (epsilon / temperature)
    discount = 0.7

    # Q tables keyed by (state, action).
    Q_pred = {}
    Q_prey = {}

    steps = [0] * episodes
    rewards = [0] * episodes

    for i in range(episodes):
        iterations = 0
        world = World((5, 5), initialState)

        while True:
            state = world.position

            pred_action = policy(state, world.allMoveList(), Q_pred,
                                 policyParam, initValue)
            prey_action = policy(state, world.singleMoveList(), Q_prey,
                                 policyParam, initValue)

            # ``move`` takes the prey move first; reward[0] is used for the
            # prey update and reward[1] for the predator update below.
            reward = world.move(prey_action, pred_action)
            iterations += 1

            pred_key = (state, pred_action)
            prey_key = (state, prey_action)
            Q_pred.setdefault(pred_key, initValue)
            Q_prey.setdefault(prey_key, initValue)

            if world.stopState():
                # Terminal transition: no bootstrapped value, only the reward.
                Q_prey[prey_key] += alpha_prey * (reward[0] - Q_prey[prey_key])
                Q_pred[pred_key] += alpha_pred * (reward[1] - Q_pred[pred_key])
                break

            newState = world.position

            # Best value reachable from the successor state for each side.
            maxQ_pred = max(Q_pred.get((newState, nextAction), initValue)
                            for nextAction in world.allMoveList())
            maxQ_prey = max(Q_prey.get((newState, nextAction), initValue)
                            for nextAction in world.singleMoveList())

            # Non-terminal transition: the immediate reward is treated as zero.
            Q_pred[pred_key] += alpha_pred * (discount * maxQ_pred - Q_pred[pred_key])
            Q_prey[prey_key] += alpha_prey * (discount * maxQ_prey - Q_prey[prey_key])

        if i > 0 and i % 1000 == 0:
            # Single pre-formatted argument: same output on Python 2 and 3.
            print("Episode %d" % i)

        steps[i] = iterations
        if reward[1] > 0:
            rewards[i] = 1

    return steps, rewards

Exemple #2

0

Afficher le fichier

Fichier : Assignment3.1.py Projet : HarrieO/Autonomous-Agents

# Random-policy baseline: for 1..4 predators, play 1000 episodes in which the
# prey and the predators all move uniformly at random, and accumulate how
# often reward[0] ends negative and how many moves were played in total.
# NOTE(review): this excerpt is cut off by the hosting page; `pl`, `preds`,
# `iters` and `prey` are not used in the visible part and are presumably
# consumed by code further down the original file.
from world import World
import random
import pylab as pl

#world = World((5,5),[(10,0),(0,10),(0,0),(10,10),(10,1),(0,9),(0,1),(10,9)])
predatorLocations = [(0,0),(0,10),(10,0),(10,10)]
preds = []
iters = []
prey = []
for no in range(len(predatorLocations)):
    # Use the first no+1 predator start locations for this configuration.
    world = World((5,5),predatorLocations[:no+1])
    # The move lists are fetched once and reused for every world below.
    allMoves = world.allMoveList()
    singleMoves = world.singleMoveList()
    runs = 1000
    totalCaughtPrey = 0
    totalIterations = 0
    for i in range(runs):
        world = World((5,5),predatorLocations[:no+1])
        iterations = 0
        while not world.stopState():
            preyMove = random.choice(singleMoves)
            predatorMoves = random.choice(allMoves)
            reward = world.move(preyMove,predatorMoves)
            iterations += 1
        # NOTE(review): the original indentation was lost; these two
        # statements are assumed to run once per finished episode.
        # A negative reward[0] is counted as a caught prey.
        if reward[0] < 0:
            totalCaughtPrey += 1
        totalIterations += iterations

Exemple #3

0

Afficher le fichier

Fichier : policyHillClimbing.py Projet : HarrieO/Autonomous-Agents

def policyHillClimbing(episodes, initial_state, gamma=0.5, delta=0.2,
                       alpha_pred=0.4, alpha_prey=0.1):
    """Train predator and prey with policy hill-climbing.

    Both sides keep a Q table and a stochastic policy ``pi`` keyed by
    ``(state, action)``. After every move the Q-value of the played action is
    updated, the played action's probability is raised by ``delta`` when it is
    greedy with respect to Q (lowered otherwise), and the policy for that state
    is projected back onto a probability distribution mixed with 10% uniform
    exploration.

    Args:
        episodes: Number of episodes to run.
        initial_state: Second argument handed to the ``World`` constructor.
        gamma: Discount factor.
        delta: Policy step size.
        alpha_pred: Learning rate of the predator Q table.
        alpha_prey: Learning rate of the prey Q table.

    Returns:
        ``(steps, rewards)``: ``steps[i]`` is the number of moves in episode
        ``i`` and ``rewards[i]`` the last ``reward[0]`` of that episode (0 if
        the episode ended without a single move).
    """
    world = World((5, 5), initial_state)
    initValue = 0.0
    terminal = (0, 0)  # the state the original code pre-registers with Q == 0
    num_actions_prey = len(world.singleMoveList())
    num_actions_pred = len(world.allMoveList())

    Q_pred = {(terminal, a): 0 for a in world.allMoveList()}
    Q_prey = {(terminal, a): 0 for a in world.singleMoveList()}
    pi_pred = {}
    pi_prey = {}

    # States are already hashed as part of the dict keys, so a set gives the
    # same membership answers as the original list without the linear scan.
    seen_states = {terminal}

    def register(state, current_world):
        # Initialise tables the first time a state is met. Doing this only
        # once keeps what was learned for the start state across episodes.
        if state in seen_states:
            return
        seen_states.add(state)
        _phc_init_state(state, current_world.singleMoveList(), Q_prey,
                        pi_prey, initValue, num_actions_prey)
        _phc_init_state(state, current_world.allMoveList(), Q_pred,
                        pi_pred, initValue, num_actions_pred)

    steps = [0] * episodes
    rewards = [0] * episodes

    for i in range(episodes):
        world = World((5, 5), initial_state)
        iterations = 0
        reward = (0, 0)  # avoids a stale/unbound reward on a zero-step episode
        register(world.position, world)

        while not world.stopState():
            state = world.position

            action_pred = greedy_policy(pi_pred, state, world.allMoveList())
            action_prey = greedy_policy(pi_prey, state, world.singleMoveList())

            reward = world.move(action_prey, action_pred)
            new_state = world.position
            iterations += 1

            register(new_state, world)
            pred_moves = world.allMoveList()
            prey_moves = world.singleMoveList()

            best_Q_pred = max(Q_pred[(new_state, a)] for a in pred_moves)
            best_Q_prey = max(Q_prey[(new_state, a)] for a in prey_moves)

            pred_key = (state, action_pred)
            prey_key = (state, action_prey)
            Q_pred[pred_key] = ((1.0 - alpha_pred) * Q_pred[pred_key]
                                + alpha_pred * (reward[1] + gamma * best_Q_pred))
            Q_prey[prey_key] = ((1.0 - alpha_prey) * Q_prey[prey_key]
                                + alpha_prey * (reward[0] + gamma * best_Q_prey))

            _phc_update_policy(pi_pred, Q_pred, state, action_pred,
                               pred_moves, delta, num_actions_pred)
            _phc_update_policy(pi_prey, Q_prey, state, action_prey,
                               prey_moves, delta, num_actions_prey)

        rewards[i] = reward[0]
        steps[i] = iterations
        # Single pre-formatted argument: same output on Python 2 and 3.
        print("Episode %d" % i)

    return steps, rewards


def _phc_init_state(state, moves, Q, pi, init_value, num_actions):
    """Give every move of ``state`` the initial Q-value and a uniform probability."""
    for move in moves:
        Q[(state, move)] = init_value
        pi[(state, move)] = 1 / float(num_actions)


def _phc_update_policy(pi, Q, state, action, moves, delta, num_actions):
    """Step ``pi[(state, action)]`` towards the greedy action, then renormalise."""
    if Q[(state, action)] == max(Q[(state, a)] for a in moves):
        pi[(state, action)] += delta
    else:
        pi[(state, action)] -= delta / (num_actions - 1.0)

    # Project back onto a probability distribution. The original divided by
    # the sum of Q-values, which does not normalise the policy; use the
    # policy's own (non-negative) mass instead.
    for a in moves:
        pi[(state, a)] = max(pi[(state, a)], 0.0)
    total = sum(pi[(state, a)] for a in moves)

    for a in moves:
        key = (state, a)
        if total > 0:
            pi[key] /= total
        # Keep 10% of the mass spread uniformly so every move stays explorable.
        pi[key] *= 0.9
        pi[key] += 0.1 / num_actions

Exemple #4

0

Afficher le fichier

Fichier : minimax.py Projet : HarrieO/Autonomous-Agents

def minimax(episodes, initial_state, epsilon, decay, gamma,
            alpha_pred=1.0, alpha_prey=1.0):
    """Train predator and prey with minimax-Q style updates.

    Q tables are keyed by ``(state, predator_action, prey_action)``. After each
    joint move both Q tables are updated, and for each side a linear program
    is solved to obtain the state value ``V[state]`` and the mixed policy
    ``pi[(state, action)]`` that maximises the worst case over the opponent's
    actions.

    Args:
        episodes: Number of episodes to run.
        initial_state: Second argument handed to the ``World`` constructor;
            its length also sizes the absorbing-state key.
        epsilon: Forwarded to ``minimax_policy`` as its first argument.
        decay: Factor both learning rates are multiplied by after each update.
        gamma: Discount factor.
        alpha_pred: Initial learning rate of the predator Q table.
        alpha_prey: Initial learning rate of the prey Q table.

    Returns:
        ``(steps, rewards)``: ``steps[epi]`` is the number of moves in episode
        ``epi``; ``rewards[epi]`` is 1 when the last ``reward[1]`` of the
        episode was positive and 0 otherwise.
    """
    Q_pred = {}
    Q_prey = {}
    V_pred = {}
    V_prey = {}
    pi_pred = {}
    pi_prey = {}

    # Optimistic initialisation of every table over the full state space.
    world = World((5, 5), initial_state)
    pred_moves = world.allMoveList()
    prey_moves = world.singleMoveList()
    for state in world.allStates():
        V_pred[state] = 1.0
        V_prey[state] = 1.0
        for action in pred_moves:
            pi_pred[(state, action)] = 1.0 / len(pred_moves)
            for prey_move in prey_moves:
                Q_pred[(state, action, prey_move)] = 1.0
                Q_prey[(state, action, prey_move)] = 1.0
        for action in prey_moves:
            pi_prey[(state, action)] = 1.0 / len(prey_moves)

    # Absorbing state: worth nothing to either side.
    terminal_state = tuple([(0, 0)] * len(initial_state))
    V_pred[terminal_state] = 0.0
    V_prey[terminal_state] = 0.0

    steps = [0] * episodes
    rewards = [0] * episodes

    for epi in range(episodes):
        world = World((5, 5), initial_state)
        iterations = 0
        reward = (0, 0)  # avoids a stale/unbound reward on a zero-step episode

        while not world.stopState():
            state = world.position

            action_pred = minimax_policy(epsilon, pi_pred, state,
                                         world.allMoveList())
            action_prey = minimax_policy(epsilon, pi_prey, state,
                                         world.singleMoveList())

            reward = world.move(action_prey, action_pred)
            iterations += 1
            new_state = world.position

            joint = (state, action_pred, action_prey)
            Q_pred[joint] = ((1.0 - alpha_pred) * Q_pred[joint]
                             + alpha_pred * (reward[1] + gamma * V_pred[new_state]))
            Q_prey[joint] = ((1.0 - alpha_prey) * Q_prey[joint]
                             + alpha_prey * (reward[0] + gamma * V_prey[new_state]))

            pred_moves = world.allMoveList()
            prey_moves = world.singleMoveList()

            # Predator: own action is the second key component.
            _minimax_refresh(V_pred, pi_pred, state, pred_moves, prey_moves,
                             lambda own, opp: Q_pred[(state, own, opp)])
            # Prey: own action is the third key component.
            _minimax_refresh(V_prey, pi_prey, state, prey_moves, pred_moves,
                             lambda own, opp: Q_prey[(state, opp, own)])

            # NOTE(review): the original indentation was lost; the decay is
            # assumed to be applied after every update step -- confirm.
            alpha_pred *= decay
            alpha_prey *= decay

        if epi > 0 and epi % 50 == 0:
            # Single pre-formatted argument: same output on Python 2 and 3.
            print("Episode %d" % epi)

        steps[epi] = iterations
        if reward[1] > 0:
            rewards[epi] = 1

    return steps, rewards


def _minimax_refresh(V, pi, state, own_moves, opp_moves, payoff):
    """Solve the maximin LP for ``state`` and store its value and policy."""
    sol = _solve_maximin(own_moves, opp_moves, payoff)
    V[state] = sol['x'][0]
    for move, prob in zip(own_moves, sol['x'][1:]):
        pi[(state, move)] = prob


def _solve_maximin(own_moves, opp_moves, payoff):
    """Build and solve the LP over ``x = [V, pi(a_1), ..., pi(a_n)]``.

    The objective ``c = [-1, 0, ...]`` minimises ``-V`` subject to ``A x <= b``
    with, in row order: ``sum(pi) <= 1``, ``-sum(pi) <= -1``, one row
    ``V - sum_a pi(a) * payoff(a, o) <= 0`` per opponent action ``o``, and one
    row ``-pi(a) <= 0`` per own action. Returns the dict from ``solvers.lp``.
    """
    n_own = len(own_moves)

    # Worst-case rows: the value may not exceed the expected payoff against
    # any single opponent action.
    min_constr = [[1.0] + [-payoff(own, opp) for own in own_moves]
                  for opp in opp_moves]

    # Non-negativity rows, one per own action (column 0 belongs to V).
    pos_constr = []
    for col in range(1, n_own + 1):
        row = [0.0] * (n_own + 1)
        row[col] = -1.0
        pos_constr.append(row)

    # Equality sum(pi) == 1 expressed as two opposite inequalities.
    norm_greater = [0.0] + [1.0] * n_own
    norm_smaller = [0.0] + [-1.0] * n_own

    # The rows are listed one per inner list and then transposed, as in the
    # original code.
    A = matrix([norm_greater, norm_smaller] + min_constr + pos_constr).trans()
    b = matrix([1.0, -1.0] + [0.0] * (len(opp_moves) + n_own))
    c = matrix([-1.0] + [0.0] * n_own)
    return solvers.lp(c, A, b)