import numpy as np


class QAgent(object):
    def __init__(self, action_space, nb_state):
        self.ai = QLearn(actions=action_space, alpha=0.1, gamma=0.999, epsilon=0.9)
        self.lastState = None
        self.lastAction = None
        self.nb_state = nb_state

    def calculate_state(self, obs):
        # Discretize the continuous observation into nb_state buckets per
        # dimension; the ranges [-1.2, 0.6] and [-0.07, 0.07] match Gym's
        # MountainCar position and velocity bounds.
        pos = -1
        speed = -1
        step = 1.8 / self.nb_state
        step_speed = 0.14 / self.nb_state
        j = 0
        for i in np.arange(-1.2, 0.6, step):
            if min(i, i + step) <= obs[0] <= max(i, i + step):
                pos = j
            j += 1
        j = 0
        for i in np.arange(-0.07, 0.07, step_speed):
            if min(i, i + step_speed) <= obs[1] <= max(i, i + step_speed):
                speed = j
            j += 1
        return (pos, speed)

    def action(self, obs, reward):
        state = self.calculate_state(obs)
        if self.lastState is not None:
            self.ai.learn(self.lastState, self.lastAction, state, reward)
        action = self.ai.choose_action(state)
        self.lastState = state
        self.lastAction = action
        return action
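Every agent in this file depends on a QLearn class that is not shown here. Below is a minimal tabular sketch of such a class, assuming epsilon-greedy action selection and the learn(state, action, reward, next_state) argument order used by the classes further down; note that QAgent above instead calls choose_action and passes (state, action, next_state, reward), so the exact interface varies between snippets.

# Minimal tabular Q-learning helper -- a sketch only, not the project's
# actual QLearn implementation.
import random

class QLearn:
    def __init__(self, actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.q = {}              # (state, action) -> value
        self.actions = actions
        self.alpha = alpha       # learning rate
        self.gamma = gamma       # discount factor
        self.epsilon = epsilon   # exploration probability

    def getQ(self, state, action):
        return self.q.get((state, action), 0.0)

    def setQ(self, state, action, value):
        self.q[(state, action)] = value

    def chooseAction(self, state):
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        values = [self.getQ(state, a) for a in self.actions]
        best = max(values)
        # break ties randomly among equally good actions
        return random.choice([a for a, v in zip(self.actions, values) if v == best])

    def learn(self, state, action, reward, next_state):
        # Standard one-step Q-learning update
        maxq_next = max(self.getQ(next_state, a) for a in self.actions)
        old = self.getQ(state, action)
        self.q[(state, action)] = old + self.alpha * (reward + self.gamma * maxq_next - old)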
import random


class Animat:
    PossibleActions = [0, 1, 2, 3, 4, 5, 6]

    def __init__(self, x, y, mapX, mapY, env):
        self.x = x
        self.y = y
        self.webX = mapX - 4
        self.webY = mapY - 4
        self.energy = 1000
        self.hunger = 1000
        self.location = (self.x, self.y)
        self.layWeb = False
        self.env = env
        self.lastState = None
        self.lastAction = 0
        # Initialize the animat's brain
        self.brain = QLearn(Animat.PossibleActions)

    def animate(self):
        # Get the current state
        curState = self.getState()
        actionChosen = self.brain.chooseAction(curState)
        # Update the animat's energy levels
        if self.energy > 0:
            self.energy -= 0.5
        if self.hunger < 1000:
            self.hunger += 0.1
        # Perform the selected action and calculate its reward
        selectedAction = actionChosen
        self.performAction(selectedAction)
        reward = self.getReward(curState, selectedAction)
        nextState = self.getState()
        self.brain.learn(curState, selectedAction, reward, nextState)

    def performAction(self, selectedAction):
        self.layWeb = 0
        if selectedAction in (0, 1, 2, 3):
            self.explore(selectedAction)
        elif selectedAction == 4:
            self.eat()
        elif selectedAction == 5:
            # Lay web while moving toward food if a gradient exists,
            # otherwise in a random direction
            self.layWeb = 1
            if self.findFood() < 4:
                direct = self.findFood()
            else:
                direct = random.randrange(0, 4)
            self.explore(direct)
        elif selectedAction == 6:
            pass  # do nothing

    def explore(self, direction):
        if direction == 0:
            if self.y + 1 < self.webY + 1 and self.env.canWalk(self.x, self.y + 1):
                self.y += 1
        elif direction == 1:
            if self.x + 1 < self.webX + 1 and self.env.canWalk(self.x + 1, self.y):
                self.x += 1
        elif direction == 2:
            if self.x - 1 > 3 and self.env.canWalk(self.x - 1, self.y):
                self.x -= 1
        elif direction == 3:
            if self.y - 1 > 3 and self.env.canWalk(self.x, self.y - 1):
                self.y -= 1

    def eat(self):
        if self.env.returnFoodID(self.x, self.y) != -1:
            if self.energy < 960:
                self.energy += 40
            if self.hunger > 0:
                self.hunger -= 20
            self.env.removeFood(self.env.returnFoodID(self.x, self.y))

    def findFood(self):
        direction = 0
        if self.env.foodList:
            self.env.getFoodgradient(self.x, self.y)
            for i in range(0, len(self.env.foodList)):
                if self.env.canWalk(self.env.foodList[i].x, self.env.foodList[i].y):
                    foodTarget = self.env.foodList[i]
                    near = (foodTarget.x - 2 < self.x < foodTarget.x + 2 and
                            foodTarget.y - 2 < self.y < foodTarget.y + 2)
                    if self.x != foodTarget.x:
                        direction = 1 if self.x < foodTarget.x else 2
                        if near:
                            direction += 6
                        return direction
                    elif self.y != foodTarget.y:
                        direction = 0 if self.y < foodTarget.y else 3
                        if near:
                            direction += 6
                        return direction
                    else:
                        # Standing on food
                        direction = 4 if self.hunger > 0 else 10
                        return direction
        # No reachable food
        direction = 5
        return direction

    def getState(self):
        return self.findFood()

    def getReward(self, state, action):
        reward = -1
        if action < 4 and action == state:
            reward += 10   # followed the food gradient
        elif state == 4:   # on food and hungry
            if action == 4:
                reward += 50
            else:
                reward = -30
        elif state == 10:  # on top of food but not hungry
            if action == 5:
                reward += 50
            elif action == 4:
                reward = -1
        elif state == 5:   # no food in sight
            if action == 6:
                reward += 10
            elif action != 4 and action != 5:
                reward += 5
            else:
                reward -= 10
        if state > 5 and state == action + 6:
            reward += 10   # close to a food source, moving toward it
        if state > 5 and action == 5:
            reward += 50   # close to food: laying web is rewarded
        return reward
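The integer state codes that findFood() produces are easy to misread, so here is an illustrative legend reconstructed from that method's logic; the dictionary and its name are ours, not part of the original code.

# Illustrative legend for the state codes returned by findFood(); the
# mapping is inferred from the method above and the names are ours.
STATE_LEGEND = {
    0: 'food is toward +y',
    1: 'food is toward +x',
    2: 'food is toward -x',
    3: 'food is toward -y',
    4: 'standing on food while hungry',
    5: 'no reachable food',
    6: 'within 2 cells of food, +y',   # direction code + 6
    7: 'within 2 cells of food, +x',
    8: 'within 2 cells of food, -x',
    9: 'within 2 cells of food, -y',
    10: 'standing on food but not hungry',
}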
import random

# Object, Food, QLearn and the globals width, height, diag come from the
# surrounding project.


class Prey(Object):
    senserange = 80
    distinct = 10
    # actions = ['up', 'down', 'left', 'right', 'stroll', 'eat']
    actions = ['up', 'down', 'left', 'right', 'eat', 'stay']

    def __init__(self, x=0, y=0, file='qlearn.txt'):
        Object.__init__(self, 'prey', x, y)
        self.qlearn = QLearn(Prey.actions)
        self.step = 4
        self.dumbed = 0
        # Seed the Q-table from a saved "state action value" file
        fin = open(file, 'r')
        for line in fin.readlines():
            content = line.split()
            state = int(content[0])
            action = content[1]
            value = float(content[2])
            self.qlearn.setQ(state, action, value)
        fin.close()
        self.target = None
        self.fd = diag
        self.hawkxy = []
        self.hd = diag

    def tick(self, env):
        self.init()  # reset the per-step reward bookkeeping
        currentState = self.getState(env)
        action = self.qlearn.chooseAction(currentState)
        self.act(currentState, action, env)
        nextState = self.getState(env)  # state after performing the action
        reward = self.getReward(currentState, nextState, action)
        if currentState == 11:
            print currentState, action, reward
        self.qlearn.learn(currentState, action, reward, nextState)  # update the Q-table

    def init(self):
        self.hawk = None
        self.reward = 0
        self.pdis = 0
        self.safe = 0

    def act(self, state, action, env):
        step = self.step
        if action == 'up':
            self.y -= step
        if action == 'down':
            self.y += step
        if action == 'left':
            self.x -= step
        if action == 'right':
            self.x += step
        if action == 'stroll':  # retired action, kept for the old action set
            if self.target not in env.food:
                self.target = random.choice(env.food)
            food = self.target
            # Random jitter, then pull two units toward the target food
            self.x += 2 * (step * (random.random() - 0.5))
            self.y += 2 * (step * (random.random() - 0.5))
            x = abs(self.x - food.x)
            y = abs(self.y - food.y)
            t = max(x, y)
            self.x = int((t * self.x + 2 * food.x) / (t + 2.0))
            self.y = int((t * self.y + 2 * food.y) / (t + 2.0))
        self.adjustxy()
        if action == 'eat':
            self.eat(state, env)
        if self.pdis != 0:
            # Fleeing: reward grows with distance gained on the hawk
            hawk = self.hawk
            dis = abs(hawk.x - self.x) + abs(hawk.y - self.y)
            self.reward = dis - self.pdis
        else:
            # Foraging: reward grows with distance closed on the food
            dis = abs(self.x - self.target.x) + abs(self.y - self.target.y)
            self.reward = self.fdis - dis

    def adjustxy(self):
        # Clamp to the world and flag wall hits
        self.dumbed = 0
        if self.x < 0:
            self.x = 0
            self.dumbed = 1
        elif self.x > width:
            self.x = width
            self.dumbed = 1
        if self.y < 0:
            self.y = 0
            self.dumbed = 1
        elif self.y > height:
            self.y = height
            self.dumbed = 1

    def getState(self, env):
        hawk = env.find('hawk', (self.x, self.y), Prey.senserange)
        if hawk != None:
            err = 10
            shelter = env.find('hawkshelter', (self.x, self.y), Prey.senserange)
            if shelter != None and abs(shelter.y - self.y) <= err and abs(shelter.x - self.x) <= err:
                self.safe = 1
                return 11  # reached a shelter
            self.hawk = hawk
            state = []
            diffx = hawk.x - self.x
            diffy = hawk.y - self.y
            # Flee target: mirror the hawk's offset away from the prey,
            # clamped to the world
            x = self.x - 2 * diffx
            y = self.y - 2 * diffy
            if x < 0:
                x = 0
            elif x > width:
                x = width
            if y < 0:
                y = 0
            elif y > height:
                y = height
            self.target = Food(x, y)
            # Hawk states 7 8 9 10 mirror the food states 1 2 3 4
            err = 5
            if diffy < -err:
                state += [7]
            if diffy > err:
                state += [8]
            if diffx < -err:
                state += [9]
            if diffx > err:
                state += [10]
            self.pdis += abs(diffx) + abs(diffy)
            if state == []:
                state = [7, 8, 9, 10]
            return random.choice(state)
        err = 10
        food = env.find('food', (self.x, self.y), diag)
        if self.target == None:
            self.target = food
        if self.target not in env.food:
            if abs(self.target.y - self.y) <= err and abs(self.target.x - self.x) <= err:
                self.target = random.choice(env.food)
        food = env.find('food', (self.x, self.y), Prey.senserange)
        if food != None:
            self.target = food
        food = self.target
        state = []
        if food != None:
            # Weight each direction by its distance (times a random factor)
            # so the dominant direction usually wins the draw below
            if food.y + err < self.y:
                state += [1] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # up
            if food.y - err > self.y:
                state += [2] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # down
            if food.x + err < self.x:
                state += [3] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # left
            if food.x - err > self.x:
                state += [4] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # right
            if abs(food.y - self.y) <= err and abs(food.x - self.x) <= err:
                state.append(5)  # standing on food
        else:
            state.append(6)  # nothing sensed
            if self.target == None:
                self.target = random.choice(env.food)
            if self.target not in env.food and random.random() < 0.05:
                self.target = random.choice(env.food)
        self.fdis = abs(self.x - self.target.x) + abs(self.y - self.target.y)
        return random.choice(state)

    def getReward(self, state, nstate, action):
        reward = self.reward
        if self.safe == 1:
            reward += 100
        if action == 'eat':
            reward -= 25
            if state == 5:
                reward += 125
        if self.dumbed:
            reward -= 100
        return reward

    def eat(self, state, env):
        if state == 5:
            food = env.find('food', (self.x, self.y), Prey.senserange)
            if food != None:
                env.remove(food)
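Each Prey constructor here seeds its Q-table from a whitespace-separated file of "state action value" triples, one per line with no headers or comments (each line is split and parsed directly). A hypothetical qlearn.txt with invented values might look like:

1 up 10.0
2 down 10.0
5 eat 25.0
6 stay -1.0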
class Prey(Object):
    senserange = 80
    distinct = 10
    # actions = ['up', 'down', 'left', 'right', 'stroll', 'eat']
    actions = ['up', 'down', 'left', 'right', 'stay', 'eat']

    def __init__(self, x=0, y=0, file='qlearn.txt'):
        Object.__init__(self, 'prey', x, y)
        self.qlearn = QLearn(Prey.actions)
        self.dangerous = 0
        self.step = 4
        self.dumbed = 0
        # Seed the Q-table from a saved "state action value" file
        fin = open(file, 'r')
        for line in fin.readlines():
            content = line.split()
            self.qlearn.setQ(int(content[0]), content[1], float(content[2]))
        fin.close()
        self.food = None
        self.fd = diag
        self.hawkxy = []
        self.hd = diag
        self.bush = None
        self.bd = diag

    def tick(self, env):
        currentState = self.getState(env)
        action = self.qlearn.chooseAction(currentState)
        dis = self.act(currentState, action, env)
        nextState = self.getState(env)  # state after performing the action
        reward = self.getReward(currentState, nextState, action, dis)
        if currentState >= 7:
            print currentState, action, reward, dis
        self.qlearn.learn(currentState, action, reward, nextState)  # update the Q-table

    def act(self, state, action, env):
        step = self.step
        if action == 'up':
            self.y -= step
        if action == 'down':
            self.y += step
        if action == 'left':
            self.x -= step
        if action == 'right':
            self.x += step
        if action == 'stroll':  # retired action, kept for the old action set
            if self.food not in env.food:
                self.food = random.choice(env.food)
            food = self.food
            print food.x, food.y
            self.x += 2 * (step * (random.random() - 0.5))
            self.y += 2 * (step * (random.random() - 0.5))
            x = abs(self.x - food.x)
            y = abs(self.y - food.y)
            t = max(x, y)
            self.x = int((t * self.x + 2 * food.x) / (t + 2.0))
            self.y = int((t * self.y + 2 * food.y) / (t + 2.0))
        self.adjustxy()
        if action == 'eat':
            self.eat(state, env)
        # Return the distance change used by getReward()
        if state <= 6:
            return self.fd - (abs(self.x - self.food.x) + abs(self.y - self.food.y))
        elif state <= 11:
            hd = 0
            for hawkxy in self.hawkxy:
                hd += abs(self.x - hawkxy[0]) + abs(self.y - hawkxy[1])
            return self.hd - hd

    def adjustxy(self):
        # Clamp to the world and flag wall hits
        self.dumbed = 0
        if self.x < 0:
            self.x = 0
            self.dumbed = 1
        elif self.x > width:
            self.x = width
            self.dumbed = 1
        if self.y < 0:
            self.y = 0
            self.dumbed = 1
        elif self.y > height:
            self.y = height
            self.dumbed = 1

    def getState(self, env):
        if self.dangerous:
            # Once alarmed, sense farther and pick a new food target
            r = Predator.senserange + Prey.distinct
            self.food = random.choice(env.food)
        else:
            r = Prey.senserange
        hawks = env.findall('hawk', (self.x, self.y), r)
        if len(hawks) != 0:
            self.hawkxy = []
            self.dangerous = 1
            if len(hawks) == 1:
                hawk = hawks[0]
                err = 0
                state = []
                if hawk.y + err <= self.y and hawk.x - err >= self.x:
                    state += [7]   # quadrant 1: up right
                if hawk.y + err <= self.y and hawk.x + err <= self.x:
                    state += [8]   # quadrant 2: up left
                if hawk.y - err >= self.y and hawk.x + err <= self.x:
                    state += [9]   # quadrant 3: down left
                if hawk.y - err >= self.y and hawk.x - err >= self.x:
                    state += [10]  # quadrant 4: down right
                self.hawkxy.append((hawk.x, hawk.y))
                self.hd = abs(self.x - hawk.x) + abs(self.y - hawk.y)
                return random.choice(state)
            else:
                self.hd = 0
                for hawk in hawks:
                    self.hawkxy.append((hawk.x, hawk.y))
                    self.hd += abs(self.x - hawk.x) + abs(self.y - hawk.y)
                return 11  # many hawks
        self.dangerous = 0
        food = env.find('food', (self.x, self.y), Prey.senserange)
        err = 10
        state = []
        if food != None:
            # Weight each direction by its distance (times a random factor)
            if food.y + err < self.y:
                state += [1] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # up
            if food.y - err > self.y:
                state += [2] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # down
            if food.x + err < self.x:
                state += [3] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # left
            if food.x - err > self.x:
                state += [4] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # right
            if abs(food.y - self.y) <= err and abs(food.x - self.x) <= err:
                state.append(5)  # standing on food
            self.food = food
        else:
            state.append(6)  # nothing sensed
            if self.food not in env.food:
                self.food = random.choice(env.food)
        self.fd = abs(self.x - self.food.x) + abs(self.y - self.food.y)
        return random.choice(state)

    def getReward(self, state, nstate, action, dis):
        reward = 0
        if action == 'eat':
            reward -= 25
            if state == 5:
                reward += 125
        if state <= 6:
            # Foraging: dis > 0 means the food got closer
            if dis <= 0:
                reward -= 100
            else:
                reward -= 10
        elif state <= 11:
            # Fleeing: dis < 0 means the hawks got farther away
            if dis < 0:
                reward += 20
            else:
                reward -= 40
        if self.dumbed:
            reward -= 200
        return reward

    def eat(self, state, env):
        if state == 5:
            food = env.find('food', (self.x, self.y), Prey.senserange)
            if food != None:
                env.remove(food)
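The flee-target formula used by these Prey classes (self.x - 2 * diffx in the first version's getState, and the equivalent 3*self.x - 2*x in the shelter fallback of the snake-aware version below) places the escape point on the far side of the prey from the predator, at twice the predator's offset. A small sanity check with invented coordinates:

# Sanity check of the flee-target formula; coordinates are invented.
prey_x, prey_y = 10, 10
hawk_x, hawk_y = 14, 12
diffx, diffy = hawk_x - prey_x, hawk_y - prey_y      # (4, 2)
target = (prey_x - 2 * diffx, prey_y - 2 * diffy)    # (2, 6)
# The target lies directly opposite the hawk at twice its offset,
# i.e. target = 3*prey - 2*hawk componentwise.
assert target == (3 * prey_x - 2 * hawk_x, 3 * prey_y - 2 * hawk_y)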
import copy
import random
from random import choice


class Animat:
    # Class parameters
    energyPerTick = []
    singleFoodEaten = []
    multipleFoodEaten = []
    energyPerTickIndex = 0
    singleFoodEatenIndex = 0
    multipleFoodEatenIndex = 0
    count = 0
    ID = -1
    allowDeath = False
    foodTargeting = True
    energyThreshold = 80
    actions = ['north', 'south', 'east', 'west', 'eat', 'pickup', 'drop']

    def __init__(self, starty, startx, env, foodTypes, max_energy=100.0,
                 start_energy=50.0, idnum=1):
        # Initialize instance parameters
        self.y = starty
        self.x = startx
        self.env = env
        self.ID = idnum
        self.foodTypes = foodTypes
        self.energy = [start_energy] * len(self.foodTypes)
        self.maxEnergy = [max_energy] * len(self.foodTypes)
        self.previousEnergy = copy.copy(self.energy)
        self.energyUsageRate = [1.0 / len(self.foodTypes)] * len(self.foodTypes)
        self.foodsEaten = [0] * len(self.foodTypes)
        self.holding = [-1] * len(self.foodTypes)
        # Initialize flags
        self.moved = False  # whether the animat moved this tick
        self.alive = True
        # Initialize threshold parameters
        self.reproductionThreshold = 40.0  # need 40 energy units to reproduce
        # Load the neural net (CURRENTLY UNUSED: we are using a Q-learner instead)
        # nni = NNInitializer()
        # self.neuralNet = nni.readNetwork(filename)
        # Update class parameters
        Animat.count += 1
        # Initialize the Q-table (states and actions)
        self.qLearn = QLearn(Animat.actions)
        # Statistics
        self.multipleFoodEaten = 0
        self.multipleDrop = 0

    def tick(self):
        currentState = self.getState()
        targetFood = self.getTargetFoodSource()
        targetFoodDirection = self.senseEnvironment(targetFood)
        action = self.qLearn.chooseAction(currentState)
        self.performQLearnAction(action)
        # Update energies and get the reward for the performed action
        reward = self.getReward(targetFoodDirection, action)
        nextState = self.getState()  # state after performing the action
        self.qLearn.learn(currentState, action, reward, nextState)  # update the Q-table
        self.resetFlags()
        self.checkDeath()
        return self.alive

    # Dispatch the chosen action to movement, eating, pickup, or drop
    def performQLearnAction(self, action):
        if action == 'north':
            self.move(self.y - 1, self.x)
        if action == 'south':
            self.move(self.y + 1, self.x)
        if action == 'east':
            self.move(self.y, self.x + 1)
        if action == 'west':
            self.move(self.y, self.x - 1)
        if action == 'eat':
            self.eatAll()
        if action == 'pickup':
            self.pickupAnything()
        if action == 'drop':
            self.dropAnything()

    def getState(self):
        # Build a number whose decimal digits are all 0/1, then reinterpret
        # those digits as a binary string (see the worked example below)
        total = 1
        if Animat.foodTargeting:
            total *= 100
            targetFood = self.getTargetFoodSource()
            if targetFood == 0:
                total += 0
            elif targetFood == 1:
                total += 1
            elif targetFood == 2:
                total += 10
            elif targetFood == 3:
                total += 11
        for i in self.foodTypes:
            total *= 10
            total += 1 if (self.holding[self.foodTypes[i]] > 0) else 0
            total *= 10
            total += 1 if (self.isOnFood(i)) else 0
            foodgradient = self.senseEnvironment(i)
            total *= 10
            total *= 10
            if foodgradient == 'north':
                total += 0
            elif foodgradient == 'south':
                total += 1
            elif foodgradient == 'west':
                total += 10
            elif foodgradient == 'east':
                total += 11
        return int(str(total), 2)

    @classmethod
    def randomStart(cls, sizey, sizex):
        # Given the size of the environment, start at a random location
        return cls(random.randint(1, sizey - 1) - 1,
                   random.randint(1, sizex - 1) - 1)

    @classmethod
    def setDeath(cls, death):
        Animat.allowDeath = death

    @classmethod
    def resetStats(cls):
        Animat.energyPerTick = []
        Animat.singleFoodEaten = []
        Animat.multipleFoodEaten = []
        Animat.energyPerTickIndex = 0
        Animat.singleFoodEatenIndex = 0
        Animat.multipleFoodEatenIndex = 0

    @classmethod
    def startTick(cls):
        Animat.energyPerTick.append(0)
        Animat.singleFoodEaten.append(0)
        Animat.multipleFoodEaten.append(0)
        # Carry the running totals forward
        if Animat.singleFoodEatenIndex > 0:
            Animat.singleFoodEaten[Animat.singleFoodEatenIndex] += \
                Animat.singleFoodEaten[Animat.singleFoodEatenIndex - 1]
        if Animat.multipleFoodEatenIndex > 0:
            Animat.multipleFoodEaten[Animat.multipleFoodEatenIndex] += \
                Animat.multipleFoodEaten[Animat.multipleFoodEatenIndex - 1]

    @classmethod
    def endTick(cls):
        Animat.energyPerTickIndex += 1
        Animat.singleFoodEatenIndex += 1
        Animat.multipleFoodEatenIndex += 1

    def displayLocation(self):
        print "y is " + str(self.y) + ", x is " + str(self.x)

    def move(self, newy, newx):
        if self.env[0].canMove(self.y, self.x, newy, newx):
            self.y = newy
            self.x = newx
            self.moved = True
            for f in self.foodTypes:
                if self.holding[f] >= 0:
                    # Move the food I'm holding along with me
                    self.env[f].returnFood(self.holding[f]).y = self.y
                    self.env[f].returnFood(self.holding[f]).x = self.x
                    break

    def pickupAnything(self):
        # Try the food types in order until one pickup succeeds
        for i, foodType in enumerate(self.foodTypes):
            if self.pickup(foodType):
                return

    def pickup(self, foodType):
        # Enforce holding one item at a time
        if max(self.holding) == -1:
            foodID = self.env[foodType].returnFoodIDAt(self.y, self.x)
            if foodID != -1:
                # There is food here; can we pick it up?
                if self.env[foodType].returnFood(foodID).pickUp():
                    # We successfully picked it up
                    self.holding[foodType] = self.env[foodType].returnFoodIDAt(self.y, self.x)
                    return True
        return False

    def dropAnything(self):
        # Drop whatever we're holding
        for i, foodType in enumerate(self.foodTypes):
            if self.drop(foodType):
                return

    def drop(self, foodType):
        if self.holding[foodType] != -1:
            # If we're about to drop one food type on top of a different
            # food type, increment self.multipleDrop
            for f in self.foodTypes:
                if foodType != f and self.isOnFood(f):
                    self.multipleDrop += 1
                    break
            self.env[foodType].returnFood(self.holding[foodType]).drop()
            self.holding[foodType] = -1
            return True
        return False

    def checkDeath(self):
        if Animat.allowDeath:
            for e in self.energy:
                if e <= 0:
                    Animat.count -= 1
                    self.alive = False
                    print "Animat died."
                    return

    def eatAnything(self):
        for i, foodType in enumerate(self.foodTypes):
            if self.eat(foodType):
                return

    def eatAll(self):
        for i, foodType in enumerate(self.foodTypes):
            self.foodsEaten[i] = 1 if self.eat(foodType) else 0
        return self.foodsEaten

    def eat(self, foodType):
        foodId = self.env[foodType].returnFoodIDAt(self.y, self.x)
        if foodId >= 0:
            foodItem = self.env[foodType].returnFood(foodId)
            if not foodItem.held:
                self.eatFood(foodItem, foodType)
                return True
        return False

    def eatFood(self, foodItem, foodType):
        foodItem.eat()
        if foodItem.size == 0:
            self.env[foodType].removeFood(foodItem.id)
            print "Food removed from environment"

    def printEnergy(self):
        print "Energy: " + str(self.energy)

    def senseEnvironment(self, foodType):
        # Scent strengths for center/east/west/north/south; follow the
        # strongest one, breaking ties randomly
        inputValues = self.env[foodType].getScentsCEWNS(self.y, self.x)
        maxVal = max(inputValues)
        maxIndeces = [i for i, mymax in enumerate(inputValues) if mymax == maxVal]
        if maxIndeces:
            maxIndex = choice(maxIndeces)
            if maxIndex == 0:
                state = 'center'
            if maxIndex == 1:
                state = 'east'
            if maxIndex == 2:
                state = 'west'
            if maxIndex == 3:
                state = 'north'
            if maxIndex == 4:
                state = 'south'
            return state

    def isOnFood(self, foodType):
        return self.env[foodType].returnFoodIDAt(self.y, self.x) != -1

    def followGradient(self, stateMachine, toEat, toFollow):
        # Hand-coded baseline policy expressed as a small state machine
        if stateMachine == 'notholding':
            self.dropAnything()
            self.performQLearnAction(self.senseEnvironment(toEat))
            if self.isOnFood(toEat):
                if self.pickup(toEat):
                    return 'holding'
                else:
                    return 'fail'
            return 'notholding'
        elif stateMachine == 'holding':
            self.performQLearnAction(self.senseEnvironment(toFollow))
            if self.isOnFood(toFollow):
                if self.drop(toEat):
                    return 'eat'
            return 'holding'
        elif stateMachine == 'eat':
            if self.isOnFood(toEat) or self.isOnFood(toFollow):
                if not max(self.eatAll()) == 0:
                    return 'eat'
                else:
                    return 'fail'
            else:
                return 'notholding'

    def replenishEnergy(self, energy=500.0):
        self.energy = [energy] * len(self.foodTypes)
        self.alive = True

    def resetFlags(self):
        # Reset flags for the next iteration
        self.moved = False
        self.followedGradient = False
        self.foodsEaten = [0] * len(self.foodTypes)

    def getTargetFoodSource(self):
        # maxEnergy - currEnergy for each food source
        energyTilMax = [y - x for x, y in zip(self.energy, self.maxEnergy)]
        satiation = [y * x for x, y in zip(self.energyUsageRate, energyTilMax)]
        # Never target a food type we're already holding
        satiation = [y if x < 0 else -1 for x, y in zip(self.holding, satiation)]
        maxFollowValue = max(satiation)
        targetFoodSources = [i for i, mymax in enumerate(satiation)
                             if mymax == maxFollowValue]
        if targetFoodSources:
            targetFood = choice(targetFoodSources)
            return targetFood

    def getReward(self, targetDirection, action):
        # Animat parameter constants
        LIVING_COST = 1.0
        MOVEMENT_COST = 0.01      # cost to move one unit
        EATING_REWARD = 10.0      # reward for eating one food source
        EATING_MULT_ENERGY = 50.0
        EATING_MULT_REWARD = 100.0
        GRADIENT_FOLLOW_REWARD = 10.0
        previousEnergy = copy.copy(self.energy)
        # Waive the living and movement costs when following the target gradient
        gradientReward = 0
        if targetDirection == action:
            LIVING_COST = 0
            MOVEMENT_COST = 0
        # Subtract living and movement costs for each energy rate
        self.energy = [
            currEnergy + EATING_REWARD * foodEaten
            - rate * (LIVING_COST + MOVEMENT_COST * self.moved)
            for currEnergy, rate, foodEaten in
            zip(self.energy, self.energyUsageRate, self.foodsEaten)
        ]
        # Limit energy to the maximum
        self.energy = [min(currEnergy, maxEnergy) for currEnergy, maxEnergy
                       in zip(self.energy, self.maxEnergy)]
        numFoodEaten = self.foodsEaten.count(1)
        if numFoodEaten > 1:
            # Add extra energy to each bucket when multiple foods are eaten at once
            for i, v in enumerate(self.foodsEaten):
                if v > 0:
                    self.energy[i] += EATING_MULT_ENERGY
        # Compute the change in each energy bucket
        deltaEnergy = [currEnergy - prevEnergy for currEnergy, prevEnergy
                       in zip(self.energy, previousEnergy)]
        netDeltaEnergy = sum(deltaEnergy)
        # Reward multiplier for eating multiple foods while hungry
        rewardsMultiplier = 1
        if numFoodEaten > 1 and netDeltaEnergy > 0:
            rewardsMultiplier += pow(EATING_MULT_REWARD, numFoodEaten - 1)
            print "Ate ", numFoodEaten, " food sources!"
            self.multipleFoodEaten += 1
            Animat.multipleFoodEaten[Animat.multipleFoodEatenIndex] += 1
        if numFoodEaten > 0:
            Animat.singleFoodEaten[Animat.singleFoodEatenIndex] += 1
        reward = netDeltaEnergy * rewardsMultiplier + gradientReward
        Animat.energyPerTick[Animat.energyPerTickIndex] += netDeltaEnergy
        return reward
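getState() packs several binary features into the decimal digits of total and then reads those digits back as a binary number via int(str(total), 2). A worked example under assumed inputs (one food type, target food 2, not holding, standing on food, gradient 'south'):

# Worked example of the getState() digit-packing trick; inputs are assumed.
total = 1
total = total * 100 + 10   # targetFood == 2 -> digits "10"; total == 110
total = total * 10 + 0     # not holding food type 0;        total == 1100
total = total * 10 + 1     # standing on food type 0;        total == 11001
total = total * 100 + 1    # gradient 'south' -> digits "01"; total == 1100101
state = int(str(total), 2) # the decimal digits read as binary
assert state == 0b1100101 == 101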
class Prey(Object):
    knowsnake = 0
    senserange = 80
    distinct = 10
    # actions = ['up', 'down', 'left', 'right', 'stroll', 'eat']
    actions = ['up', 'down', 'left', 'right', 'eat', 'stay']

    def __init__(self, gen='AA', x=0, y=0, file='qlearn.txt'):
        Object.__init__(self, 'prey', x, y)
        self.qlearn = QLearn(Prey.actions)
        self.step = 4
        self.gen = gen
        self.dumbed = 0
        # Seed the Q-table from a saved "state action value" file
        fin = open(file, 'r')
        for line in fin.readlines():
            content = line.split()
            self.qlearn.setQ(int(content[0]), content[1], float(content[2]))
        fin.close()
        self.target = None
        self.fd = diag
        self.hawkxy = []
        self.hd = diag
        self.energy = 70

    def tick(self, env):
        self.init()  # reset the per-step reward bookkeeping
        currentState = self.getState(env)
        action = self.qlearn.chooseAction(currentState)
        self.act(currentState, action, env)
        nextState = self.getState(env)  # state after performing the action
        reward = self.getReward(currentState, nextState, action)
        self.qlearn.learn(currentState, action, reward, nextState)  # update the Q-table

    def init(self):
        self.hawk = None
        self.reward = 0
        self.pdis = 0
        self.safe = []
        self.escape = 0

    def act(self, state, action, env):
        # Metabolism: reproduce when full, die when starved
        self.energy -= 0.3
        if self.energy >= 70:
            env.prey.append(Prey())
        if self.energy <= 0:
            env.prey.remove(self)
            print 'die from food'
        step = self.step
        if action == 'up':
            self.y -= step
        if action == 'down':
            self.y += step
        if action == 'left':
            self.x -= step
        if action == 'right':
            self.x += step
        if action == 'stroll':  # retired action, kept for the old action set
            if self.target not in env.food:
                self.target = random.choice(env.food)
            food = self.target
            self.x += 2 * (step * (random.random() - 0.5))
            self.y += 2 * (step * (random.random() - 0.5))
            x = abs(self.x - food.x)
            y = abs(self.y - food.y)
            t = max(x, y)
            self.x = int((t * self.x + 2 * food.x) / (t + 2.0))
            self.y = int((t * self.y + 2 * food.y) / (t + 2.0))
        self.adjustxy()
        if action == 'eat':
            self.eat(state, env)
        # Reward grows as the target gets closer
        dis = abs(self.x - self.target.x) + abs(self.y - self.target.y)
        self.reward = self.fdis - dis

    def adjustxy(self):
        # Clamp to the world and flag wall hits
        self.dumbed = 0
        if self.x < 0:
            self.x = 0
            self.dumbed = 1
        elif self.x > width:
            self.x = width
            self.dumbed = 1
        if self.y < 0:
            self.y = 0
            self.dumbed = 1
        elif self.y > height:
            self.y = height
            self.dumbed = 1

    def getState(self, env):
        err = self.step - 1
        # Check for predators; snakes are only recognized once the
        # population has learned about them (Prey.knowsnake grows on
        # every snake encounter)
        hawks = env.findall('hawk', (self.x, self.y), Prey.senserange)
        foxes = env.findall('fox', (self.x, self.y), Prey.senserange)
        snakes = env.findall('snake', (self.x, self.y), Prey.senserange)
        predators = hawks + foxes
        type = ['hawk'] * min(1, len(hawks)) + ['fox'] * min(1, len(foxes))
        if Prey.knowsnake > random.random():
            predators += snakes
            type += ['snake'] * min(1, len(snakes))
        if snakes != []:
            Prey.knowsnake += 0.001
            Prey.knowsnake = min(1.5, Prey.knowsnake)
        if predators != []:
            self.escape = 1
            # Average predator position
            x = 0
            y = 0
            for predator in predators:
                x += predator.x
                y += predator.y
            x /= len(predators)
            y /= len(predators)
            nearest = env.find('predator', (self.x, self.y), Prey.senserange)
            shelter = env.findshelter(type, (self.x, self.y), (x, y), Prey.senserange)
            if shelter != None:
                self.target = shelter
            else:
                # Flee to the point mirrored away from the predators'
                # centroid; if that point falls outside the world, slide it
                # along the boundary
                x = 3 * self.x - 2 * x
                y = 3 * self.y - 2 * y
                if x < 0:
                    if y < 0:
                        x, y = -y, -x
                    elif y > height:
                        x, y = y - height, height + x
                    elif y <= height / 2:
                        x, y = 0, y + Prey.senserange
                    else:
                        x, y = 0, y - Prey.senserange
                elif x > width:
                    if y < 0:
                        x, y = -y, x - width
                    elif y > height:
                        x, y = width + height - y, width + height - x
                    elif y <= height / 2:
                        x, y = width, y + Prey.senserange
                    else:
                        x, y = width, y - Prey.senserange
                elif x < width / 2:
                    if y < 0:
                        x, y = x + Prey.senserange, 0
                    elif y > height:
                        x, y = x + Prey.senserange, height
                else:
                    if y < 0:
                        x, y = x - Prey.senserange, 0
                    elif y > height:
                        x, y = x - Prey.senserange, height
                self.target = Object(x, y)
        else:
            food = env.findall('food', (self.x, self.y), Prey.senserange)
            if len(food) == 0:
                food = None
            elif self.target not in food:
                food = random.choice(food)
            else:
                food = self.target
            if food != None:
                self.target = food
        if self.escape == 0 and self.target != None:
            if abs(self.target.y - self.y) <= err and abs(self.target.x - self.x) <= err:
                self.target = food
        target = self.target
        state = []
        if target != None:
            # Weight each direction by its distance (times a random factor)
            if target.y + err < self.y:
                state += [1] * (abs(target.y - self.y) * int(10 * random.random() + 1))  # up
            if target.y - err > self.y:
                state += [2] * (abs(target.y - self.y) * int(10 * random.random() + 1))  # down
            if target.x + err < self.x:
                state += [3] * (abs(target.x - self.x) * int(10 * random.random() + 1))  # left
            if target.x - err > self.x:
                state += [4] * (abs(target.x - self.x) * int(10 * random.random() + 1))  # right
            if abs(target.y - self.y) <= err and abs(target.x - self.x) <= err:
                if target in (env.bush + env.hole + env.tree):
                    state.append(6)  # inside a shelter
                    self.safe = target.shield
                else:
                    state.append(5)  # standing on food
        else:
            state.append(6)  # nothing sensed
            if self.target == None or self.target not in env.food:
                self.target = random.choice(env.food)
        self.fdis = abs(self.x - self.target.x) + abs(self.y - self.target.y)
        return random.choice(state)

    def getReward(self, state, nstate, action):
        reward = self.reward
        if self.safe != [] and self.escape == 1:
            reward += 100
        if action == 'eat':
            reward -= 25
            if state == 5:
                reward += 125
        if self.dumbed:
            reward -= 100
        return reward

    def eat(self, state, env):
        if state == 5:
            food = env.find('food', (self.x, self.y), Prey.senserange)
            if food != None:
                self.energy += 15
                self.energy = min(self.energy, 100)
                env.remove(food)
class Prey(Object):
    senserange = 100
    distinct = 10
    # actions = ['up', 'down', 'left', 'right', 'stroll', 'eat']
    actions = ['up', 'down', 'left', 'right', 'stay', 'eat']

    def __init__(self, x=0, y=0, file='qlearn.txt'):
        Object.__init__(self, 'prey', x, y)
        self.qlearn = QLearn(Prey.actions)
        self.dangerous = 0
        self.step = 4
        self.dumbed = 0
        self.lastact = None
        self.foodeaten = 0
        # Seed the Q-table from a saved "state action value" file
        fin = open(file, 'r')
        for line in fin.readlines():
            content = line.split()
            self.qlearn.setQ(int(content[0]), content[1], float(content[2]))
        fin.close()
        self.food = None
        self.fd = diag
        self.hawk = None
        self.hd = diag
        self.bush = None
        self.bd = diag
        self.hunger = 0

    def tick(self, env):
        currentState = self.getState(env)
        action = self.qlearn.chooseAction(currentState)
        self.act(currentState, action, env)
        reward = self.getReward(currentState, action)
        nextState = self.getState(env)  # state after performing the action
        print currentState, action, reward
        self.qlearn.learn(currentState, action, reward, nextState)  # update the Q-table

    def act(self, state, action, env):
        self.hunger += 1
        step = self.step
        if action == 'up':
            self.y -= step
        if action == 'down':
            self.y += step
        if action == 'left':
            self.x -= step
        if action == 'right':
            self.x += step
        if action == 'stroll':  # retired action, kept for the old action set
            if self.food not in env.food:
                self.food = random.choice(env.food)
            food = self.food
            print food.x, food.y
            self.x += 2 * (step * (random.random() - 0.5))
            self.y += 2 * (step * (random.random() - 0.5))
            x = abs(self.x - food.x)
            y = abs(self.y - food.y)
            t = max(x, y)
            self.x = int((t * self.x + 2 * food.x) / (t + 2.0))
            self.y = int((t * self.y + 2 * food.y) / (t + 2.0))
        self.adjustxy()
        if action == 'eat':
            self.eat(state, env)

    def adjustxy(self):
        # Clamp to the world and flag wall hits
        self.dumbed = 0
        if self.x < 0:
            self.x = 0
            self.dumbed = 1
        elif self.x > width:
            self.x = width
            self.dumbed = 1
        if self.y < 0:
            self.y = 0
            self.dumbed = 1
        elif self.y > height:
            self.y = height
            self.dumbed = 1

    def getState(self, env):
        self.bush = env.find('bush', (self.x, self.y), Prey.senserange)
        self.origin = (self.x, self.y)  # remember where this step started
        state = []
        hawk = env.find('hawk', (self.x, self.y), Prey.senserange)
        if hawk != None:
            # Hawk state is determined by the dominant axis of its offset
            xdiff = hawk.x - self.x
            ydiff = hawk.y - self.y
            if abs(xdiff) >= abs(ydiff):
                if xdiff > 0:
                    return 10  # hawk on the right
                elif xdiff < 0:
                    return 9   # hawk on the left
                else:
                    return random.choice([7, 8, 9, 10])
            else:
                if ydiff > 0:
                    return 8   # hawk below
                else:
                    return 7   # hawk above
        err = 10
        self.dangerous = 0
        food = env.find('food', (self.x, self.y), diag)
        if food != None:
            # Weight each direction by its distance (times a random factor)
            if food.y + err < self.y:
                state += [1] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # up
            if food.y - err > self.y:
                state += [2] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # down
            if food.x + err < self.x:
                state += [3] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # left
            if food.x - err > self.x:
                state += [4] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # right
            if abs(food.y - self.y) <= err and abs(food.x - self.x) <= err:
                state.append(5)  # standing on food
            self.food = food
        else:
            state.append(6)  # nothing sensed
            if self.food not in env.food:
                self.food = random.choice(env.food)
        return random.choice(state)

    def getReward(self, state, action):
        reward = 0
        # Energy cost of eating
        if action == 'eat':
            reward -= 10
        # Getting hungry every step
        reward -= 5
        # Food eaten this step
        if self.foodeaten == 1:
            reward += 110
        # Change in food distance since the start of the step
        if self.food != None:
            dis = abs(self.x - self.food.x) + abs(self.y - self.food.y)
            dis -= abs(self.origin[0] - self.food.x) + abs(self.origin[1] - self.food.y)
            if state <= 6:
                if dis < 0:
                    reward += 10
                if self.hawk == None and action == 'stay':
                    reward -= 50
        # Change in hawk distance since the start of the step
        if self.hawk != None:
            dis = max(abs(self.x - self.hawk.x), abs(self.y - self.hawk.y))
            dis -= max(abs(self.origin[0] - self.hawk.x), abs(self.origin[1] - self.hawk.y))
            if dis > 0:
                reward += 20
            else:
                dis = min(abs(self.x - self.hawk.x), abs(self.y - self.hawk.y))
                dis -= min(abs(self.origin[0] - self.hawk.x), abs(self.origin[1] - self.hawk.y))
                if dis > 0:
                    reward += 10
                else:
                    reward -= 50
        if self.dumbed:
            reward -= 200
        # Reset for the next tick
        self.foodeaten = 0
        return reward

    def eat(self, state, env):
        if state == 5:
            self.foodeaten = 1
            food = env.find('food', (self.x, self.y), Prey.senserange)
            if food != None:
                env.remove(food)
            self.hunger -= 50
            self.hunger = max(0, self.hunger)
while j < n_moves:
    try:
        state = env.get_state_from_observation(observation)
        action = Q.select_action(state, episode, n_epochs)
        # env.previous_position = observation  # remove this?
        observation_, actions_binary, disp_ctrl = sim.update(action, reward)
        reward, done, kms_to_go = env.step(action, observation, observation_)
        state_ = env.get_state_from_observation(observation_)
        Q.learn(state, action, reward, state_)
        if reward < 0.0:
            penalties += 1
        print_summary_results(episode, j, reward, kms_to_go, penalties, state,
                              actions_binary, observation_, disp_ctrl)
        observation = observation_
        j += 1
        if done:
            break
    except Exception:
        # Count failures instead of crashing the episode
        print("while loop error number", errors)
        errors += 1
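This loop references several names that must exist before it runs. A hypothetical setup sketch follows; the env, sim, and Q objects and their methods come from the surrounding project and are assumed here, as are all the initial values.

# Hypothetical setup for the loop above; `env`, `sim`, and `Q` come from the
# surrounding project and are assumed, not defined here.
n_moves = 1000             # step budget per episode
episode = 0                # current episode index
n_epochs = 100             # total episodes, used by the exploration schedule
j = 0                      # step counter
penalties = 0              # count of negatively rewarded steps
errors = 0                 # count of exceptions caught inside the loop
reward = 0.0               # fed back into sim.update() on the first step
observation = env.reset()  # assumed reset() -> initial observation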