import random

# Object, Food, QLearn and the world constants (width, height, diag) are
# defined elsewhere in the simulation package.


class Prey(Object):
    senserange = 80
    distinct = 10
    # actions = ['up', 'down', 'left', 'right', 'stroll', 'eat']
    actions = ["up", "down", "left", "right", "eat", "stay"]

    def __init__(self, x=0, y=0, file="qlearn.txt"):
        Object.__init__(self, "prey", x, y)
        self.qlearn = QLearn(Prey.actions)
        # self.origin = (self.x, self.y)
        self.step = 4
        self.dumbed = 0
        # Bootstrap the Q table from disk: one "<state> <action> <value>"
        # triple per line.
        fin = open(file, "r")
        lines = fin.readlines()
        fin.close()
        for line in lines:
            content = line.split()
            state = int(content[0])
            action = content[1]
            value = float(content[2])
            self.qlearn.setQ(state, action, value)
        self.target = None
        self.fd = diag
        self.hawkxy = []
        self.hd = diag

    def tick(self, env):
        # One Q-learning cycle: observe, act, observe again, reward, update.
        self.init()  # reset the per-step reward bookkeeping
        currentState = self.getState(env)
        action = self.qlearn.chooseAction(currentState)
        self.act(currentState, action, env)
        nextState = self.getState(env)  # the state after performing the action
        reward = self.getReward(currentState, nextState, action)
        if currentState == 11:
            print currentState, action, reward
        self.qlearn.learn(currentState, action, reward, nextState)  # update the Q table

    def init(self):
        self.hawk = None
        self.reward = 0
        self.pdis = 0
        self.safe = 0

    def act(self, state, action, env):
        step = self.step
        if action == "up":
            self.y = self.y - step
        if action == "down":
            self.y = self.y + step
        if action == "left":
            self.x = self.x - step
        if action == "right":
            self.x = self.x + step
        if action == "stroll":
            # 'stroll' survives from the earlier action set: jitter randomly,
            # then take a weighted step of about 2 units toward the target.
            if self.target not in env.food:
                self.target = random.choice(env.food)
            food = self.target
            self.x += 2 * (step * (random.random() - 0.5))
            self.y += 2 * (step * (random.random() - 0.5))
            x = abs(self.x - food.x)
            y = abs(self.y - food.y)
            t = max(x, y)
            self.x = int((t * self.x + 2 * food.x) / (t + 2.0))
            self.y = int((t * self.y + 2 * food.y) / (t + 2.0))
        self.adjustxy()
        if action == "eat":
            self.eat(state, env)
        if self.pdis != 0:
            # fleeing: reward is the gain in Manhattan distance from the hawk
            hawk = self.hawk
            diffx = hawk.x - self.x
            diffy = hawk.y - self.y
            dis = abs(diffx) + abs(diffy)
            self.reward = dis - self.pdis
        else:
            # foraging: reward is the progress made toward the target food
            dis = abs(self.x - self.target.x) + abs(self.y - self.target.y)
            self.reward = self.fdis - dis

    def adjustxy(self):
        # clamp to the world and flag wall hits
        self.dumbed = 0
        if self.x < 0:
            self.x = 0
            self.dumbed = 1
        elif self.x > width:
            self.x = width
            self.dumbed = 1
        if self.y < 0:
            self.y = 0
            self.dumbed = 1
        elif self.y > height:
            self.y = height
            self.dumbed = 1

    def getState(self, env):
        hawk = env.find("hawk", (self.x, self.y), Prey.senserange)
        if hawk != None:
            err = 10
            shelter = env.find("hawkshelter", (self.x, self.y), Prey.senserange)
            if shelter != None and abs(shelter.y - self.y) <= err and abs(shelter.x - self.x) <= err:
                self.safe = 1
                return 11  # inside the right shelter
            self.hawk = hawk
            state = []
            diffx = hawk.x - self.x
            diffy = hawk.y - self.y
            # flee toward the point mirrored away from the hawk, clamped to
            # the world; a throwaway Food marks the waypoint
            x = self.x - 2 * diffx
            y = self.y - 2 * diffy
            if x < 0:
                x = 0
            elif x > width:
                x = width
            if y < 0:
                y = 0
            elif y > height:
                y = height
            self.target = Food(x, y)
            # hawk states 7 8 9 10 mirror the food direction states 1 2 3 4
            err = 5
            if diffy < -err:
                state += [7]
            if diffy > err:
                state += [8]
            if diffx < -err:
                state += [9]
            if diffx > err:
                state += [10]
            self.pdis += abs(diffx) + abs(diffy)
            if state == []:
                state = [7, 8, 9, 10]
            return random.choice(state)
        err = 10
        food = env.find("food", (self.x, self.y), diag)
        if self.target == None:
            self.target = food
        if self.target not in env.food:
            if abs(self.target.y - self.y) <= err and abs(self.target.x - self.x) <= err:
                self.target = random.choice(env.food)
        food = env.find("food", (self.x, self.y), Prey.senserange)
        if food != None:
            self.target = food
        food = self.target
        state = []
        if food != None:
            # replicate each direction state in proportion to the distance
            # along that axis, so random.choice favours the dominant axis
            if food.y + err < self.y:
                state += [1] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # up
            if food.y - err > self.y:
                state += [2] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # down
            if food.x + err < self.x:
                state += [3] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # left
            if food.x - err > self.x:
                state += [4] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # right
            if abs(food.y - self.y) <= err and abs(food.x - self.x) <= err:
                state.append(5)  # on the food
        else:
            state.append(6)  # nothing in sight
            if self.target == None:
                self.target = random.choice(env.food)
            if self.target not in env.food and random.random() < 0.05:
                self.target = random.choice(env.food)
        self.fdis = abs(self.x - self.target.x) + abs(self.y - self.target.y)
        return random.choice(state)

    def getReward(self, state, nstate, action):
        reward = self.reward
        if self.safe == 1:
            reward += 100
        if action == "eat":
            reward -= 25
            if state == 5:
                reward += 125  # eating on food nets +100; eating elsewhere costs 25
        if self.dumbed:
            reward -= 100
        return reward

    def eat(self, state, env):
        if state == 5:
            food = env.find("food", (self.x, self.y), Prey.senserange)
            if food != None:
                env.remove(food)
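
# The QLearn object used above comes from the project's qlearn module, which
# is not shown here. A minimal sketch of the tabular interface the Prey
# relies on (QLearn(actions), setQ, chooseAction, learn), assuming standard
# epsilon-greedy Q-learning; the real class and its learning-rate, discount,
# and exploration defaults may differ:
import random

class QLearnSketch:
    def __init__(self, actions, epsilon=0.1, alpha=0.2, gamma=0.9):
        self.q = {}              # (state, action) -> learned value
        self.actions = actions
        self.epsilon = epsilon   # exploration rate
        self.alpha = alpha       # learning rate
        self.gamma = gamma       # discount factor

    def getQ(self, state, action):
        return self.q.get((state, action), 0.0)

    def setQ(self, state, action, value):
        self.q[(state, action)] = value

    def chooseAction(self, state):
        # epsilon-greedy: explore occasionally, otherwise take the best action
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        qs = [self.getQ(state, a) for a in self.actions]
        return self.actions[qs.index(max(qs))]

    def learn(self, state, action, reward, nextState):
        # Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
        maxq = max([self.getQ(nextState, a) for a in self.actions])
        old = self.getQ(state, action)
        self.setQ(state, action, old + self.alpha * (reward + self.gamma * maxq - old))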

# Variant: prey with energy metabolism, reproduction, an inherited gene tag,
# and multi-predator (hawk/fox/snake) awareness.
class Prey(Object):
    knowsnake = 0  # species-level, learned wariness of snakes
    senserange = 80
    distinct = 10
    # actions = ['up', 'down', 'left', 'right', 'stroll', 'eat']
    actions = ['up', 'down', 'left', 'right', 'eat', 'stay']

    def __init__(self, gen='AA', x=0, y=0, file='qlearn.txt'):
        Object.__init__(self, 'prey', x, y)
        self.qlearn = QLearn(Prey.actions)
        self.step = 4
        self.gen = gen
        self.dumbed = 0
        fin = open(file, 'r')
        lines = fin.readlines()
        fin.close()
        for line in lines:
            content = line.split()
            state = int(content[0])
            action = content[1]
            value = float(content[2])
            self.qlearn.setQ(state, action, value)
        self.target = None
        self.fd = diag
        self.hawkxy = []
        self.hd = diag
        self.energy = 70

    def tick(self, env):
        self.init()  # reset the per-step reward bookkeeping
        currentState = self.getState(env)
        action = self.qlearn.chooseAction(currentState)
        self.act(currentState, action, env)
        nextState = self.getState(env)  # the state after performing the action
        reward = self.getReward(currentState, nextState, action)
        self.qlearn.learn(currentState, action, reward, nextState)

    def init(self):
        self.hawk = None
        self.reward = 0
        self.pdis = 0
        self.safe = []
        self.escape = 0

    def act(self, state, action, env):
        # metabolism: each step burns energy; a well-fed prey spawns
        # offspring and a starved one dies
        self.energy -= 0.3
        if self.energy >= 70:
            env.prey.append(Prey())
        if self.energy <= 0:
            env.prey.remove(self)
            print 'die from food'
        step = self.step
        if action == 'up':
            self.y = self.y - step
        if action == 'down':
            self.y = self.y + step
        if action == 'left':
            self.x = self.x - step
        if action == 'right':
            self.x = self.x + step
        if action == 'stroll':
            if self.target not in env.food:
                self.target = random.choice(env.food)
            food = self.target
            self.x += 2 * (step * (random.random() - 0.5))
            self.y += 2 * (step * (random.random() - 0.5))
            x = abs(self.x - food.x)
            y = abs(self.y - food.y)
            t = max(x, y)
            self.x = int((t * self.x + 2 * food.x) / (t + 2.0))
            self.y = int((t * self.y + 2 * food.y) / (t + 2.0))
        self.adjustxy()
        if action == 'eat':
            self.eat(state, env)
        # reward is the progress made toward the current target
        dis = abs(self.x - self.target.x) + abs(self.y - self.target.y)
        self.reward = self.fdis - dis

    def adjustxy(self):
        self.dumbed = 0
        if self.x < 0:
            self.x = 0
            self.dumbed = 1
        elif self.x > width:
            self.x = width
            self.dumbed = 1
        if self.y < 0:
            self.y = 0
            self.dumbed = 1
        elif self.y > height:
            self.y = height
            self.dumbed = 1

    def getState(self, env):
        err = self.step - 1
        # check for predators in sense range
        hawks = env.findall('hawk', (self.x, self.y), Prey.senserange)
        foxes = env.findall('fox', (self.x, self.y), Prey.senserange)
        snakes = env.findall('snake', (self.x, self.y), Prey.senserange)
        predators = hawks + foxes
        type = ['hawk'] * min(1, len(hawks)) + ['fox'] * min(1, len(foxes))
        # snakes are only recognised with probability knowsnake, which creeps
        # up (capped at 1.5) every tick snakes are around
        if Prey.knowsnake > random.random():
            predators += snakes
            type += ['snake'] * min(1, len(snakes))
        if snakes != []:
            Prey.knowsnake += 0.001
            Prey.knowsnake = min(1.5, Prey.knowsnake)
        if predators != []:
            self.escape = 1
            # centroid of the visible predators
            x = 0
            y = 0
            for predator in predators:
                x += predator.x
                y += predator.y
            x /= len(predators)
            y /= len(predators)
            nearest = env.find('predator', (self.x, self.y), Prey.senserange)
            shelter = env.findshelter(type, (self.x, self.y), (x, y), Prey.senserange)
            if shelter != None:
                self.target = shelter
            else:
                # flee to the point mirrored away from the predator centroid;
                # if that point leaves the world, slide along the nearest wall
                x = 3 * self.x - 2 * x
                y = 3 * self.y - 2 * y
                if x < 0:
                    if y < 0:
                        x, y = -y, -x
                    elif y > height:
                        x, y = y - height, height + x
                    elif y <= height / 2:
                        x, y = 0, y + Prey.senserange
                    else:
                        x, y = 0, y - Prey.senserange
                elif x > width:
                    if y < 0:
                        x, y = -y, x - width
                    elif y > height:
                        x, y = width + height - y, width + height - x
                    elif y <= height / 2:
                        x, y = width, y + Prey.senserange
                    else:
                        x, y = width, y - Prey.senserange
                elif x < width / 2:
                    if y < 0:
                        x, y = x + Prey.senserange, 0
                    elif y > height:
                        x, y = x + Prey.senserange, height
                else:
                    if y < 0:
                        x, y = x - Prey.senserange, 0
                    elif y > height:
                        x, y = x - Prey.senserange, height
                self.target = Food(x, y)  # throwaway waypoint to flee toward
        else:
            food = env.findall('food', (self.x, self.y), Prey.senserange)
            if len(food) == 0:
                food = None
            elif self.target not in food:
                food = random.choice(food)
            else:
                food = self.target
            if food != None:
                self.target = food
        if self.escape == 0 and self.target != None:
            if abs(self.target.y - self.y) <= err and abs(self.target.x - self.x) <= err:
                self.target = food
        target = self.target
        state = []
        if target != None:
            if target.y + err < self.y:
                state += [1] * (abs(target.y - self.y) * int(10 * random.random() + 1))  # up
            if target.y - err > self.y:
                state += [2] * (abs(target.y - self.y) * int(10 * random.random() + 1))  # down
            if target.x + err < self.x:
                state += [3] * (abs(target.x - self.x) * int(10 * random.random() + 1))  # left
            if target.x - err > self.x:
                state += [4] * (abs(target.x - self.x) * int(10 * random.random() + 1))  # right
            if abs(target.y - self.y) <= err and abs(target.x - self.x) <= err:
                if target in (env.bush + env.hole + env.tree):
                    state.append(6)  # on a shelter
                    self.safe = target.shield
                else:
                    state.append(5)  # on food
        else:
            state.append(6)  # nothing
            if self.target == None or self.target not in env.food:
                self.target = random.choice(env.food)
        self.fdis = abs(self.x - self.target.x) + abs(self.y - self.target.y)
        return random.choice(state)

    def getReward(self, state, nstate, action):
        reward = self.reward
        if self.safe != [] and self.escape == 1:
            reward += 100
        if action == 'eat':
            reward -= 25
            if state == 5:
                reward += 125
        if self.dumbed:
            reward -= 100
        return reward

    def eat(self, state, env):
        if state == 5:
            food = env.find('food', (self.x, self.y), Prey.senserange)
            if food != None:
                self.energy += 15
                self.energy = min(self.energy, 100)
                env.remove(food)


# if __name__ == "__main__":
#     p = Prey()
#     print p.x
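
# Each variant bootstraps its table from qlearn.txt. The loaders above imply
# the file format: one whitespace-separated "<state> <action> <value>" triple
# per line, e.g. "5 eat 125.0". A hypothetical helper (not part of the
# original code, and assuming the QLearn class exposes a getQ accessor) for
# writing a trained table back out in the same format:
def save_qtable(qlearn, states, actions, path='qlearn.txt'):
    fout = open(path, 'w')
    for state in states:
        for action in actions:
            fout.write('%d %s %f\n' % (state, action, qlearn.getQ(state, action)))
    fout.close()

# e.g. save_qtable(prey.qlearn, range(1, 12), Prey.actions)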

# Variant: prey that tracks multiple hawks at once and shapes its reward by
# the distance deltas returned from act().
class Prey(Object):
    senserange = 80
    distinct = 10
    # actions = ['up', 'down', 'left', 'right', 'stroll', 'eat']
    actions = ['up', 'down', 'left', 'right', 'stay', 'eat']

    def __init__(self, x=0, y=0, file='qlearn.txt'):
        Object.__init__(self, 'prey', x, y)
        self.qlearn = QLearn(Prey.actions)
        self.dangerous = 0
        self.step = 4
        self.dumbed = 0
        fin = open(file, 'r')
        lines = fin.readlines()
        fin.close()
        for line in lines:
            content = line.split()
            state = int(content[0])
            action = content[1]
            value = float(content[2])
            self.qlearn.setQ(state, action, value)
        self.food = None
        self.fd = diag
        self.hawkxy = []
        self.hd = diag
        self.bush = None
        self.bd = diag

    def tick(self, env):
        currentState = self.getState(env)
        action = self.qlearn.chooseAction(currentState)
        dis = self.act(currentState, action, env)
        nextState = self.getState(env)  # the state after performing the action
        reward = self.getReward(currentState, nextState, action, dis)
        if currentState >= 7:
            print currentState, action, reward, dis
        self.qlearn.learn(currentState, action, reward, nextState)

    def act(self, state, action, env):
        step = self.step
        if action == 'up':
            self.y = self.y - step
        if action == 'down':
            self.y = self.y + step
        if action == 'left':
            self.x = self.x - step
        if action == 'right':
            self.x = self.x + step
        if action == 'stroll':
            if self.food not in env.food:
                self.food = random.choice(env.food)
            food = self.food
            print food.x, food.y
            self.x += 2 * (step * (random.random() - 0.5))
            self.y += 2 * (step * (random.random() - 0.5))
            x = abs(self.x - food.x)
            y = abs(self.y - food.y)
            t = max(x, y)
            self.x = int((t * self.x + 2 * food.x) / (t + 2.0))
            self.y = int((t * self.y + 2 * food.y) / (t + 2.0))
        self.adjustxy()
        if action == 'eat':
            self.eat(state, env)
        if state <= 6:
            # positive: moved closer to the food
            return self.fd - (abs(self.x - self.food.x) + abs(self.y - self.food.y))
        elif state <= 11:
            # negative: moved further from the remembered hawks
            hd = 0
            for hawkxy in self.hawkxy:
                hd += (abs(self.x - hawkxy[0]) + abs(self.y - hawkxy[1]))
            return (self.hd - hd)

    def adjustxy(self):
        self.dumbed = 0
        if self.x < 0:
            self.x = 0
            self.dumbed = 1
        elif self.x > width:
            self.x = width
            self.dumbed = 1
        if self.y < 0:
            self.y = 0
            self.dumbed = 1
        elif self.y > height:
            self.y = height
            self.dumbed = 1

    def getState(self, env):
        if self.dangerous:
            # after a scare, scan a wider radius and pick a fresh food target
            r = Predator.senserange + Prey.distinct
            self.food = random.choice(env.food)
        else:
            r = Prey.senserange
        hawks = env.findall('hawk', (self.x, self.y), r)
        if len(hawks) != 0:
            self.hawkxy = []
            self.dangerous = 1
            if len(hawks) == 1:
                hawk = hawks[0]
                err = 0
                state = []
                if hawk.y + err <= self.y and hawk.x - err >= self.x:
                    state += [7]   # quadrant 1: hawk up-right
                if hawk.y + err <= self.y and hawk.x + err <= self.x:
                    state += [8]   # quadrant 2: hawk up-left
                if hawk.y - err >= self.y and hawk.x + err <= self.x:
                    state += [9]   # quadrant 3: hawk down-left
                if hawk.y - err >= self.y and hawk.x - err >= self.x:
                    state += [10]  # quadrant 4: hawk down-right
                self.hawkxy.append((hawk.x, hawk.y))
                self.hd = abs(self.x - hawk.x) + abs(self.y - hawk.y)
                return random.choice(state)
            else:
                self.hd = 0
                for hawk in hawks:
                    self.hawkxy.append((hawk.x, hawk.y))
                    self.hd += (abs(self.x - hawk.x) + abs(self.y - hawk.y))
                return 11  # many hawks
        self.dangerous = 0
        food = env.find('food', (self.x, self.y), Prey.senserange)
        err = 10
        state = []
        if food != None:
            if food.y + err < self.y:
                state += [1] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # up
            if food.y - err > self.y:
                state += [2] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # down
            if food.x + err < self.x:
                state += [3] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # left
            if food.x - err > self.x:
                state += [4] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # right
            if abs(food.y - self.y) <= err and abs(food.x - self.x) <= err:
                state.append(5)  # on the food
            self.food = food
        else:
            state.append(6)  # nothing
            if self.food not in env.food:
                self.food = random.choice(env.food)
        self.fd = abs(self.x - self.food.x) + abs(self.y - self.food.y)
        return random.choice(state)

    def getReward(self, state, nstate, action, dis):
        reward = 0
        if action == 'eat':
            reward -= 25
            if state == 5:
                reward += 125
        if state <= 6:     # foraging
            if dis <= 0:   # no progress toward (or moved away from) the food
                reward -= 100
            else:
                reward -= 10
        elif state <= 11:  # fleeing hawks
            if dis < 0:    # moved further away from the hawks
                reward += 20
            else:
                reward -= 40
        if self.dumbed:
            reward -= 200
        return reward

    def eat(self, state, env):
        if state == 5:
            food = env.find('food', (self.x, self.y), Prey.senserange)
            if food != None:
                env.remove(food)


# if __name__ == "__main__":
#     p = Prey()
#     print p.x
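
# The getState methods encode direction-to-target stochastically: each
# candidate state is replicated in proportion to the distance along its axis
# (times a random factor of 1..10), so random.choice favours the dominant
# axis without being deterministic. A standalone illustration of that
# weighting, with dx, dy measured from the prey to the target:
import random

def weighted_direction(dx, dy, err=10):
    candidates = []
    if dy < -err:
        candidates += [1] * abs(dy)  # target is above: state 1 (up)
    if dy > err:
        candidates += [2] * abs(dy)  # below: state 2 (down)
    if dx < -err:
        candidates += [3] * abs(dx)  # to the left: state 3
    if dx > err:
        candidates += [4] * abs(dx)  # to the right: state 4
    if not candidates:
        return 5                     # within err on both axes: on the target
    return random.choice(candidates)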

# Variant: prey with hunger, a wider sense range, and reward measured against
# the position recorded at the previous getState().
class Prey(Object):
    senserange = 100
    distinct = 10
    # actions = ['up', 'down', 'left', 'right', 'stroll', 'eat']
    actions = ['up', 'down', 'left', 'right', 'stay', 'eat']

    def __init__(self, x=0, y=0, file='qlearn.txt'):
        Object.__init__(self, 'prey', x, y)
        self.qlearn = QLearn(Prey.actions)
        self.dangerous = 0
        self.step = 4
        self.dumbed = 0
        self.lastact = None
        self.foodeaten = 0
        fin = open(file, 'r')
        lines = fin.readlines()
        fin.close()
        for line in lines:
            content = line.split()
            state = int(content[0])
            action = content[1]
            value = float(content[2])
            self.qlearn.setQ(state, action, value)
        self.food = None
        self.fd = diag
        self.hawk = None
        self.hd = diag
        self.bush = None
        self.bd = diag
        self.hunger = 0

    def tick(self, env):
        currentState = self.getState(env)
        action = self.qlearn.chooseAction(currentState)
        self.act(currentState, action, env)
        reward = self.getReward(currentState, action)
        nextState = self.getState(env)  # the state after performing the action
        print currentState, action, reward  # self.hunger, (self.food.x, self.food.y)
        self.qlearn.learn(currentState, action, reward, nextState)

    def act(self, state, action, env):
        self.hunger += 1
        step = self.step
        if action == 'up':
            self.y = self.y - step
        if action == 'down':
            self.y = self.y + step
        if action == 'left':
            self.x = self.x - step
        if action == 'right':
            self.x = self.x + step
        if action == 'stroll':
            if self.food not in env.food:
                self.food = random.choice(env.food)
            food = self.food
            print food.x, food.y
            self.x += 2 * (step * (random.random() - 0.5))
            self.y += 2 * (step * (random.random() - 0.5))
            x = abs(self.x - food.x)
            y = abs(self.y - food.y)
            t = max(x, y)
            self.x = int((t * self.x + 2 * food.x) / (t + 2.0))
            self.y = int((t * self.y + 2 * food.y) / (t + 2.0))
        self.adjustxy()
        if action == 'eat':
            self.eat(state, env)

    def adjustxy(self):
        self.dumbed = 0
        if self.x < 0:
            self.x = 0
            self.dumbed = 1
        elif self.x > width:
            self.x = width
            self.dumbed = 1
        if self.y < 0:
            self.y = 0
            self.dumbed = 1
        elif self.y > height:
            self.y = height
            self.dumbed = 1

    def getState(self, env):
        self.bush = env.find('bush', (self.x, self.y), Prey.senserange)
        self.origin = (self.x, self.y)  # remembered so getReward can measure progress
        err = 0
        state = []
        hawk = env.find('hawk', (self.x, self.y), Prey.senserange)
        self.hawk = hawk  # remembered for getReward's distance shaping
        if hawk != None:
            # encode only the dominant axis of the threat
            xdiff = hawk.x - self.x
            ydiff = hawk.y - self.y
            if abs(xdiff) >= abs(ydiff):
                if xdiff > 0:
                    return 10  # hawk on the right
                elif xdiff < 0:
                    return 9   # on the left
                else:
                    return random.choice([7, 8, 9, 10])
            else:
                if ydiff > 0:
                    return 8   # below
                else:
                    return 7   # above
        err = 10
        self.dangerous = 0
        food = env.find('food', (self.x, self.y), diag)
        if food != None:
            if food.y + err < self.y:
                state += [1] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # up
            if food.y - err > self.y:
                state += [2] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # down
            if food.x + err < self.x:
                state += [3] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # left
            if food.x - err > self.x:
                state += [4] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # right
            if abs(food.y - self.y) <= err and abs(food.x - self.x) <= err:
                state.append(5)  # on the food
            self.food = food
        else:
            state.append(6)  # nothing
            if self.food not in env.food:
                self.food = random.choice(env.food)
        return random.choice(state)

    def getReward(self, state, action):
        reward = 0
        # energy cost of acting
        if action == 'eat':
            reward -= 10
        # getting hungry
        reward -= 5
        # food eaten
        if self.foodeaten == 1:
            reward += 110
        # progress toward food since the last observation
        if self.food != None:
            dis = abs(self.x - self.food.x) + abs(self.y - self.food.y)
            dis -= (abs(self.origin[0] - self.food.x) + abs(self.origin[1] - self.food.y))
            if state <= 6:
                if dis < 0:
                    reward += 10
        if self.hawk == None and action == 'stay':
            reward -= 50  # no reason to idle when there is no threat
        # progress away from the hawk: major (Chebyshev) axis first, then the minor axis
        if self.hawk != None:
            dis = max(abs(self.x - self.hawk.x), abs(self.y - self.hawk.y))
            dis -= max(abs(self.origin[0] - self.hawk.x), abs(self.origin[1] - self.hawk.y))
            if dis > 0:
                reward += 20
            else:
                dis = min(abs(self.x - self.hawk.x), abs(self.y - self.hawk.y))
                dis -= min(abs(self.origin[0] - self.hawk.x), abs(self.origin[1] - self.hawk.y))
                if dis > 0:
                    reward += 10
                else:
                    reward -= 50
        if self.dumbed:
            reward -= 200
        # reset for the next tick
        self.foodeaten = 0
        return reward

    def eat(self, state, env):
        if state == 5:
            self.foodeaten = 1
            food = env.find('food', (self.x, self.y), Prey.senserange)
            if food != None:
                env.remove(food)
            self.hunger -= 50
            self.hunger = max(0, self.hunger)


# if __name__ == "__main__":
#     p = Prey()
#     print p.x
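
# The commented-out main above hints at the intended entry point. A minimal
# driver sketch; Environment is a hypothetical stand-in for the simulation
# class that provides env.food and the find/findall/remove calls used above:
#
# if __name__ == "__main__":
#     env = Environment()                     # builds food, hawks, shelters
#     prey = Prey(x=width / 2, y=height / 2)  # loads qlearn.txt into the Q table
#     for step in range(1000):
#         prey.tick(env)                      # perceive -> act -> learn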