Example #1
 def __init__(self, action_space, nb_state):
     self.ai = None
     self.ai = QLearn(actions=action_space,
                      alpha=0.1,
                      gamma=0.999,
                      epsilon=0.9)
     self.lastState = None
     self.lastAction = None
     self.nb_state = nb_state
Example #2
	def __init__(self, x, y, mapX, mapY,env):

		self.x = x
		self.y = y
		self.webX = mapX-4
		self.webY = mapY-4
		self.energy = 1000
		self.hunger = 1000
		self.location = (self.x, self.y)
		self.layWeb = False
		self.env = env
		self.lastState = None
		self.lastAction = 0
		
		self.brain = QLearn(Animat.PossibleActions) #-----Initialize animat's brain----#
Example #3
	def __init__(self,starty,startx, env, foodTypes, max_energy = 100.0, start_energy = 50.0, idnum = 1):

		#Initialize instance parameters
		self.y = starty
		self.x = startx
		self.env = env
		self.ID = idnum
		self.foodTypes = foodTypes
		self.energy = [start_energy] * len(self.foodTypes)
		self.maxEnergy = [max_energy] * len(self.foodTypes)
		self.previousEnergy = copy.copy(self.energy)
		self.energyUsageRate = [1.0/len(self.foodTypes)] * len(self.foodTypes)
		self.foodsEaten = [0] * len(self.foodTypes)
		self.holding = [-1] * len(self.foodTypes)

		#Initialize flags
		self.moved = False #if animat moved and direction animat moved in
		self.alive = True

		#Initialize threshold parameters
		self.reproductionThreshold = 40.0 #need 40 energy units to reproduce

		#Load the Neural Net (CURRENTLY UNUSED: we are using q learner instead)
		#nni = NNInitializer()
		#self.neuralNet = nni.readNetwork(filename)

		#Update Class Parameters
		Animat.count += 1

		#Initialize Q-Table (States and Actions)
		self.qLearn = QLearn(Animat.actions)

		#Statistics
		self.multipleFoodEaten = 0;
		self.multipleDrop = 0;
Example #4
 def __init__(self, x=0, y=0, file="qlearn.txt"):
     Object.__init__(self, "prey", x, y)
     self.qlearn = QLearn(Prey.actions)
     # self.origin = (self.x, self.y)
     self.step = 4
     self.dumbed = 0
     fin = open(file, "r")
     lines = fin.readlines()
     for line in lines:
         content = line.split()
         state = int(content[0])
         action = content[1]
         value = float(content[2])
         self.qlearn.setQ(state, action, value)
         # print content
         # self.qlearn.setQ(1, 'up', 10)
         # self.qlearn.setQ(2, 'down', 10)
         # self.qlearn.setQ(3, 'left', 10)
         # self.qlearn.setQ(4, 'right', 10)
         # self.qlearn.setQ(5, 'eat', 10)
         # self.qlearn.setQ(6, 'stroll', 1000)
     self.target = None
     self.fd = diag
     self.hawkxy = []
     self.hd = diag
Example #5
	def __init__(self, x=0, y=0, file='qlearn.txt'):
		Object.__init__(self, 'prey', x, y)
		self.qlearn = QLearn(Prey.actions)
		#self.origin = (self.x, self.y)
		self.dangerous = 0
		self.step = 4
		self.dumbed = 0
		self.lastact = None
		self.foodeaten = 0

		fin = open(file, 'r')
		lines = fin.readlines()
		for line in lines:
			content = line.split()
			state = int(content[0])
			action = content[1]
			value = float(content[2])
			self.qlearn.setQ(state, action, value)
			#print content
		# self.qlearn.setQ(1, 'up', 10)
		# self.qlearn.setQ(2, 'down', 10)
		# self.qlearn.setQ(3, 'left', 10)
		# self.qlearn.setQ(4, 'right', 10)
		# self.qlearn.setQ(5, 'eat', 10)
		# self.qlearn.setQ(6, 'stroll', 1000)
		self.food = None
		self.fd = diag
		self.hawk = None
		self.hd = diag
		self.bush = None
		self.bd = diag
		self.hunger = 0
Example #6
    def __init__(self,
                 starty,
                 startx,
                 env,
                 foodTypes,
                 max_energy=100.0,
                 start_energy=50.0,
                 idnum=1):

        #Initialize instance parameters
        self.y = starty
        self.x = startx
        self.env = env
        self.ID = idnum
        self.foodTypes = foodTypes
        self.energy = [start_energy] * len(self.foodTypes)
        self.maxEnergy = [max_energy] * len(self.foodTypes)
        self.previousEnergy = copy.copy(self.energy)
        self.energyUsageRate = [1.0 / len(self.foodTypes)] * len(
            self.foodTypes)
        self.foodsEaten = [0] * len(self.foodTypes)
        self.holding = [-1] * len(self.foodTypes)

        #Initialize flags
        self.moved = False  #if animat moved and direction animat moved in
        self.alive = True

        #Initialize threshold parameters
        self.reproductionThreshold = 40.0  #need 40 energy units to reproduce

        #Load the Neural Net (CURRENTLY UNUSED: we are using q learner instead)
        #nni = NNInitializer()
        #self.neuralNet = nni.readNetwork(filename)

        #Update Class Parameters
        Animat.count += 1

        #Initialize Q-Table (States and Actions)
        self.qLearn = QLearn(Animat.actions)

        #Statistics
        self.multipleFoodEaten = 0
        self.multipleDrop = 0
Example #7
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     # TODO: Initialize any additional variables here
     self.learning_rate = 0.5
     self.discounting_factor = 0.3
     self.default_val = 2
     self.max_trials = 100
     self.l_initial_state = None
     self.l_initial_action = None
     self.l_initial_reward = None
     self.x_hit = range(0,self.max_trials)
     self.y_hit = range(0,self.max_trials)
     self.y_steps = range(0,self.max_trials)
     self.counter = -1
     self.steps_counter = -1
     self.enforce_deadline = True
     self.update_delay=0
     self.display = False
     self.QLearner = QLearn(l_actions=Environment.valid_actions,
                            l_learning_rate=self.learning_rate,
                            l_discounting_factor=self.discounting_factor,
                            l_default_val=self.default_val)
Example #8
class QAgent():
    def __init__(self, action_space, nb_state):
        self.ai = None
        self.ai = QLearn(actions=action_space,
                         alpha=0.1,
                         gamma=0.999,
                         epsilon=0.9)
        self.lastState = None
        self.lastAction = None
        self.nb_state = nb_state

    def calculate_state(self, obs):
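        # obs[0] is a position in [-1.2, 0.6] (width 1.8) and obs[1] is a
        # velocity in [-0.07, 0.07] (width 0.14); each range is split into
        # nb_state equal bins and the pair of bin indices becomes the state.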
        pos = -1
        speed = -1
        step = 1.8 / self.nb_state
        step_speed = 0.14 / self.nb_state
        j = 0
        for i in np.arange(-1.2, 0.6, step):
            if min(i, i + step) <= obs[0] <= max(i, i + step):
                pos = j
            j += 1

        j = 0
        for i in np.arange(-0.07, 0.07, step_speed):
            if min(i, i + step_speed) <= obs[1] <= max(i, i + step_speed):
                speed = j
            j += 1

        return (pos, speed)

    def action(self, obs, reward):
        state = self.calculate_state(obs)
        if self.lastState is not None:
            self.ai.learn(self.lastState, self.lastAction, state, reward)

        action = self.ai.choose_action(state)
        self.lastState = state
        self.lastAction = action
        return action
Example #9
##############################################################################

import xplane_sim as sim

################################################################################

from QLearn import QLearn
from XPlaneEnv import XPlaneEnv

#############################################################################
# y (gamma) should be high if we care about future reward
# epsilon should be small, and preferably decay over time
## n_states = 9x9x9 = 729   ## find index in cube encoding approach, not binary approach
## 3 vectors of 9 each for pitch, roll, yaw. Equivalent to reading from instruments (six pack)

Q = QLearn(729, 6, 0.95, 0.01,
           0.10)  # (n_states, n_actions, gamma, learning rate, epsilon)
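
# Illustrative sketch only (not from the original project): one way to build
# the 9x9x9 "cube" state index described in the comments above, assuming the
# pitch, roll and yaw readings have already been binned into integers 0..8.
def cube_state_index(pitch_bin, roll_bin, yaw_bin, n_bins=9):
    # Flatten (pitch, roll, yaw) into a single index in [0, n_bins**3),
    # i.e. pitch*81 + roll*9 + yaw for n_bins = 9 (analogous to numpy's
    # ravel_multi_index over a 9x9x9 grid).
    return (pitch_bin * n_bins + roll_bin) * n_bins + yaw_bin

# Example: the centre cell (4, 4, 4) maps to index 4*81 + 4*9 + 4 = 364.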

#################################################################################
# [lat, long, elev, pitch, roll, true heading/yaw , gear] ##-998 -> NO CHANGE
##  self.starting_position = orig
##  self.destination_position = dest
##  self.actions_binary_n = acts_bin   ## binary possibilities
##  self.end_game_threshold = end_param  ## end_game_distance_threshold

flight_origin = [37.524, -122.06899, 6000, -998, -998, -998, 1]  # Palo Alto
flight_destination = [37.505, -121.843611, 6000, -998, -998, -998,
                      1]  # Sunol Valley

## (orig, dest, actions_bin_n, end_param) ## end_param = 50 feet
env = XPlaneEnv(flight_origin, flight_destination, 6, 50.0)
Example #10
class Animat:
	PossibleActions = [0,1,2,3,4,5,6]
	def __init__(self, x, y, mapX, mapY,env):

		self.x = x
		self.y = y
		self.webX = mapX-4
		self.webY = mapY-4
		self.energy = 1000
		self.hunger = 1000
		self.location = (self.x, self.y)
		self.layWeb = False
		self.env = env
		self.lastState = None
		self.lastAction = 0
		
		self.brain = QLearn(Animat.PossibleActions) #-----Initialize animat's brain----#


	def animate(self):
		
		#--------Get current State--------#
		curState = self.getState()
		actionChosen = self.brain.chooseAction(curState)
		#--------Update Animat's energy levels------#
		if self.energy > 0:
			self.energy -= 0.5
		if self.hunger < 1000:
			self.hunger += 0.1
		#--------Calculate rewards from the selected action--------#
		selectedAction = actionChosen
		self.performAction(selectedAction)
		reward = self.getReward(curState,selectedAction)
		nextState = self.getState()
		#print "curState :",curState," Action:",selectedAction," reward:",reward," next:",nextState," web:",self.layWeb
		self.brain.learn(curState, selectedAction, reward, nextState)



	def performAction(self, selectedAction):
		self.layWeb = 0
		if selectedAction == 0:
			self.explore(0)
		elif selectedAction == 1:
			self.explore(1)
		elif selectedAction == 2:
			self.explore(2)
		elif selectedAction == 3:
			self.explore(3)
		elif selectedAction == 4:
			self.eat()
			#print "Eat"
		elif selectedAction == 5:
			self.layWeb = 1
			if (self.findFood() < 4):
				direct = self.findFood()
			else:
				direct = random.randrange(0,4)
			self.explore(direct)
		elif selectedAction ==6:
			pass
		else:
			pass



	def explore(self,direction):
		#direction = random.randrange(0,5)
		if direction == 0:
			if (self.y+1 < self.webY+1 and self.env.canWalk(self.x,self.y+1)):
				self.y += 1
			else: pass
		if direction == 1:
			if (self.x+1 < self.webX+1 and self.env.canWalk(self.x+1,self.y)):
				self.x += 1
			else: pass
		if direction == 2:
			if (self.x-1 > 3 and self.env.canWalk(self.x-1,self.y)):
				self.x -= 1
			else: pass
		if direction == 3:
			if (self.y-1 > 3 and self.env.canWalk(self.x,self.y-1)):
				self.y -= 1
			else: pass

	def eat(self):
		if (self.env.returnFoodID(self.x,self.y) != -1):
			if self.energy < 960:
				self.energy += 40
			if self.hunger > 0:
				self.hunger -= 20
			self.env.removeFood(self.env.returnFoodID(self.x,self.y))			
		else:
			pass

	def findFood(self):
		direction = 0
		# self.env.getFoodgradient(self.x,self.y)
		if (self.env.foodList):
			self.env.getFoodgradient(self.x,self.y)
			for i in range(0, len(self.env.foodList)):
				if (self.env.canWalk(self.env.foodList[i].x,self.env.foodList[i].y)):
					foodTarget = self.env.foodList[i]
					if (self.x != foodTarget.x):
						if(self.x < foodTarget.x):
							direction = 1
							if(self.x > foodTarget.x-2 and self.x < foodTarget.x+2 and self.y > foodTarget.y-2 and self.y < foodTarget.y+2):
								direction += 6
								#print "close to food:",self.x,",",self.y,"f:",foodTarget.x,",",foodTarget.y
							#print "east"
							return direction
						elif(self.x > foodTarget.x):

							direction = 2
							if(self.x > foodTarget.x-2 and self.x < foodTarget.x+2 and self.y > foodTarget.y-2 and self.y < foodTarget.y+2):
								direction += 6
							return direction
					elif (self.y != foodTarget.y):
						if(self.y < foodTarget.y):
							direction = 0
							if(self.y > foodTarget.y-2 and self.y < foodTarget.y+2 and self.x > foodTarget.x-2 and self.x < foodTarget.x+2):
								direction += 6
							return direction
						elif(self.y > foodTarget.y):

							direction += 3
							if(self.y > foodTarget.y-2 and self.y < foodTarget.y+2 and self.x > foodTarget.x-2 and self.x < foodTarget.x+2):
								direction += 6

							return direction
					else:
						if(self.hunger > 0):
							direction = 4

						else:
							direction = 10
						return direction
		
		direction = 5
		return direction

	def getState(self):
		state = self.findFood()
		
		return state 

	def getReward(self, state, action):
		reward = -1
		if(action < 4 and action == state): #----- follow food gradient----#
			reward += 10
		elif(state == 4):
			if(action == 4):
				reward += 50
			else:
				reward = -30
		elif(state == 10): #------On top of food b/ not hungry----#
			if(action == 5):
				reward += 50
			elif(action == 4):
				reward =-1

		elif(state == 5):	#------No food -----#	
			if(action == 6):
				reward += 10
			elif(action != 4 and action != 5):
				reward += 5
			else:
				reward -= 10
		if(state > 5 and state == action+6): #----- close to food source---#
			reward += 10
		if(state > 5 and action == 5):
			reward += 50
		return reward 
Example #11
class Prey(Object):

    senserange = 80
    distinct = 10

    # actions = ['up', 'down', 'left','right', 'stroll', 'eat']
    actions = ["up", "down", "left", "right", "eat", "stay"]

    def __init__(self, x=0, y=0, file="qlearn.txt"):
        Object.__init__(self, "prey", x, y)
        self.qlearn = QLearn(Prey.actions)
        # self.origin = (self.x, self.y)
        self.step = 4
        self.dumbed = 0
        fin = open(file, "r")
        lines = fin.readlines()
        for line in lines:
            content = line.split()
            state = int(content[0])
            action = content[1]
            value = float(content[2])
            self.qlearn.setQ(state, action, value)
            # print content
            # self.qlearn.setQ(1, 'up', 10)
            # self.qlearn.setQ(2, 'down', 10)
            # self.qlearn.setQ(3, 'left', 10)
            # self.qlearn.setQ(4, 'right', 10)
            # self.qlearn.setQ(5, 'eat', 10)
            # self.qlearn.setQ(6, 'stroll', 1000)
        self.target = None
        self.fd = diag
        self.hawkxy = []
        self.hd = diag

    def tick(self, env):
        # qLearn.
        # initial reward for each step
        self.init()
        currentState = self.getState(env)
        action = self.qlearn.chooseAction(currentState)
        self.act(currentState, action, env)
        # print dis
        nextState = self.getState(env)  # get the new state after performing actions
        reward = self.getReward(
            currentState, nextState, action
        )  # update energies and get the reward after performing action
        if currentState == 11:
            print currentState, action, reward
            # 	#time.sleep(1)

            # print "Reward is:", reward
        self.qlearn.learn(currentState, action, reward, nextState)  # update the Q Table

    def init(self):
        self.hawk = None
        self.reward = 0
        self.pdis = 0
        self.safe = 0

    def act(self, state, action, env):
        step = self.step
        # if state == 6:
        # 	step = Prey.foodrange
        if action == "up":
            self.y = self.y - step
        if action == "down":
            self.y = self.y + step
        if action == "left":
            self.x = self.x - step
        if action == "right":
            self.x = self.x + step
        if action == "stroll":
            if self.target not in env.food:
                self.target = random.choice(env.food)
            food = self.target

            self.x += 2 * (step * (random.random() - 0.5))
            self.y += 2 * (step * (random.random() - 0.5))

            x = abs(self.x - food.x)
            y = abs(self.y - food.y)
            t = max(x, y)
            self.x = int((t * self.x + 2 * food.x) / (t + 2.0))
            self.y = int((t * self.y + 2 * food.y) / (t + 2.0))

            # print 'stroll', food.x, food.y
        self.adjustxy()
        if action == "eat":
            self.eat(state, env)
            # print self.x, self.y
            # print action
        if self.pdis != 0:
            hawk = self.hawk
            diffx = hawk.x - self.x
            diffy = hawk.y - self.y
            dis = abs(diffx) + abs(diffy)
            self.reward = dis - self.pdis
        else:
            dis = abs(self.x - self.target.x) + abs(self.y - self.target.y)
            self.reward = self.fdis - dis

    def adjustxy(self):
        self.dumbed = 0
        if self.x < 0:
            self.x = 0
            self.dumbed = 1
        elif self.x > width:
            self.x = width
            self.dumbed = 1
        if self.y < 0:
            self.y = 0
            self.dumbed = 1
        elif self.y > height:
            self.y = height
            self.dumbed = 1

    def getState(self, env):

        hawk = env.find("hawk", (self.x, self.y), Prey.senserange)
        if hawk != None:
            err = 10
            shelter = env.find("hawkshelter", (self.x, self.y), Prey.senserange)
            if shelter != None and abs(shelter.y - self.y) <= err and abs(shelter.x - self.x) <= err:
                self.safe = 1
                return 11  # right shelter

            self.hawk = hawk
            state = []
            diffx = hawk.x - self.x
            diffy = hawk.y - self.y
            x = self.x - 2 * diffx
            y = self.y - 2 * diffy

            if x < 0:
                x = 0
            elif x > width:
                x = width
            if y < 0:
                y = 0
            elif y > height:
                y = height

            self.target = Food(x, y)
            # print self.target.x, self.target.y
            # 7 8 9 10  ## coordinate 1 2 3 4
            err = 5
            if diffy < -err:
                state += [7]
            if diffy > err:
                state += [8]
            if diffx < -err:
                state += [9]
            if diffx > err:
                state += [10]
            self.pdis += abs(diffx) + abs(diffy)
            # print state
            if state == []:
                state = [7, 8, 9, 10]
            return random.choice(state)

        err = 10
        food = env.find("food", (self.x, self.y), diag)
        if self.target == None:
            self.target = food
        if self.target not in env.food:
            if abs(self.target.y - self.y) <= err and abs(self.target.x - self.x) <= err:
                self.target = random.choice(env.food)

        food = env.find("food", (self.x, self.y), Prey.senserange)
        if food != None:
            self.target = food

        food = self.target

        state = []
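        # Each candidate direction is appended multiple times, in proportion
        # to the distance along that axis (times a random factor), so the
        # random.choice(state) at the end is biased toward the larger offset.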
        if food != None:
            if food.y + err < self.y:
                state += [1] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # up
            if food.y - err > self.y:
                state += [2] * (abs(food.y - self.y) * int(10 * random.random() + 1))  # down
            if food.x + err < self.x:
                state += [3] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # left
            if food.x - err > self.x:
                state += [4] * (abs(food.x - self.x) * int(10 * random.random() + 1))  # right
            if abs(food.y - self.y) <= err and abs(food.x - self.x) <= err:
                state.append(5)  # on
                # food = env.find('food', (self.x, self.y), Prey.senserange)
                # if food!=None:
                # 	self.target = food
                # else:
                # 	if self.target == None:
                # 		self.target = random.choice(env.food)
                # 	if self.target not in env.food and random.random()<0.05:
                # 		self.target = random.choice(env.food)
        else:
            state.append(6)  # nothing
            if self.target == None:
                self.target = random.choice(env.food)
            if self.target not in env.food and random.random() < 0.05:
                self.target = random.choice(env.food)
        self.fdis = abs(self.x - self.target.x) + abs(self.y - self.target.y)

        # 	if abs(food.x-self.x)<=err and abs(food.y-self.y)<=err:
        # 		state = 5
        # 	else:
        # 		if food.y <= self.y and food.x >= self.x:
        # 			state = 1 # quadrant 1
        # 		elif food.y <= self.y and food.x <= self.x:
        # 			state = 2 # quadrant 2
        # 		elif food.x < self.x and food.y > self.y:
        # 			state = 3 # quadrant 3
        # 		#elif food.x > self.x and food.y > self.y:
        # 		else:
        # 			state = 4 # quadrant 4
        # 			print state
        # else:
        # 	state = 6 # nothing
        # print state
        # print state
        # print state
        # food = env.find('food', (self.x, self.y), diag)
        return random.choice(state)

    def getReward(self, state, nstate, action):
        # if state==6:

        reward = self.reward

        if self.safe == 1:
            reward += 100

        if action == "eat":
            reward -= 25
            if state == 5:
                reward += 125
                # if state<=4:   # for food
                # 	if dis<=0: # <=0 further
                # 		reward -= 10
                # elif state == 6:
                # 	if dis>0:
                # 		reward += int(random.random()*2)*10
                # elif state<=11 and state>=7: # for hawk
                # 	#reward
                # 	if dis<0: # <=0 further
                # 		reward += 20
                # 	else:
                # 		reward -= 40

        if self.dumbed:
            reward -= 100

            # if state<=6 and nstate>6:
            # 	print state
            # 	reward -= 300

        return reward
        # if action == 'eat':
        # 	if state == 5:
        # 		return 100
        # 	else:
        # 		return -100
        # else:
        # 	return 0
        # 	if state==6 and nextstate != 6:
        # 		self.reward += 0
        # self.reward -= 1;
        # else:
        # 	return -10

    def eat(self, state, env):
        if state == 5:
            # print 'eating'
            food = env.find("food", (self.x, self.y), Prey.senserange)
            if food != None:
                env.remove(food)
Example #12
# RUN THIS to train using QL
import numpy as np
import os.path

from gridnet import GridNet
from QLearn import QLearn

#------------------main----------------------------------------
QPATH = './q.npy'
Training_steps = 50000
#Training_steps = 5

gnet = GridNet()
qlearn = QLearn(gnet.n_size, gnet.n_BS, gnet.n_actions, gamma=0.9)

print('Training ...')
total_score = 0
for i in range(Training_steps):
    #random x, y, prev_BS and action
    x = np.random.randint(gnet.n_size)
    y = np.random.randint(gnet.n_size)
    #prev_BS = -1 means no prev BS. Use for source.
    prev_BS = np.random.randint(-1, gnet.n_BS)
    obs = [x, y, prev_BS]
    action = np.random.randint(gnet.n_actions)
    obs_, reward = gnet.step(obs, action)
    score = qlearn.train(obs, action, obs_, reward)
    total_score = total_score + score
    if (i % 1000 == 0):  #calculate loss every 1000 steps
        loss = total_score / 1000.0
        loss = np.sqrt(loss)
Example #13
class Prey(Object):
	
	senserange = 80
	distinct = 10

	#actions = ['up', 'down', 'left','right', 'stroll', 'eat']
	actions = ['up', 'down', 'left','right', 'stay', 'eat']
	def __init__(self, x=0, y=0, file='qlearn.txt'):
		Object.__init__(self, 'prey', x, y)
		self.qlearn = QLearn(Prey.actions)
		#self.origin = (self.x, self.y)
		self.dangerous = 0
		self.step = 4
		self.dumbed = 0
		fin = open(file, 'r')
		lines = fin.readlines()
		for line in lines:
			content = line.split()
			state = int(content[0])
			action = content[1]
			value = float(content[2])
			self.qlearn.setQ(state, action, value)
			#print content
		# self.qlearn.setQ(1, 'up', 10)
		# self.qlearn.setQ(2, 'down', 10)
		# self.qlearn.setQ(3, 'left', 10)
		# self.qlearn.setQ(4, 'right', 10)
		# self.qlearn.setQ(5, 'eat', 10)
		# self.qlearn.setQ(6, 'stroll', 1000)
		self.food = None
		self.fd = diag
		self.hawkxy = []
		self.hd = diag
		self.bush = None
		self.bd = diag

	def tick(self, env):
		#qLearn.
		# initial reward for each step
		currentState  = self.getState(env)
		action = self.qlearn.chooseAction(currentState)
		dis = self.act(currentState, action, env)
		nextState = self.getState(env) #get the new state after performing actions
		#print dis
		reward = self.getReward(currentState, nextState, action, dis) #update energies and get the reward after performing action
		if currentState>=7:
			print currentState, action, reward, dis
		
		#print "Reward is:", reward
		self.qlearn.learn(currentState, action, reward, nextState) #update the Q Table

	def act(self, state, action, env):
		step = self.step
		#if state == 6:
		#	step = Prey.foodrange
		if action == 'up':
			self.y = self.y-step
		if action == 'down':
			self.y = self.y+step
		if action == 'left':
			self.x = self.x-step
		if action == 'right':
			self.x = self.x+step
		if action == 'stroll':		
			if self.food not in env.food:
				self.food = random.choice(env.food)
			food = self.food
			print food.x, food.y

			self.x += 2 * (step * (random.random()-0.5))
			self.y += 2 * (step * (random.random()-0.5))
			
			x = abs(self.x - food.x)
			y = abs(self.y - food.y)
			t = max(x,y)
			self.x = int((t*self.x + 2*food.x) / (t+2.0))
			self.y = int((t*self.y + 2*food.y) / (t+2.0))
			
			# print 'stroll', food.x, food.y
		self.adjustxy()
		if action == 'eat':
			self.eat(state, env)
		if state <= 6:
			return self.fd - (abs(self.x-self.food.x) + abs(self.y-self.food.y))
		elif state <= 11:
			hd = 0
			for hawkxy in self.hawkxy:
				hd += (abs(self.x-hawkxy[0]) + abs(self.y-hawkxy[1]))
			return (self.hd - hd)
		#print self.x, self.y
		#print action

	def adjustxy(self):
		self.dumbed = 1
		if self.x < 0:
			self.x = 0
		elif self.x > width:
			self.x = width
		elif self.y < 0:
			self.y = 0
		elif self.y > height:
			self.y = height
		else:
			self.dumbed = 0

	def getState(self, env):
		if self.dangerous:
			r = Predator.senserange+Prey.distinct
			self.food = random.choice(env.food)
		else:
			r = Prey.senserange
		hawks = env.findall('hawk', (self.x, self.y), r)
		if len(hawks) != 0:
			self.hawkxy = []
			self.dangerous = 1
			if len(hawks) == 1:
				hawk = hawks[0]
				err = 0
				state = []
				if hawk.y + err <= self.y and hawk.x - err >= self.x:
					state += [7]  # 1 up right
				if hawk.y + err <= self.y and hawk.x + err <= self.x:
					state += [8]  # 2 up left
				if hawk.y - err >= self.y and hawk.x + err <= self.x:
					state += [9]  # 3 down left
				if hawk.y - err >= self.y and hawk.x - err >= self.x:
					state += [10] # 4 down right
				self.hawkxy.append((hawk.x, hawk.y))
				self.hd = abs(self.x-hawk.x) + abs(self.y-hawk.y)
				return random.choice(state)
			else:
				self.hd = 0
				for hawk in hawks:
					self.hawkxy.append((hawk.x, hawk.y))
					self.hd += (abs(self.x-hawk.x)+abs(self.y-hawk.y))
				return 11 # many hawks

		self.dangerous = 0
		food = env.find('food', (self.x, self.y), Prey.senserange)
		err = 10
		state = []
		if food != None:
			if food.y + err < self.y:
				state += [1] * (abs(food.y - self.y) * int(10*random.random()+1)) # up
			if food.y - err > self.y:
				state += [2] * (abs(food.y - self.y) * int(10*random.random()+1)) # down
			if food.x + err < self.x:
				state += [3] * (abs(food.x - self.x) * int(10*random.random()+1))# left
			if food.x - err > self.x:
				state += [4] * (abs(food.x - self.x) * int(10*random.random()+1))# right
			if abs(food.y-self.y) <= err and abs(food.x-self.x) <= err:
				state.append(5) # on
			self.food = food
		else:
			state.append(6) # nothing	
			if self.food not in env.food:
				self.food = random.choice(env.food)
		self.fd = abs(self.x-self.food.x) + abs(self.y-self.food.y)
			
		# 	if abs(food.x-self.x)<=err and abs(food.y-self.y)<=err:
		# 		state = 5
		# 	else:
		# 		if food.y <= self.y and food.x >= self.x:
		# 			state = 1 # quadrant 1
		# 		elif food.y <= self.y and food.x <= self.x:
		# 			state = 2 # quadrant 2
		# 		elif food.x < self.x and food.y > self.y:
		# 			state = 3 # quadrant 3
		# 		#elif food.x > self.x and food.y > self.y:
		# 		else:
		# 			state = 4 # quadrant 4
		# 			print state
		# else:
		# 	state = 6 # nothing
			#print state
		#print state
		#print state
		#food = env.find('food', (self.x, self.y), diag)
		return random.choice(state)

	def getReward(self, state, nstate, action, dis):
		#if state==6:
		reward = 0
		if action == 'eat':
			reward -= 25
			if state == 5:
				reward += 125
		if state<=6:   # for food 
			if dis<=0: # <=0 further
				reward -= 100
			else:
				reward -= 10
		elif state<=11: # for hawk
			#reward 
			if dis<0: # <=0 further
				reward += 20
			else:
				reward -= 40

		if self.dumbed:
			reward -= 200

		#if state<=6 and nstate>6:
		#	reward -= 1000

		return reward
		# if action == 'eat':
		# 	if state == 5:
		# 		return 100
		# 	else:
		# 		return -100
		# else:
		# 	return 0
	#	if state==6 and nextstate != 6:
	#		self.reward += 0
		#self.reward -= 1;
		# else:
		# 	return -10

	def eat(self, state, env):
		if state==5:
			# print 'eating'
			food = env.find('food', (self.x, self.y), Prey.senserange)
			env.remove(food)

#if __name__ == "__main__":
#	p = Prey()

#	print p.x
Example #14
class Animat:

    #Class Parameters
    energyPerTick = []
    singleFoodEaten = []
    multipleFoodEaten = []
    energyPerTickIndex = 0
    singleFoodEatenIndex = 0
    multipleFoodEatenIndex = 0
    count = 0
    ID = -1
    allowDeath = False
    foodTargeting = True
    energyThreshold = 80
    actions = ['north', 'south', 'east', 'west', 'eat', 'pickup', 'drop']

    def __init__(self,
                 starty,
                 startx,
                 env,
                 foodTypes,
                 max_energy=100.0,
                 start_energy=50.0,
                 idnum=1):

        #Initialize instance parameters
        self.y = starty
        self.x = startx
        self.env = env
        self.ID = idnum
        self.foodTypes = foodTypes
        self.energy = [start_energy] * len(self.foodTypes)
        self.maxEnergy = [max_energy] * len(self.foodTypes)
        self.previousEnergy = copy.copy(self.energy)
        self.energyUsageRate = [1.0 / len(self.foodTypes)] * len(
            self.foodTypes)
        self.foodsEaten = [0] * len(self.foodTypes)
        self.holding = [-1] * len(self.foodTypes)

        #Initialize flags
        self.moved = False  #if animat moved and direction animat moved in
        self.alive = True

        #Initialize threshold parameters
        self.reproductionThreshold = 40.0  #need 40 energy units to reproduce

        #Load the Neural Net (CURRENTLY UNUSED: we are using q learner instead)
        #nni = NNInitializer()
        #self.neuralNet = nni.readNetwork(filename)

        #Update Class Parameters
        Animat.count += 1

        #Initialize Q-Table (States and Actions)
        self.qLearn = QLearn(Animat.actions)

        #Statistics
        self.multipleFoodEaten = 0
        self.multipleDrop = 0

    def tick(self):
        #qLearn.
        currentState = self.getState()
        targetFood = self.getTargetFoodSource()
        targetFoodDirection = self.senseEnvironment(targetFood)
        #print targetFood,self.energy
        action = self.qLearn.chooseAction(currentState)
        self.performQLearnAction(action)
        reward = self.getReward(
            targetFoodDirection, action
        )  #update energies and get the reward after performing action
        #print "Reward is:", reward
        nextState = self.getState(
        )  #get the new state after performing actions
        self.qLearn.learn(currentState, action, reward,
                          nextState)  #update the Q Table
        self.resetFlags()
        self.checkDeath()
        return self.alive

    #Perform action based on input action. Should return the integer value
    #of the +/- reward experienced from performing the action
    def performQLearnAction(self, action):

        if action == 'north':
            self.move(self.y - 1, self.x)

        if action == 'south':
            self.move(self.y + 1, self.x)

        if action == 'east':
            self.move(self.y, self.x + 1)

        if action == 'west':
            self.move(self.y, self.x - 1)

        if action == 'eat':
            self.eatAll()

        if action == 'pickup':
            self.pickupAnything()

        if action == 'drop':
            self.dropAnything()

    def getState(self):
        # Pick 1 or 0 for each state, add to total,
        # then shift total <<
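        # The "binary" string is built in base 10: every sensed flag appends
        # decimal digits that are only ever 0 or 1, and int(str(total), 2)
        # at the end reinterprets that digit string as a binary number,
        # giving a distinct integer state per sensed configuration.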

        total = 1

        if Animat.foodTargeting:
            total *= 100
            targetFood = self.getTargetFoodSource()
            if targetFood == 0:
                total += 0
            elif targetFood == 1:
                total += 1
            elif targetFood == 2:
                total += 10
            elif targetFood == 3:
                total += 11
        for i in self.foodTypes:
            total *= 10
            total += 1 if (self.holding[self.foodTypes[i]] > 0) else 0
            total *= 10
            total += 1 if (self.isOnFood(i)) else 0
            foodgradient = self.senseEnvironment(i)
            total *= 10
            total *= 10
            if (foodgradient == 'north'):
                total += 0
            elif (foodgradient == 'south'):
                total += 1
            elif (foodgradient == 'west'):
                total += 10
            elif (foodgradient == 'east'):
                total += 11

        return int(str(total), 2)

    @classmethod
    def randomStart(cls, sizey, sizex):
        # Given the size of the environment, start at random location
        #self.y = random.randint(1,sizey-1) - 1
        #self.x = random.randint(1,sizex-1) - 1
        return cls(
            random.randint(1, sizey - 1) - 1,
            random.randint(1, sizex - 1) - 1)

    @classmethod
    def setDeath(cls, death):
        Animat.allowDeath = death

    @classmethod
    def resetStats(Death):
        Animat.energyPerTick = []
        Animat.singleFoodEaten = []
        Animat.multipleFoodEaten = []
        Animat.energyPerTickIndex = 0
        Animat.singleFoodEatenIndex = 0
        Animat.multipleFoodEatenIndex = 0

    @classmethod
    def startTick(self):
        Animat.energyPerTick.append(0)
        Animat.singleFoodEaten.append(0)
        Animat.multipleFoodEaten.append(0)

        if (Animat.singleFoodEatenIndex > 0):
            Animat.singleFoodEaten[
                Animat.singleFoodEatenIndex] += Animat.singleFoodEaten[
                    Animat.singleFoodEatenIndex - 1]

        if (Animat.multipleFoodEatenIndex > 0):
            Animat.multipleFoodEaten[
                Animat.multipleFoodEatenIndex] += Animat.multipleFoodEaten[
                    Animat.multipleFoodEatenIndex - 1]

    @classmethod
    def endTick(self):
        Animat.energyPerTickIndex += 1
        Animat.singleFoodEatenIndex += 1
        Animat.multipleFoodEatenIndex += 1

    def displayLocation(self):
        print "y is " + str(self.y) + ", x is " + str(self.x)

    def move(self, newy, newx):
        if self.env[0].canMove(self.y, self.x, newy, newx):
            self.y = newy
            self.x = newx
            self.moved = True
            for f in self.foodTypes:
                if (self.holding[f] >= 0):
                    # move the food I'm holding
                    self.env[f].returnFood(self.holding[f]).y = self.y
                    self.env[f].returnFood(self.holding[f]).x = self.x
                    break

    def pickupAnything(self):
        # Go through the food types
        for i, foodType in enumerate(self.foodTypes):
            if self.pickup(foodType):
                return

    def pickup(self, foodType):
        # Check to see if we're holding anything already.
        # Enforce holding one item at a time.
        if max(self.holding) == -1:
            foodID = self.env[foodType].returnFoodIDAt(self.y, self.x)
            if foodID != -1:
                # There is food here. Can we pick it up?
                if self.env[foodType].returnFood(foodID).pickUp():
                    # We successfully picked it up
                    self.holding[foodType] = self.env[foodType].returnFoodIDAt(
                        self.y, self.x)
                    return True
        return False

    def dropAnything(self):
        # Drop whatever we're holding
        for i, foodType in enumerate(self.foodTypes):
            if self.drop(foodType):
                return

    def drop(self, foodType):
        # Check to see
        if self.holding[foodType] != -1:
            # Check to see if we're about to drop one food type on a different food type
            # If so, increment self.multipleDrop
            # def isOnFood(self,foodType):
            for f in self.foodTypes:
                if foodType != f and self.isOnFood(f):
                    self.multipleDrop += 1
                    break

            self.env[foodType].returnFood(self.holding[foodType]).drop()
            self.holding[foodType] = -1
            return True
        return False

    def checkDeath(self):
        if Animat.allowDeath:
            for e in self.energy:
                if e <= 0:
                    Animat.count -= 1
                    self.alive = False
                    print "Animat died."
                    return

    def eatAnything(self):
        for i, foodType in enumerate(self.foodTypes):
            if self.eat(foodType):
                return

    def eatAll(self):
        for i, foodType in enumerate(self.foodTypes):
            if self.eat(foodType):
                self.foodsEaten[i] = 1
            else:
                self.foodsEaten[i] = 0
        return self.foodsEaten

    def eat(self, foodType):
        foodId = self.env[foodType].returnFoodIDAt(self.y, self.x)
        if foodId >= 0:
            foodItem = self.env[foodType].returnFood(foodId)
            if not foodItem.held:
                self.eatFood(foodItem, foodType)
                return True
        return False

    def eatFood(self, foodItem, foodType):
        foodItem.eat()
        if foodItem.size == 0:
            self.env[foodType].removeFood(foodItem.id)
            print "Food removed from environment"

    def printEnergy(self):
        print "Energy: " + str(self.energy)

    def senseEnvironment(self, foodType):
        inputValues = self.env[foodType].getScentsCEWNS(self.y, self.x)
        maxVal = max(inputValues)
        maxIndeces = [
            i for i, mymax in enumerate(inputValues) if mymax == maxVal
        ]
        if maxIndeces:
            maxIndex = choice(maxIndeces)
        if maxIndex == 0:
            state = 'center'
        if maxIndex == 1:
            state = 'east'
        if maxIndex == 2:
            state = 'west'
        if maxIndex == 3:
            state = 'north'
        if maxIndex == 4:
            state = 'south'

        return state

    def isOnFood(self, foodType):
        id = self.env[foodType].returnFoodIDAt(self.y, self.x)
        if id != -1:
            return True
        return False

    def followGradient(self, stateMachine, toEat, toFollow):
        if stateMachine == 'notholding':
            self.dropAnything()
            self.performQLearnAction(self.senseEnvironment(toEat))
            if self.isOnFood(toEat):
                if self.pickup(toEat):
                    return 'holding'
                else:
                    return 'fail'
            return 'notholding'
        elif stateMachine == 'holding':
            self.performQLearnAction(self.senseEnvironment(toFollow))
            if self.isOnFood(toFollow):
                if self.drop(toEat):
                    return 'eat'
            return 'holding'
        elif stateMachine == 'eat':
            #if self.isOnFood(toEat):
            #	self.eat(toEat);
            #	return 'eat';
            #elif self.isOnFood(toFollow):
            #	self.eat(toFollow);
            #	return 'eat';
            if self.isOnFood(toEat) or self.isOnFood(toFollow):
                if not max(self.eatAll()) == 0:
                    return 'eat'
                else:
                    return 'fail'
            else:
                return 'notholding'

    def replenishEnergy(self, energy=500.0):
        self.energy = [energy] * len(self.foodTypes)
        self.alive = True

    #reset flags for next iteration
    def resetFlags(self):
        self.moved = False
        self.followedGradient = False
        self.foodsEaten = [0] * len(self.foodTypes)

    def getTargetFoodSource(self):
        energyTilMax = [y - x for x, y in zip(self.energy, self.maxEnergy)
                        ]  # maxEnergy - currEnergy for each food source
        satiation = [y * x for x, y in zip(self.energyUsageRate, energyTilMax)]
        satiation = [
            y if x < 0 else -1 for x, y in zip(self.holding, satiation)
        ]
        maxFollowValue = max(satiation)
        targetFoodSources = [
            i for i, mymax in enumerate(satiation) if mymax == maxFollowValue
        ]
        if targetFoodSources:
            targetFood = choice(targetFoodSources)

        return targetFood

    def getReward(self, targetDirection, action):

        #Animat Parameter Constants
        LIVING_COST = 1.0
        MOVEMENT_COST = 0.01  # Cost to move one unit
        EATING_REWARD = 10.0  # Reward for eating one food source
        EATING_MULT_ENERGY = 50.0
        EATING_MULT_REWARD = 100.0
        GRADIENT_FOLLOW_REWARD = 10.0

        previousEnergy = copy.copy(self.energy)

        #print "Foods eaten: ", self.foodsEaten

        #Reward Gradient
        gradientReward = 0

        if targetDirection == action:
            LIVING_COST = 0
            MOVEMENT_COST = 0
            #gradientReward = LIVING_COST + MOVEMENT_COST #offset cost if following the target gradient
            #print targetDirection, action

        #Subtract living cost and movement cost for each energy rate
        self.energy = [
            currEnergy + EATING_REWARD * foodEaten - rate *
            (LIVING_COST + MOVEMENT_COST * self.moved)
            for currEnergy, rate, foodEaten in zip(
                self.energy, self.energyUsageRate, self.foodsEaten)
        ]
        self.energy = [
            min(currEnergy, maxEnergy)
            for currEnergy, maxEnergy in zip(self.energy, self.maxEnergy)
        ]  #Limit energy to max energy

        numFoodEaten = self.foodsEaten.count(1)

        if numFoodEaten > 1:
            for i, v in enumerate(self.foodsEaten):
                if v > 0:
                    self.energy[
                        i] += EATING_MULT_ENERGY  #Add extra energy to food buckets when they eat multiple foods

        #Compute delta energy for each energy bucket
        deltaEnergy = [
            currEnergy - prevEnergy
            for currEnergy, prevEnergy in zip(self.energy, previousEnergy)
        ]
        netDeltaEnergy = sum(deltaEnergy)  #sum up all of the delta energies

        #Determine a reward multiplier if eating multiple foods when hungry
        rewardsMultiplier = 1
        if numFoodEaten > 1 and netDeltaEnergy > 0:
            rewardsMultiplier += pow(EATING_MULT_REWARD, numFoodEaten - 1)
            print "Ate ", numFoodEaten, " food sources!"
            self.multipleFoodEaten += 1
            Animat.multipleFoodEaten[Animat.multipleFoodEatenIndex] += 1

        if numFoodEaten > 0:
            Animat.singleFoodEaten[Animat.singleFoodEatenIndex] += 1

        reward = netDeltaEnergy * rewardsMultiplier + gradientReward

        Animat.energyPerTick[Animat.energyPerTickIndex] += netDeltaEnergy

        #print action, targetDirection, self.foodsEaten, previousEnergy, self.energy, deltaEnergy, netDeltaEnergy
        #print reward

        return reward
Example #15
class Prey(Object):

	knowsnake = 0
	
	senserange = 80
	distinct = 10

	#actions = ['up', 'down', 'left','right', 'stroll', 'eat']
	actions = ['up', 'down', 'left', 'right', 'eat', 'stay']

	def __init__(self, gen = 'AA', x=0, y=0 , file='qlearn.txt'):
		Object.__init__(self, 'prey', x, y)
		self.qlearn = QLearn(Prey.actions)
		#self.origin = (self.x, self.y)
		self.step = 4
		self.gen = gen
		self.dumbed = 0
		fin = open(file, 'r')
		lines = fin.readlines()
		for line in lines:
			content = line.split()
			state = int(content[0])
			action = content[1]
			value = float(content[2])
			self.qlearn.setQ(state, action, value)
			#print content
		# self.qlearn.setQ(1, 'up', 10)
		# self.qlearn.setQ(2, 'down', 10)
		# self.qlearn.setQ(3, 'left', 10)
		# self.qlearn.setQ(4, 'right', 10)
		# self.qlearn.setQ(5, 'eat', 10)
		# self.qlearn.setQ(6, 'stroll', 1000)
		self.target = None
		self.fd = diag
		self.hawkxy = []
		self.hd = diag
		self.energy = 70

		
	
	def tick(self, env):
		#qLearn.
		# initial reward for each step
		self.init()
		currentState  = self.getState(env)
		action = self.qlearn.chooseAction(currentState)
		self.act(currentState, action, env)
		#print dis
		nextState = self.getState(env) #get the new state after performing actions
		reward = self.getReward(currentState, nextState, action) #update energies and get the reward after performing action
#		print currentState, action, reward
		# 	#time.sleep(1)
		
		
		
		#print "Reward is:", reward
		self.qlearn.learn(currentState, action, reward, nextState) #update the Q Table

	def init(self):
		self.hawk = None
		self.reward = 0
		self.pdis = 0
		self.safe = []
		self.escape = 0

	def act(self, state, action, env):
		self.energy -= 0.3

		if self.energy >= 70:
			env.prey.append(Prey())
			

		if self.energy <= 0:
			env.prey.remove(self)
			print 'die from food'
		step = self.step
		#if state == 6:
		#	step = Prey.foodrange
		if action == 'up':
			self.y = self.y-step

			
		if action == 'down':
			self.y = self.y+step
		if action == 'left':
			self.x = self.x-step
		if action == 'right':
			self.x = self.x+step
		if action == 'stroll':		
			if self.target not in env.food:
				self.target = random.choice(env.food)
			food = self.target

			self.x += 2 * (step * (random.random()-0.5))
			self.y += 2 * (step * (random.random()-0.5))
			
			x = abs(self.x - food.x)
			y = abs(self.y - food.y)
			t = max(x,y)
			self.x = int((t*self.x + 2*food.x) / (t+2.0))
			self.y = int((t*self.y + 2*food.y) / (t+2.0))
			
			# print 'stroll', food.x, food.y
		self.adjustxy()
		if action == 'eat':
			self.eat(state, env)
		#print self.x, self.y
		#print action
	
		dis = abs(self.x-self.target.x) + abs(self.y-self.target.y)
		self.reward = self.fdis - dis

	def adjustxy(self):
		self.dumbed = 0
		if self.x < 0:
			self.x = 0
			self.dumbed = 1
		elif self.x > width:
			self.x = width
			self.dumbed = 1
		if self.y < 0:
			self.y = 0
			self.dumbed = 1
		elif self.y > height:
			self.y = height
			self.dumbed = 1

	def getState(self, env):
		err = self.step-1
		# check existence of predator 
		hawks = env.findall('hawk', (self.x, self.y), Prey.senserange)
		foxes = env.findall('fox', (self.x, self.y), Prey.senserange)
		snakes = env.findall('snake', (self.x, self.y), Prey.senserange)

		predators = hawks+foxes
		type = ['hawk']*min(1,len(hawks)) + ['fox']*min(1,len(foxes))

		if Prey.knowsnake > random.random():
			predators += snakes
			type += ['snake']*min(1,len(snakes))

		if snakes != []:
			Prey.knowsnake += 0.001
			Prey.knowsnake = min(1.5, Prey.knowsnake)
		
		if predators != []:
			self.escape = 1
			x = 0
			y = 0
			for predator in predators:
				x += predator.x
				y += predator.y
			x /= len(predators)
			y /= len(predators)
			nearest = env.find('predator', (self.x, self.y), Prey.senserange)
			shelter = env.findshelter(type, (self.x, self.y), (x,y), Prey.senserange)
			if shelter!=None:
				self.target = shelter
	
			else:
				# diffx = x - self.x
				# diffy = y - self.y
				x = 3*self.x-2*x
				y = 3*self.y-2*y

				if x < 0:
					if y < 0:
						x, y = -y, -x
					elif y > height:
						x, y = y-height, height+x
					elif y <= height/2:
						x, y = 0, y+Prey.senserange
					else:
						x, y = 0, y-Prey.senserange
				elif x > width:
					if y < 0:
						x, y = -y, x-width
					elif y > height:
						x, y = width+height-y, width+height-x
					elif y <= height/2:
						x, y = width, y+Prey.senserange
					else:
						x, y = width, y-Prey.senserange
				elif x < width/2:
					if y < 0:
						x, y = x+Prey.senserange, 0
					elif y > height:
						x, y = x+Prey.senserange, height
				else:
					if y < 0:
						x, y = x-Prey.senserange, 0
					elif y > height:
						x, y = x-Prey.senserange, height
				
				self.target = Object(x, y)

		else:
			food = env.findall('food', (self.x, self.y), Prey.senserange)
			if len(food)==0:
				food = None
			elif self.target not in food:
				food = random.choice(food)
			else:
				food = self.target

			if food!=None:
				self.target = food
			if self.escape == 0 and self.target!=None:
		 		if abs(self.target.y-self.y) <= err and abs(self.target.x-self.x) <= err:
		 			self.target = food

		# food = env.find('food', (self.x, self.y), Prey.senserange)
		# if food!=None:
		# 	self.target = food

		target = self.target

		state = []
		if target != None:
			if target.y + err < self.y:
				state += [1] * (abs(target.y - self.y) * int(10*random.random()+1)) # up
			if target.y - err > self.y:
				state += [2] * (abs(target.y - self.y) * int(10*random.random()+1)) # down
			if target.x + err < self.x:
				state += [3] * (abs(target.x - self.x) * int(10*random.random()+1))# left
			if target.x - err > self.x:
				state += [4] * (abs(target.x - self.x) * int(10*random.random()+1))# right
			if abs(target.y-self.y) <= err and abs(target.x-self.x) <= err:
				if target in (env.bush+env.hole+env.tree):
					state.append(6) # on
					self.safe = target.shield
				else:
					state.append(5) # on
			# food = env.find('food', (self.x, self.y), Prey.senserange)
			# if food!=None:
			# 	self.target = food
			# else:
			# 	if self.target == None:
			# 		self.target = random.choice(env.food)
			# 	if self.target not in env.food and random.random()<0.05:
			# 		self.target = random.choice(env.food)
		else:
			state.append(6) # nothing	
			if (self.target == None or self.target not in env.food):
				self.target = random.choice(env.food)

			 	

		self.fdis = abs(self.x-self.target.x) + abs(self.y-self.target.y)
			
		# 	if abs(food.x-self.x)<=err and abs(food.y-self.y)<=err:
		# 		state = 5
		# 	else:
		# 		if food.y <= self.y and food.x >= self.x:
		# 			state = 1 # quadrant 1
		# 		elif food.y <= self.y and food.x <= self.x:
		# 			state = 2 # quadrant 2
		# 		elif food.x < self.x and food.y > self.y:
		# 			state = 3 # quadrant 3
		# 		#elif food.x > self.x and food.y > self.y:
		# 		else:
		# 			state = 4 # quadrant 4
		# 			print state
		# else:
		# 	state = 6 # nothing
			#print state
		#print state
		#print state
		#food = env.find('food', (self.x, self.y), diag)
		return random.choice(state)

	def getReward(self, state, nstate, action):
		#if state==6:

		reward = self.reward

		if self.safe != [] and self.escape == 1:
			reward += 100

		if action == 'eat':
			reward -= 25
			if state == 5:
				reward += 125
		# if state<=4:   # for food 
		# 	if dis<=0: # <=0 further
		# 		reward -= 10
		# elif state == 6:
		# 	if dis>0:
		# 		reward += int(random.random()*2)*10
		# elif state<=11 and state>=7: # for hawk
		# 	#reward 
		# 	if dis<0: # <=0 further
		# 		reward += 20
		# 	else:
		# 		reward -= 40

		if self.dumbed:
			reward -= 100

		# if state<=6 and nstate>6:
		# 	print state
		# 	reward -= 300

		return reward
		# if action == 'eat':
		# 	if state == 5:
		# 		return 100
		# 	else:
		# 		return -100
		# else:
		# 	return 0
	#	if state==6 and nextstate != 6:
	#		self.reward += 0
		#self.reward -= 1;
		# else:
		# 	return -10

	def eat(self, state, env):
		if state==5:
			# print 'eating'
			food = env.find('food', (self.x, self.y), Prey.senserange)
			if food!=None:
				self.energy += 15
				self.energy = min(self.energy, 100)
				env.remove(food)

#if __name__ == "__main__":
#	p = Prey()

#	print p.x
Example #16
class Animat:

	#Class Parameters
	energyPerTick = []
	singleFoodEaten=[]
	multipleFoodEaten=[]
	energyPerTickIndex = 0
	singleFoodEatenIndex = 0
	multipleFoodEatenIndex = 0
	count = 0
	ID = -1;
	allowDeath = False
	foodTargeting = True
	energyThreshold = 80
	actions = ['north', 'south', 'east','west','eat','pickup','drop']

	def __init__(self,starty,startx, env, foodTypes, max_energy = 100.0, start_energy = 50.0, idnum = 1):

		#Initialize instance parameters
		self.y = starty
		self.x = startx
		self.env = env
		self.ID = idnum
		self.foodTypes = foodTypes
		self.energy = [start_energy] * len(self.foodTypes)
		self.maxEnergy = [max_energy] * len(self.foodTypes)
		self.previousEnergy = copy.copy(self.energy)
		self.energyUsageRate = [1.0/len(self.foodTypes)] * len(self.foodTypes)
		self.foodsEaten = [0] * len(self.foodTypes)
		self.holding = [-1] * len(self.foodTypes)

		#Initialize flags
		self.moved = False #if animat moved and direction animat moved in
		self.alive = True

		#Initialize threshold parameters
		self.reproductionThreshold = 40.0 #need 40 energy units to reproduce

		#Load the Neural Net (CURRENTLY UNUSED: we are using q learner instead)
		#nni = NNInitializer()
		#self.neuralNet = nni.readNetwork(filename)

		#Update Class Parameters
		Animat.count += 1

		#Initialize Q-Table (States and Actions)
		self.qLearn = QLearn(Animat.actions)

		#Statistics
		self.multipleFoodEaten = 0;
		self.multipleDrop = 0;

	def tick(self):
		#qLearn.
		currentState  = self.getState()
		targetFood = self.getTargetFoodSource()
		targetFoodDirection = self.senseEnvironment(targetFood)
		#print targetFood,self.energy
		action = self.qLearn.chooseAction(currentState)
		self.performQLearnAction(action)
		reward = self.getReward(targetFoodDirection, action) #update energies and get the reward after performing action
		#print "Reward is:", reward
		nextState = self.getState() #get the new state after performing actions
		self.qLearn.learn(currentState, action, reward, nextState) #update the Q Table
		self.resetFlags()
		self.checkDeath()
		return self.alive


	#Perform action based on input action. Should return the integer value
	#of the +/- reward experienced from performing the action
	def performQLearnAction(self,action):

		if action == 'north':
			self.move(self.y - 1, self.x)

		if action == 'south':
			self.move(self.y + 1, self.x)

		if action == 'east':
			self.move(self.y, self.x + 1)

		if action == 'west':
			self.move(self.y,self.x - 1)

		if action == 'eat':
			self.eatAll()

		if action == 'pickup':
			self.pickupAnything()

		if action == 'drop':
			self.dropAnything()

	def getState(self):
		# Pick 1 or 0 for each state, add to total,
		# then shift total << 

		total = 1;

		if Animat.foodTargeting:
			total *= 100
			targetFood = self.getTargetFoodSource();
			if targetFood == 0:
				total += 0;
			elif targetFood == 1:
				total += 1;
			elif targetFood == 2:
				total += 10;
			elif targetFood == 3:
				total += 11;
		for i in self.foodTypes:
			total *= 10;
			total += 1 if (self.holding[self.foodTypes[i]] > 0) else 0;
			total *= 10;
			total += 1 if (self.isOnFood(i)) else 0;
			foodgradient = self.senseEnvironment(i)
			total *= 10;
			total *= 10;
			if (foodgradient == 'north'):
				total += 0;
			elif (foodgradient == 'south'):
				total += 1;
			elif (foodgradient == 'west'):
				total += 10;
			elif (foodgradient == 'east'):
				total += 11;

			
		return int(str(total),2);

	@classmethod
	def randomStart(cls,sizey,sizex):
		# Given the size of the environment, start at random location
		#self.y = random.randint(1,sizey-1) - 1
		#self.x = random.randint(1,sizex-1) - 1
		return cls(random.randint(1,sizey-1) - 1,random.randint(1,sizex-1) - 1)
	
	@classmethod
	def setDeath(cls, death):
		Animat.allowDeath = death

	@classmethod
	def resetStats(Death):
		Animat.energyPerTick = []
		Animat.singleFoodEaten = []
		Animat.multipleFoodEaten = []
		Animat.energyPerTickIndex = 0
		Animat.singleFoodEatenIndex = 0
		Animat.multipleFoodEatenIndex = 0

	@classmethod
	def startTick(self):
		Animat.energyPerTick.append(0)
		Animat.singleFoodEaten.append(0)
		Animat.multipleFoodEaten.append(0)

		if(Animat.singleFoodEatenIndex > 0):
			Animat.singleFoodEaten[Animat.singleFoodEatenIndex] += Animat.singleFoodEaten[Animat.singleFoodEatenIndex - 1]

		if(Animat.multipleFoodEatenIndex > 0):
			Animat.multipleFoodEaten[Animat.multipleFoodEatenIndex] += Animat.multipleFoodEaten[Animat.multipleFoodEatenIndex - 1]


	@classmethod
	def endTick(cls):
		Animat.energyPerTickIndex += 1
		Animat.singleFoodEatenIndex += 1
		Animat.multipleFoodEatenIndex += 1

	def displayLocation(self):
		print "y is " + str(self.y) + ", x is " + str(self.x)
		
	def move(self,newy,newx):
		if self.env[0].canMove(self.y,self.x,newy,newx):
			self.y = newy
			self.x = newx
			self.moved = True
			for f in self.foodTypes:
				if (self.holding[f] >= 0):
					# move the food I'm holding
					self.env[f].returnFood(self.holding[f]).y = self.y;
					self.env[f].returnFood(self.holding[f]).x = self.x;
					break;
			
	def pickupAnything(self):
		# Go through the food types
		for i,foodType in enumerate(self.foodTypes):
			if self.pickup(foodType):
				return;

	def pickup(self,foodType):
		# Check to see if we're holding anything already.
		# Enforce holding one item at a time.
		if max(self.holding) == -1:
			foodID = self.env[foodType].returnFoodIDAt(self.y,self.x);
			if foodID != -1:
				# There is food here. Can we pick it up?
				if self.env[foodType].returnFood(foodID).pickUp():
					# We successfully picked it up
					self.holding[foodType] = self.env[foodType].returnFoodIDAt(self.y,self.x)
					return True;
		return False;

	def dropAnything(self):
		# Drop whatever we're holding
		for i,foodType in enumerate(self.foodTypes):
			if self.drop(foodType):
				return;

	def drop(self,foodType):
		# Check to see if we're actually holding this food type
		if self.holding[foodType] != -1:
			# Check to see if we're about to drop one food type on a different food type
			# If so, increment self.multipleDrop
			# def isOnFood(self,foodType):
			for f in self.foodTypes:
				if foodType != f and self.isOnFood(f):
					self.multipleDrop += 1;
					break;

			self.env[foodType].returnFood(self.holding[foodType]).drop();
			self.holding[foodType] = -1;
			return True;
		return False;

	def checkDeath(self):
		if Animat.allowDeath:
			for e in self.energy:
				if e <= 0:
					Animat.count -= 1
					self.alive = False;
					print "Animat died."
					return
	
	def eatAnything(self):
		for i,foodType in enumerate(self.foodTypes):
			if self.eat(foodType):
				return;

	def eatAll(self):
		for i,foodType in enumerate(self.foodTypes):
			if self.eat(foodType):
				self.foodsEaten[i] = 1;
			else:
				self.foodsEaten[i] = 0;
		return self.foodsEaten;

	def eat(self,foodType):
		foodId = self.env[foodType].returnFoodIDAt(self.y, self.x)
		if foodId >= 0:
			foodItem = self.env[foodType].returnFood(foodId)
			if not foodItem.held:
				self.eatFood(foodItem,foodType)
				return True;
		return False;

	def eatFood(self,foodItem,foodType):
		foodItem.eat();
		if foodItem.size == 0:
			self.env[foodType].removeFood(foodItem.id);
			print "Food removed from environment"

	def printEnergy(self):
		print "Energy: "+str(self.energy);


	def senseEnvironment(self, foodType):
		# Scent strengths at (center, east, west, north, south); follow the strongest,
		# breaking ties at random.
		inputValues = self.env[foodType].getScentsCEWNS(self.y,self.x)
		maxVal = max(inputValues)
		maxIndeces = [i for i, mymax in enumerate(inputValues) if mymax == maxVal]
		maxIndex = choice(maxIndeces)
		if maxIndex == 0:
			state = 'center'
		elif maxIndex == 1:
			state = 'east'
		elif maxIndex == 2:
			state = 'west'
		elif maxIndex == 3:
			state = 'north'
		else:
			state = 'south'

		return state


	def isOnFood(self,foodType):
		# True if there is a food item of this type at our current cell.
		foodID = self.env[foodType].returnFoodIDAt(self.y,self.x)
		return foodID != -1

	def followGradient(self,stateMachine,toEat,toFollow):
		if stateMachine == 'notholding':
			self.dropAnything();
			self.performQLearnAction(self.senseEnvironment(toEat));
			if self.isOnFood(toEat):
				if self.pickup(toEat):
					return 'holding'
				else:
					return 'fail';
			return 'notholding'
		elif stateMachine == 'holding':
			self.performQLearnAction(self.senseEnvironment(toFollow));
			if self.isOnFood(toFollow):
				if self.drop(toEat):
					return 'eat'
			return 'holding'
		elif stateMachine == 'eat':
			#if self.isOnFood(toEat):
			#	self.eat(toEat);
			#	return 'eat';
			#elif self.isOnFood(toFollow):
			#	self.eat(toFollow);
			#	return 'eat';
			if self.isOnFood(toEat) or self.isOnFood(toFollow):
				if not max(self.eatAll()) == 0:
					return 'eat';
				else:
					return 'fail'
			else:
				return 'notholding';

	def replenishEnergy(self,energy=500.0):
		self.energy = [energy] * len(self.foodTypes)
		self.alive = True

	#reset flags for next iteration
	def resetFlags(self):
		self.moved = False
		self.followedGradient = False
		self.foodsEaten = [0] * len(self.foodTypes)

	def getTargetFoodSource(self):
		# Target the food source whose energy bucket is furthest from full,
		# weighted by how quickly that energy type is consumed; sources we are
		# already holding are excluded.
		energyTilMax  = [y - x for x,y in zip(self.energy, self.maxEnergy)] # maxEnergy - currEnergy for each food source
		satiation     = [y * x for x,y in zip(self.energyUsageRate, energyTilMax)]
		satiation     = [y if x < 0 else -1 for x,y in zip(self.holding,satiation)]
		maxFollowValue = max(satiation)
		targetFoodSources = [i for i, mymax in enumerate(satiation) if mymax == maxFollowValue]
		return choice(targetFoodSources) # break ties between equally needy sources at random

	def getReward(self, targetDirection, action):

		#Animat Parameter Constants
		LIVING_COST     = 1.0
		MOVEMENT_COST	= 0.01	 # Cost to move one unit
		EATING_REWARD   = 10.0   # Reward for eating one food source
		EATING_MULT_ENERGY = 50.0
		EATING_MULT_REWARD = 100.0
		GRADIENT_FOLLOW_REWARD = 10.0

		previousEnergy = copy.copy(self.energy)

		#print "Foods eaten: ", self.foodsEaten

		#Reward Gradient
		gradientReward = 0

		if targetDirection == action:
			LIVING_COST = 0
			MOVEMENT_COST = 0
			#gradientReward = LIVING_COST + MOVEMENT_COST #offset cost if following the target gradient
			#print targetDirection, action


		#Subtract living cost and movement cost for each energy rate
		self.energy = [ currEnergy + EATING_REWARD * foodEaten - rate * (LIVING_COST  + MOVEMENT_COST * self.moved) for currEnergy, rate, foodEaten in zip(self.energy, self.energyUsageRate, self.foodsEaten)]
		self.energy = [ min(currEnergy, maxEnergy) for currEnergy, maxEnergy in zip(self.energy,self.maxEnergy)] #Limit energy to max energy

		numFoodEaten = self.foodsEaten.count(1)

		if numFoodEaten > 1:
			for i, v in enumerate(self.foodsEaten):
				if v > 0:
					self.energy[i] += EATING_MULT_ENERGY #Add extra energy to food buckets when they eat multiple foods

		#Compute delta energy for each energy bucket
		deltaEnergy = [ currEnergy - prevEnergy for currEnergy, prevEnergy in zip(self.energy, previousEnergy)]
		netDeltaEnergy = sum(deltaEnergy) #sum up all of the delta energies

		#Determine a reward multiplier if eating multiple foods when hungry
		rewardsMultiplier = 1
		if numFoodEaten > 1 and netDeltaEnergy > 0:
			rewardsMultiplier += pow(EATING_MULT_REWARD, numFoodEaten - 1)
			print "Ate ",numFoodEaten," food sources!"
			self.multipleFoodEaten += 1;
			Animat.multipleFoodEaten[Animat.multipleFoodEatenIndex] += 1

		if numFoodEaten > 0:
			Animat.singleFoodEaten[Animat.singleFoodEatenIndex] += 1

		reward = netDeltaEnergy * rewardsMultiplier + gradientReward

		Animat.energyPerTick[Animat.energyPerTickIndex] += netDeltaEnergy

		#print action, targetDirection, self.foodsEaten, previousEnergy, self.energy, deltaEnergy, netDeltaEnergy
		#print reward

		return reward
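
The Animat class above drives a small tabular interface: chooseAction(state) and learn(state, action, reward, nextState). The QLearn module itself is not included in these examples, so the following is only a minimal epsilon-greedy sketch written against those call sites; everything beyond the method signatures (defaults, attribute names) is an assumption.

import random

class QLearn(object):
	"""Minimal tabular Q-learner sketch matching the call sites above (assumed implementation)."""

	def __init__(self, actions, alpha=0.1, gamma=0.9, epsilon=0.1):
		self.q = {}              # (state, action) -> learned value
		self.actions = actions   # e.g. Animat.actions
		self.alpha = alpha       # learning rate
		self.gamma = gamma       # discount factor
		self.epsilon = epsilon   # exploration rate

	def getQ(self, state, action):
		return self.q.get((state, action), 0.0)

	def setQ(self, state, action, value):
		self.q[(state, action)] = value

	def chooseAction(self, state):
		# Epsilon-greedy: explore with probability epsilon, otherwise pick the
		# best-valued action, breaking ties at random.
		if random.random() < self.epsilon:
			return random.choice(self.actions)
		values = [self.getQ(state, a) for a in self.actions]
		best = max(values)
		return random.choice([a for a, v in zip(self.actions, values) if v == best])

	def learn(self, state, action, reward, nextState):
		# One-step Q-learning update:
		# Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
		future = max([self.getQ(nextState, a) for a in self.actions])
		old = self.getQ(state, action)
		self.setQ(state, action, old + self.alpha * (reward + self.gamma * future - old))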


		
Ejemplo n.º 17
0
class LearningAgent(Agent):
    """An agent that learns to drive in the smartcab world."""

    def __init__(self, env):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
        # TODO: Initialize any additional variables here
        self.learning_rate = 0.5
        self.discounting_factor = 0.3
        self.default_val = 2
        self.max_trials = 100
        self.l_initial_state = None
        self.l_initial_action = None
        self.l_initial_reward = None
        self.x_hit = range(0,self.max_trials)
        self.y_hit = range(0,self.max_trials)
        self.y_steps = range(0,self.max_trials)
        self.counter = -1
        self.steps_counter = -1
        self.enforce_deadline = True
        self.update_delay=0
        self.display = False
        self.QLearner = QLearn(l_actions=Environment.valid_actions, l_learning_rate=self.learning_rate, l_discounting_factor=self.discounting_factor, l_default_val=self.default_val)

    def reset(self, destination=None):
        self.planner.route_to(destination)
        # TODO: Prepare for a new trip; reset any variables here, if required
        print destination
        self.l_initial_state = None
        self.l_initial_action = None
        self.l_initial_reward = None		
        self.counter = self.counter + 1
        self.steps_counter = 0
        #print self.QLearner.states
		
    def update(self, t):
        # Gather inputs
        self.next_waypoint = self.planner.next_waypoint()  # from route planner, also displayed by simulator
        inputs = self.env.sense(self)
        deadline = self.env.get_deadline(self)
        self.steps_counter = self.steps_counter + 1
		
        print (inputs['light'],int(inputs['oncoming'] == 'right'),int(inputs['oncoming'] == 'left'))
        # TODO: Update state
        self.state = (inputs['light'], self.next_waypoint)
        # TODO: Select action according to your policy
        print 'self.state'
        print self.state
		
        action = self.QLearner.Get_action(self.state)
       
        # Execute action and get reward
        reward = self.env.act(self, action)

        # TODO: Learn policy based on state, action, reward
        # On the first step of each trial there is no previous (state, action) pair yet,
        # so only update once one exists.
        if self.l_initial_state is not None:
            self.QLearner.update_Q(self.l_initial_state, self.l_initial_action, self.l_initial_reward, self.state)
        self.l_initial_state = self.state
        self.l_initial_action = action
        self.l_initial_reward = reward
        self.y_steps[self.counter] = self.steps_counter
        if (deadline == 0) and (reward < 10):
            self.y_hit[self.counter] = 0
        else:
            self.y_hit[self.counter] = 1
        print "LearningAgent.update(): deadline = {}, inputs = {}, action = {}, reward = {}".format(deadline, inputs, action, reward)  # [debug]
	
    def plot_trials(self,x,y):
        matplotlib.pyplot.scatter(x,y)

        matplotlib.pyplot.show()

    def dynamic_gamma(self,gamma,max_value):
        return gamma / float(max_value)
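
The LearningAgent above applies its Q-update one step late: it stores the previous state, action, and reward, and only calls update_Q once the resulting state is known. The QLearn class it instantiates (with l_actions, l_learning_rate, l_discounting_factor, l_default_val) is not shown in this example; the sketch below is a guess at that interface, using an optimistic default Q-value, and is only meant to illustrate the update order.

import random

class QLearn(object):
    """Hypothetical sketch of the Get_action / update_Q interface assumed above."""

    def __init__(self, l_actions, l_learning_rate=0.5, l_discounting_factor=0.3, l_default_val=2):
        self.actions = l_actions
        self.alpha = l_learning_rate
        self.gamma = l_discounting_factor
        self.default = l_default_val      # optimistic initial Q-value encourages exploration
        self.Q = {}                       # (state, action) -> value

    def Get_action(self, state):
        # Greedy choice over (optimistically initialised) Q-values, ties broken at random.
        vals = [self.Q.get((state, a), self.default) for a in self.actions]
        best = max(vals)
        return random.choice([a for a, v in zip(self.actions, vals) if v == best])

    def update_Q(self, prev_state, prev_action, prev_reward, new_state):
        # Update the *previous* (state, action) pair using the reward it earned
        # plus the discounted value of the state we ended up in.
        old = self.Q.get((prev_state, prev_action), self.default)
        future = max(self.Q.get((new_state, a), self.default) for a in self.actions)
        self.Q[(prev_state, prev_action)] = (1 - self.alpha) * old + self.alpha * (prev_reward + self.gamma * future)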
Ejemplo n.º 18
0
import numpy as np
import os.path

import torch
from gridnet import GridNet
from DQN import DQN
from QLearn import QLearn

#------------------main----------------------------------------
PATH = './nn.pth'
QPATH = './q.npy'

gnet = GridNet()
Q_solv = gnet.PopulateQ(gamma=0.9)

qlearn = QLearn(gnet.n_size, gnet.n_BS, gnet.n_actions, gamma=0.9)
#Load the trained Q-matrix from disk
if os.path.isfile(QPATH):
    qlearn.Q_matrix = np.load(QPATH)
else:
    print("Error: no trained data available", QPATH)
    exit()

dqn = DQN(gnet.n_features, gnet.n_actions, gamma=0.9)
#Load trained network parameters if they exist.
if os.path.isfile(PATH):
    dqn.policy_net.load_state_dict(torch.load(PATH))
else:
    print("Error: no trained data available", PATH)
    exit()
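
This loader expects two artifacts produced by earlier training runs: a saved Q-matrix at ./q.npy and saved policy-network weights at ./nn.pth. A minimal sketch of how a training script might write them, reusing the qlearn, dqn, QPATH and PATH names from above (the surrounding training loop is not shown here), would be:

import numpy as np
import torch

# Hypothetical tail end of a training run: persist the artifacts this loader reads.
np.save(QPATH, qlearn.Q_matrix)                  # creates ./q.npy
torch.save(dqn.policy_net.state_dict(), PATH)    # creates ./nn.pth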
Ejemplo n.º 19
0
class Prey(Object):
	
	senserange = 100
	distinct = 10

	#actions = ['up', 'down', 'left','right', 'stroll', 'eat']
	actions = ['up', 'down', 'left','right', 'stay', 'eat']
	def __init__(self, x=0, y=0, file='qlearn.txt'):
		Object.__init__(self, 'prey', x, y)
		self.qlearn = QLearn(Prey.actions)
		#self.origin = (self.x, self.y)
		self.dangerous = 0
		self.step = 4
		self.dumbed = 0
		self.lastact = None
		self.foodeaten = 0

		fin = open(file, 'r')
		lines = fin.readlines()
		for line in lines:
			content = line.split()
			state = int(content[0])
			action = content[1]
			value = float(content[2])
			self.qlearn.setQ(state, action, value)
			#print content
		# self.qlearn.setQ(1, 'up', 10)
		# self.qlearn.setQ(2, 'down', 10)
		# self.qlearn.setQ(3, 'left', 10)
		# self.qlearn.setQ(4, 'right', 10)
		# self.qlearn.setQ(5, 'eat', 10)
		# self.qlearn.setQ(6, 'stroll', 1000)
		self.food = None
		self.fd = diag
		self.hawk = None
		self.hd = diag
		self.bush = None
		self.bd = diag
		self.hunger = 0

	def tick(self, env):
		#qLearn.
		# initial reward for each step
		currentState  = self.getState(env)
		action = self.qlearn.chooseAction(currentState)
		self.act(currentState, action, env)
		#print dis
		reward = self.getReward(currentState, action) #update energies and get the reward after performing action
		
		nextState = self.getState(env) #get the new state after performing actions
		print currentState, action, reward #self.hunger, (self.food.x, self.food.y)
		#if currentState>=7:
		#	print currentState, action, reward
		
		#print "Reward is:", reward
		self.qlearn.learn(currentState, action, reward, nextState) #update the Q Table

	def act(self, state, action, env):
		self.hunger += 1

		step = self.step
		#if state == 6:
		#	step = Prey.foodrange
		if action == 'up':
			self.y = self.y-step
		if action == 'down':
			self.y = self.y+step
		if action == 'left':
			self.x = self.x-step
		if action == 'right':
			self.x = self.x+step
		# 'stroll' is only reachable with the alternative action list commented out
		# above; the current 'stay' action has no branch here and does nothing.
		if action == 'stroll':
			if self.food not in env.food:
				self.food = random.choice(env.food)
			food = self.food
			print food.x, food.y

			self.x += 2 * (step * (random.random()-0.5))
			self.y += 2 * (step * (random.random()-0.5))
			
			x = abs(self.x - food.x)
			y = abs(self.y - food.y)
			t = max(x,y)
			self.x = int((t*self.x + 2*food.x) / (t+2.0))
			self.y = int((t*self.y + 2*food.y) / (t+2.0))
			
			# print 'stroll', food.x, food.y
		self.adjustxy()
		if action == 'eat':
			self.eat(state, env)
		# if state <= 6:
		# 	return self.fd - (abs(self.x-self.food.x) + abs(self.y-self.food.y))
		# elif state <= 11:
		# 	hd = 0
		# 	for hawkxy in self.hawkxy:
		# 		hd += (abs(self.x-hawkxy[0]) + abs(self.y-hawkxy[1]))
		# 	return (self.hd - hd)
		#print self.x, self.y
		#print action

	def adjustxy(self):
		# Clamp the position to the world bounds; set self.dumbed when a wall is hit.
		self.dumbed = 0
		if self.x < 0:
			self.x = 0
			self.dumbed = 1
		elif self.x > width:
			self.x = width
			self.dumbed = 1
		if self.y < 0:
			self.y = 0
			self.dumbed = 1
		elif self.y > height:
			self.y = height
			self.dumbed = 1

	def getState(self, env):
		# State encoding: 1-4 food up/down/left/right, 5 on food, 6 no food sensed,
		# 7-10 hawk up/down/left/right (a nearby hawk takes priority over food).
		self.bush = env.find('bush', (self.x, self.y), Prey.senserange)
		self.origin = (self.x, self.y)

		err = 0
		state = []

		hawk = env.find('hawk', (self.x, self.y), Prey.senserange)
		if hawk != None:
			xdiff = hawk.x - self.x
			ydiff = hawk.y - self.y

			if abs(xdiff)>=abs(ydiff):
				if xdiff > 0:
					return 10 # hawk on the right
				elif xdiff < 0:
					return 9 # on the left
				else:
					return random.choice([7,8,9,10])
			else:
				if ydiff > 0:
					return 8 # down
				else:
					return 7 # up

			# if hawk.y + err < self.y:
			# 	state += [7] * (100/(abs(hawk.y - self.y))) # up
			# if hawk.y - err > self.y:
			# 	state += [8] * (100/(abs(hawk.y - self.y))) # down
			# if hawk.x + err < self.x:
			# 	state += [9] * (100/(abs(hawk.x - self.x)))# left
			# if hawk.x - err > self.x:
			# 	state += [10] * (100/(abs(hawk.x - self.x)))# right
			# self.hawk = hawk
			# if len(state)==0:
			# 	state = [7,8,9,10]
			# return random.choice(state)

		err = 10
		self.dangerous = 0
		food = env.find('food', (self.x, self.y), diag)
		if food != None:
			if food.y + err < self.y:
				state += [1] * (abs(food.y - self.y) * int(10*random.random()+1)) # up
			if food.y - err > self.y:
				state += [2] * (abs(food.y - self.y) * int(10*random.random()+1)) # down
			if food.x + err < self.x:
				state += [3] * (abs(food.x - self.x) * int(10*random.random()+1))# left
			if food.x - err > self.x:
				state += [4] * (abs(food.x - self.x) * int(10*random.random()+1))# right
			if abs(food.y-self.y) <= err and abs(food.x-self.x) <= err:
				state.append(5) # on
			self.food = food
		else:
			state.append(6) # nothing
			if self.food not in env.food:
				self.food = random.choice(env.food)
			
		# 	if abs(food.x-self.x)<=err and abs(food.y-self.y)<=err:
		# 		state = 5
		# 	else:
		# 		if food.y <= self.y and food.x >= self.x:
		# 			state = 1 # quadrant 1
		# 		elif food.y <= self.y and food.x <= self.x:
		# 			state = 2 # quadrant 2
		# 		elif food.x < self.x and food.y > self.y:
		# 			state = 3 # quadrant 3
		# 		#elif food.x > self.x and food.y > self.y:
		# 		else:
		# 			state = 4 # quadrant 4
		# 			print state
		# else:
		# 	state = 6 # nothing
			#print state
		#print state
		#print state
		#food = env.find('food', (self.x, self.y), diag)
		return random.choice(state)

	def getReward(self, state, action):
		#if state==6:
		reward = 0
		# energy consumption
		if action == 'eat':
			reward -= 10
		#elif action != 'stay':
		#	reward -= 1

		# getting hungry
		reward -= 5

		# food eaten
		if self.foodeaten == 1:
			reward += 110

		# food dis	
		if self.food != None:
			dis = abs(self.x-self.food.x)+abs(self.y-self.food.y)
			dis -= (abs(self.origin[0]-self.food.x)+abs(self.origin[1]-self.food.y))
			if state <= 6:
				if dis < 0:
					reward += 10

		if self.hawk == None and action == 'stay':
			reward -= 50
			
		# hawk dis
		if self.hawk != None:
			dis = max(abs(self.x-self.hawk.x), abs(self.y-self.hawk.y))
			dis -= max(abs(self.origin[0]-self.hawk.x), abs(self.origin[1]-self.hawk.y))
			if dis > 0:
				reward += 20
			else:
				dis = min(abs(self.x-self.hawk.x), abs(self.y-self.hawk.y))
				dis -= min(abs(self.origin[0]-self.hawk.x), abs(self.origin[1]-self.hawk.y))
				if dis > 0:
					reward += 10
				else:
					reward -= 50

		if self.dumbed:
			reward -= 200

		#if state<=6 and nstate>6:
		#	reward -= 1000

		# init for next tick
		self.foodeaten = 0
		return reward
		# if action == 'eat':
		# 	if state == 5:
		# 		return 100
		# 	else:
		# 		return -100
		# else:
		# 	return 0
	#	if state==6 and nextstate != 6:
	#		self.reward += 0
		#self.reward -= 1;
		# else:
		# 	return -10

	def eat(self, state, env):
		if state==5:
			self.foodeaten = 1
			# print 'eating'
			food = env.find('food', (self.x, self.y), Prey.senserange)
			env.remove(food)
			self.hunger -= 50
			self.hunger = max(0, self.hunger)

#if __name__ == "__main__":
#	p = Prey()

#	print p.x
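
The Prey constructor above seeds its Q-table from a plain text file with one "<state> <action> <value>" entry per line, split on whitespace. A small helper for writing such a file, matching that parser (the helper itself and its example values are illustrative, not part of the original code), could look like:

def write_qtable(entries, path='qlearn.txt'):
	# entries: iterable of (state, action, value) triples, e.g. [(5, 'eat', 10.0)].
	# Each line uses the format Prey.__init__ parses: "<state> <action> <value>".
	with open(path, 'w') as fout:
		for state, action, value in entries:
			fout.write('%d %s %f\n' % (state, action, value))

# Example: seed a few hand-picked values, similar to the commented-out setQ calls above.
write_qtable([(1, 'up', 10.0), (2, 'down', 10.0), (5, 'eat', 10.0)])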