def _mean_or_nan(values):
    """Return ``np.mean(values)``, or NaN when ``values`` is empty.

    Short-circuiting the empty case avoids NumPy's "mean of empty slice"
    RuntimeWarning when nothing was recorded.
    """
    if not values:
        return float('nan')
    return np.mean(values)


def _build_efficiency_grid(evolution, n_preys, confusion):
    """Create a 512x512 grid holding one predator and ``n_preys`` preys.

    The animals reuse the brain (and, for the predator, the mask) of the
    first entry of ``evolution.predator_pool`` / ``evolution.prey_pool``.
    The constructors' return values are discarded; the animals are expected
    to attach themselves to ``grid`` (the simulation iterates
    ``grid.inhabitants`` afterwards).
    """
    grid = Grid(512, 512)
    predator = evolution.predator_pool[0]
    Predator(grid, brain=predator.brain, mask=predator.mask,
             confusion=bool(confusion))
    prey = evolution.prey_pool[0]
    for _ in range(n_preys):
        Prey(grid, brain=prey.brain)
    return grid


def _simulate_efficiency_trial(grid, collect_attacks, n_steps=2000):
    """Advance ``grid`` for ``n_steps`` steps and return attack statistics.

    Each step first lets every inhabitant look around, then lets every
    inhabitant pick a move.  When ``collect_attacks`` is true, every truthy
    ``look_around()`` result contributes ``result[0]`` to the list of visible
    preys and ``result[1]`` to the list of attack outcomes.

    Returns:
        ``(visible_preys, successes)``; both lists stay empty when
        ``collect_attacks`` is false.
    """
    visible_preys, successes = [], []
    for _ in range(n_steps):
        for animal in grid.inhabitants:
            result = animal.look_around()
            if collect_attacks and result:
                visible_preys.append(result[0])
                successes.append(result[1])
        for animal in grid.inhabitants:
            animal.pick_move()
    return visible_preys, successes


def measure_efficiency(evolution, n_preys, n_tests, confusion):
    """Run repeated predator-vs-swarm simulations and print summary statistics.

    Args:
        evolution: object exposing ``predator_pool`` and ``prey_pool``; only
            index 0 of each pool is used (presumably the best-ranked
            individuals -- confirm against the caller).
        n_preys: number of prey copies placed on the grid for every test.
        n_tests: number of independent simulations to run.
        confusion: when truthy, the predator is created with
            ``confusion=True`` and per-attack statistics are gathered;
            otherwise only the number of surviving preys is reported.

    Returns:
        None.  All results are printed.

    A test in which no attack was recorded used to raise ZeroDivisionError
    when computing the success proportion.  Such a test now still counts
    towards the preys-left and attack-count averages, but is left out of the
    per-attack averages (visible preys, success proportion).
    """
    preys_left, mean_visible, success_rates, attempts = [], [], [], []

    for t in range(n_tests):
        grid = _build_efficiency_grid(evolution, n_preys, confusion)
        print("\nRunning test", t + 1, "out of", n_tests)
        visible_preys, successes = _simulate_efficiency_trial(
            grid, bool(confusion))

        preys_left.append(grid.n_preys())
        if confusion:
            attempts.append(len(successes))
            if successes:
                mean_visible.append(np.mean(visible_preys))
                hits = sum(1 for outcome in successes if outcome)
                success_rates.append(hits / len(successes))

    print('\nAverage number of preys left:', _mean_or_nan(preys_left))
    if confusion:
        print('\nAverage number of visible preys in attacks:',
              _mean_or_nan(mean_visible))
        print('\nProportion of successes:', _mean_or_nan(success_rates))
        print('\nAverage number of attacks:', _mean_or_nan(attempts))
def __init__(self, Environment, myLocation):
    """Set up the team: its environment, its predator and its Q-learner.

    Args:
        Environment: stored on the instance and forwarded to ``Predator``.
        myLocation: forwarded unchanged to ``Predator``.
    """
    self.Environment = Environment

    # The team wraps one Predator and re-exports that predator's actions.
    self.Predator = Predator(Environment, myLocation)
    self.actions = self.Predator.actions

    # Q-learning constants, handed over positionally as
    # (alpha, gamma, epsilon).
    q_constants = (0.3, 0.7, 0.1)
    self.TeamQLearning = TeamQLearning(self, *q_constants)
def test_one_on_one(predator, prey, return_dict, num_test, pr):
    """Simulate one predator against a swarm of cloned preys; store fitness.

    ``self`` is not a parameter: it is read from the enclosing scope and
    supplies ``confusion``, ``mask``, ``n_preys``, ``n_iters`` and
    ``next_timeStep``.

    Args:
        predator: individual whose ``brain`` is given to the test predator.
        prey: individual whose ``brain`` is given to every prey of the swarm.
        return_dict: mapping that receives the result under ``num_test``.
        num_test: key used to store this test's result.
        pr: unused here; only the disabled progress-reporting code that used
            to sit inside the simulation loop referenced it.
    """
    arena = Grid(512, 512)

    # Fresh predator on the new grid, reusing the candidate's brain.
    challenger = Predator(arena,
                          brain=predator.brain,
                          confusion=self.confusion,
                          mask=self.mask)

    # n_preys copies of the same prey brain.
    herd = []
    for _ in range(self.n_preys):
        herd.append(Prey(arena, brain=prey.brain))

    for _ in range(self.n_iters):
        self.next_timeStep(arena)

    # Only the first prey's fitness is reported for the swarm.
    outcome = {'predator': challenger.fitness, 'prey': herd[0].fitness}
    return_dict[num_test] = outcome
def generate_random_predators(self, n):
    """Return a list of ``n`` newly constructed predators.

    Every predator is created on its own new ``Grid`` sized from
    ``self.grid_size`` and receives this object's ``confusion``, ``mask``
    and ``view_mode`` settings.  No brain is passed to the constructor.
    """
    newcomers = []
    for _ in range(n):
        private_grid = Grid(*self.grid_size)
        newcomers.append(
            Predator(private_grid,
                     confusion=self.confusion,
                     mask=self.mask,
                     view_mode=self.view_mode))
    return newcomers
def update_predator_pool(self, ranked_predators):
    """Replace ``self.predator_pool`` with the next generation.

    The new pool starts with two predators rebuilt from the two brains
    returned by ``make_up_next_predator_generation`` (so parents compete
    against their children), followed by all the kids it returned.
    """
    kids, first_brain, second_brain = \
        self.make_up_next_predator_generation(ranked_predators)

    self.predator_pool = next_pool = []

    # NOTE(review): the rebuilt parents get self.mask (not their own mask)
    # and no view_mode argument, so the Predator default applies -- confirm
    # that this is intended.
    for parent_brain in (first_brain, second_brain):
        next_pool.append(
            Predator(Grid(*self.grid_size),
                     confusion=self.confusion,
                     mask=self.mask,
                     brain=parent_brain))

    next_pool.extend(kids)
def __init__(self, Environment, myLocation):
    """Create the team's predator and its Q-learning helper.

    Args:
        Environment: kept as ``self.Environment`` and passed to ``Predator``.
        myLocation: passed through to ``Predator`` as given.
    """
    self.Environment = Environment

    # One Predator per team; the team exposes the predator's action set.
    self.Predator = Predator(Environment, myLocation)
    self.actions = self.Predator.actions

    # Constants for the learner, in the positional order
    # (alpha, gamma, epsilon).
    learner_settings = (0.3, 0.7, 0.1)
    self.TeamQLearning = TeamQLearning(self, *learner_settings)
def __init__(self,
             width=11,
             height=11,
             preyLocation=(5, 5),
             predatorLocation=(0, 0)):
    """Build an environment containing one prey and one predator.

    Args:
        width: stored as ``self.width``.
        height: stored as ``self.height``.
        preyLocation: passed to ``Prey`` together with this environment.
        predatorLocation: passed to ``Predator`` together with this
            environment.
    """
    self.width, self.height = width, height

    # getStates() yields the pair (states, terminal states); it runs before
    # any agent is created.
    states, terminals = self.getStates()
    self.S = states
    self.terminal_states = terminals

    self.Prey = Prey(self, preyLocation)
    # Despite the plural name, this holds a single Predator instance.
    self.Predators = Predator(self, predatorLocation)
def __init__(self, width=11, height=11, preyLocation=(5, 5),
             predatorLocation=(0, 0), numberOfPredators=1):
    """Build an environment with one prey and one to four predators.

    Args:
        width: stored as ``self.width``.
        height: stored as ``self.height``.
        preyLocation: passed to ``Prey`` together with this environment.
        predatorLocation: accepted for interface compatibility but not used;
            predators always start in the grid corners.
        numberOfPredators: an ``int`` between 1 and 4 inclusive.

    Raises:
        AssertionError: if ``numberOfPredators`` is not an ``int`` or lies
            outside 1..4.

    The corner start positions used to be hard-coded for the default 11x11
    grid ((10, 0), (0, 10), (10, 10)), which placed predators outside any
    smaller grid.  They are now derived from ``width`` and ``height``; the
    default size yields the same coordinates as before.
    """
    # Check the type first so the range comparison never runs on a value of
    # an unexpected type.
    assert type(numberOfPredators) is int
    assert 0 < numberOfPredators < 5

    self.width = width
    self.height = height
    self.numberOfPredators = numberOfPredators

    self.Prey = Prey(self, preyLocation)

    last_x, last_y = width - 1, height - 1
    self.PredatorLocations = [(0, 0), (last_x, 0), (0, last_y),
                              (last_x, last_y)]
    self.Predators = [
        Predator(self, start)
        for start in self.PredatorLocations[:self.numberOfPredators]
    ]

    # Prey first, then the predators.
    self.Agents = [self.Prey] + self.Predators
def make_up_next_predator_generation(self, ranked_predators):
    """Breed ``self.card_predator_pool - 2`` children from ranked predators.

    Args:
        ranked_predators: indexable sequence whose items hold a predator at
            index 0; entries 0, 1, 2 and 4 are used, so at least five items
            are required.

    Returns:
        ``(kids, brain_of_rank_0, brain_of_rank_1)`` where ``kids`` is the
        list of newly created predators.

    Each child weight is copied from the first drawn parent with probability
    0.45, from the second with probability 0.45, and otherwise replaced by a
    uniform random value in [-1, 1).  When ``self.evolve_mask`` is set, the
    child's mask is chosen with the same 45/45/10 split between the two
    parents' masks and a random row of ``masks`` (a name defined outside
    this function).
    """
    # Breeding stock: the top three, plus the fifth-ranked for diversity.
    breeders = [ranked_predators[rank][0] for rank in range(3)]
    breeders.append(ranked_predators[4][0])

    offspring = []
    for _ in range(self.card_predator_pool - 2):
        # Weight matrix for the child's perceptron.
        genes = np.zeros(breeders[0].brain.coef_.shape)

        # Two distinct parents, drawn at random.
        parent_a, parent_b = np.random.choice(breeders, 2, replace=False)

        for row in range(genes.shape[0]):
            for col in range(genes.shape[1]):
                draw = np.random.random()
                if draw >= .9:
                    # Mutation: fresh value in [-1, 1).
                    genes[row, col] = np.random.random() * 2 - 1
                elif draw < .45:
                    genes[row, col] = parent_a.brain.coef_[row, col]
                else:
                    genes[row, col] = parent_b.brain.coef_[row, col]

        child_mask = self.mask.copy()
        if self.evolve_mask:
            draw = np.random.random()
            if draw >= .9:
                child_mask = masks[np.random.choice(masks.shape[0])]
            elif draw < .45:
                child_mask = parent_a.mask
            else:
                child_mask = parent_b.mask

        child = Predator(Grid(*self.grid_size),
                         confusion=self.confusion,
                         mask=child_mask,
                         view_mode=self.view_mode)
        child.brain.coef_ = genes
        offspring.append(child)

    return offspring, breeders[0].brain, breeders[1].brain
class TeamPredator():
    """A predator team: one ``Predator`` plus a ``TeamQLearning`` instance.

    The team exposes the wrapped predator's ``actions`` and keeps its Q
    values and policy inside ``self.TeamQLearning``.
    """

    def __init__(self, Environment, myLocation):
        """Create the team's predator and learner.

        Args:
            Environment: stored on the instance and passed to ``Predator``.
            myLocation: passed to ``Predator``.
        """
        self.Environment = Environment

        # Initialize Q
        alpha = 0.3
        gamma = 0.7
        epsilon = 0.1

        # Initialize the predators in this team
        self.Predator = Predator(Environment, myLocation)
        self.actions = self.Predator.actions

        self.TeamQLearning = TeamQLearning(self, alpha, gamma, epsilon)

    def updateQ(self, s, a, o, s_prime, r):
        '''
        Update this team's Q and V.

        A linear program is solved to refresh ``TeamQLearning.policy[s]``;
        then ``TeamQLearning.updateQ(s, a, o, s_prime, r)`` is called with
        the arguments exactly as received.

        The loops below use their own variable names.  They previously
        reused ``a`` and ``o``, which overwrote the parameters, so the final
        update was performed with the last enumerated actions instead of the
        ones that were passed in.

        A ``grb.GurobiError`` raised while building or solving the model is
        reported on stdout and otherwise ignored.
        '''
        O = self.actions
        A = self.actions
        q_table = self.TeamQLearning.Q

        # Use linear programming to obtain optimal policy for this state
        try:
            # Create a new model
            m = grb.Model("MultiAgentMinimax")
            m.setParam("OutputFlag", 0)

            # Create one probability variable in [0, 1] per own action
            pi = dict()
            for own_action in A:
                pi[own_action] = m.addVar(0.0, 1.0,
                                          vtype=grb.GRB.CONTINUOUS,
                                          name=str(own_action))

            # Integrate new variables
            m.update()

            # Set objective
            m.setObjective(
                grb.LinExpr([(q_table[s][(own_action, other_action)],
                              pi[own_action])
                             for other_action in O
                             for own_action in A]),
                grb.GRB.MAXIMIZE)

            # Add constraint: Sum_a pi(a) = 1
            expr = grb.quicksum(m.getVars())
            m.addConstr(expr == 1, "Total probability")

            # Add one constraint per opposing action
            for other_action in O:
                expr = grb.LinExpr([(q_table[s][(own_action, other_action)],
                                     pi[own_action])
                                    for own_action in self.actions])
                m.addConstr(expr >= 0)

            m.optimize()

            for own_action in A:
                self.TeamQLearning.policy[s][own_action] = pi[own_action].x

        except grb.GurobiError:
            # Single-argument form prints the same text on Python 2 and 3.
            print('Error reported')

        # Update Q and V with the caller's actions
        self.TeamQLearning.updateQ(s, a, o, s_prime, r)

    def getJointActionEpsilonGreedy(self, s):
        """Sample an action epsilon-greedily from the policy for state ``s``.

        Every action gets probability ``epsilon / len(actions)``; the action
        returned by ``argmax(policy[s])`` additionally gets ``1 - epsilon``.
        The action is drawn by walking the cumulative distribution.
        """
        prob_actions = dict()
        uniform_epsilon = self.TeamQLearning.epsilon / (len(self.actions))

        for possible_a in self.actions:
            # Set probabilities of all actions uniformly
            prob_actions[possible_a] = uniform_epsilon

        best_a = argmax(self.TeamQLearning.policy[s])
        prob_actions[best_a] += 1 - self.TeamQLearning.epsilon

        # Return the first action whose cumulative probability reaches a
        # random number.
        random_number = random.random()
        cumulative_prob = 0.0
        chosen = None
        for chosen in self.actions:
            cumulative_prob += prob_actions[chosen]
            if cumulative_prob >= random_number:
                break
        # If rounding leaves the total just below the random number, the
        # last action is returned instead of falling through to None.
        return chosen

    def performAction(self, a):
        """Let the team's predator perform action ``a``."""
        self.Predator.performAction(a)

    def performJointAction(self, a):
        """Apply action ``a`` to each of ``numberOfPredators`` predators.

        NOTE(review): ``self.Predators`` is never assigned in this class
        (only ``self.Predator`` is); unless a caller sets it, this raises
        AttributeError.  Possibly ``self.Environment.Predators`` was meant --
        confirm.
        """
        # range() behaves like xrange() here on Python 2 and also exists on
        # Python 3.
        for n in range(self.Environment.numberOfPredators):
            self.Predators[n].performAction(a)

    def permutations(self, iterable, r=None):
        '''
        iterator <- permutations(iterable, r)

        Finds permutations of iterable of length r, with duplicate entries.
        ``r`` defaults to the number of items in ``iterable``.
        '''
        pool = tuple(iterable)
        n = len(pool)
        r = n if r is None else r
        # product(..., repeat=r) only yields index tuples of length r.
        for indices in product(range(n), repeat=r):
            yield tuple(pool[i] for i in indices)
def LoadDataFile(self, fileName):
    """Parse the XML data file and store its settings on ``self``.

    Reads the ``LAND_BOUNDS``, ``PLANTS``, ``GRAZERS``, ``PREDATORS`` and
    ``OBSTACLES`` children of the document root.  Scalar settings become
    ``int`` or ``float`` attributes; every ``PLANT``, ``GRAZER``,
    ``PREDATOR`` and ``OBSTACLE`` element is turned into the corresponding
    object and collected in an ``Initial...List`` attribute.

    Args:
        fileName: path of the XML file to read.

    Raises:
        AttributeError: if an expected element is missing (``find`` returns
            ``None``).
        ValueError: if an element's text cannot be converted to a number.

    The file is opened in a ``with`` block so the handle is closed once
    parsing finishes; it was previously left open.
    """
    with open(fileName) as handle:
        root = ElementTree.parse(handle).getroot()

    def read_int(node, tag):
        # Text of the child element ``tag``, converted to int.
        return int(node.find(tag).text)

    def read_float(node, tag):
        # Text of the child element ``tag``, converted to float.
        return float(node.find(tag).text)

    # World Map Constants
    worldMap = root.find("./LAND_BOUNDS")
    self.WorldWidth = read_float(worldMap, "WIDTH")
    self.WorldHeight = read_float(worldMap, "HEIGHT")

    # Plants
    plants = root.find("./PLANTS")
    self.InitialPlantCount = read_int(plants, "INITIAL_PLANT_COUNT")
    self.PlantGrowthRate = read_float(plants, "GROWTH_RATE")
    self.MaxPlantSize = read_int(plants, "MAX_SIZE")
    self.MaxSeedCastDistance = read_int(plants, "MAX_SEED_CAST_DISTANCE")
    self.MaxSeedNumber = read_int(plants, "MAX_SEED_NUMBER")
    self.SeedViabilityPercentage = read_float(plants, "SEED_VIABILITY")
    self.InitialPlantList = [
        Plant(read_int(plant, "X_POS"),
              read_int(plant, "Y_POS"),
              read_int(plant, "P_DIAMETER"))
        for plant in plants.findall("PLANT")
    ]

    # Grazers
    grazers = root.find("./GRAZERS")
    self.InitialGrazerCount = read_int(grazers, "INITIAL_GRAZER_COUNT")
    self.GrazerEnergyInputRate = read_int(grazers, "G_ENERGY_INPUT")
    self.GrazerEnergyOutputRate = read_int(grazers, "G_ENERGY_OUTPUT")
    self.GrazerEnergyToReproduce = read_int(grazers, "G_ENERGY_TO_REPRODUCE")
    self.GrazerMaxSpeedTime = read_float(grazers, "G_MAINTAIN_SPEED")
    self.GrazerMaxSpeed = read_float(grazers, "G_MAX_SPEED")
    self.InitialGrazerList = [
        Grazer(read_int(grazer, "X_POS"),
               read_int(grazer, "Y_POS"),
               read_int(grazer, "G_ENERGY_LEVEL"))
        for grazer in grazers.findall("GRAZER")
    ]

    # Predators
    predators = root.find("./PREDATORS")
    self.InitialPredatorCount = read_int(predators, "INITIAL_PREDATOR_COUNT")
    self.PredatorMaxSpeedHOD = read_float(predators, "MAX_SPEED_HOD")
    self.PredatorMaxSpeedHED = read_float(predators, "MAX_SPEED_HED")
    self.PredatorMaxSpeedHOR = read_float(predators, "MAX_SPEED_HOR")
    self.PredatorMaxSpeedTime = read_float(predators, "P_MAINTAIN_SPEED")
    self.PredatorEnergyOutputRate = read_int(predators, "P_ENERGY_OUTPUT")
    self.PredatorEnergyToReproduce = read_int(predators,
                                              "P_ENERGY_TO_REPRODUCE")
    self.PredatorMaxOffspring = read_int(predators, "P_MAX_OFFSPRING")
    self.PredatorGestationPeriodDays = read_float(predators, "P_GESTATION")
    self.PredatorOffspringEnergyLevel = read_int(predators,
                                                 "P_OFFSPRING_ENERGY")
    self.InitialPredatorList = []
    # Predators are numbered 0, 1, 2, ... in document order.
    for prID, predator in enumerate(predators.findall("PREDATOR")):
        # Newlines and tabs are removed, then the text is split on the
        # remaining whitespace.
        genotype = predator.find("GENOTYPE").text
        genotype = re.sub(r"[\n\t]*", "", genotype).split()
        self.InitialPredatorList.append(
            Predator(read_int(predator, "X_POS"),
                     read_int(predator, "Y_POS"),
                     read_int(predator, "P_ENERGY_LEVEL"),
                     genotype,
                     prID))

    # Obstacles
    obstacles = root.find("./OBSTACLES")
    self.InitialObstacleCount = read_int(obstacles, "INITIAL_OBSTACLE_COUNT")
    self.InitialObstacleList = [
        Obstacle(read_int(obstacle, "X_POS"),
                 read_int(obstacle, "Y_POS"),
                 read_int(obstacle, "O_DIAMETER"),
                 read_int(obstacle, "O_HEIGHT"))
        for obstacle in obstacles.findall("OBSTACLE")
    ]
# Window edge length: all cells plus the margins around and between them.
cellsTotal = widthOfCell * numberOfCellsInColumnsOrRows
marginsTotal = margin * (numberOfCellsInColumnsOrRows + 1)
sizeCalculation = cellsTotal + marginsTotal
size = (sizeCalculation, sizeCalculation)
screen = pygame.display.set_mode(size)

# Environment Agents/Objects
grid = Grid(numberOfCellsInColumnsOrRows, numberOfCellsInColumnsOrRows,
            sizeOfCell, screen, margin, pygame)

# Always init obstacles before everything else, because you don't want food
# being init'd on top of an obstacle.  The created objects are not kept
# here; only the constructor calls matter.
for factory, colour, howMany in (
        (Obstacle, BLACK, numberOfObstacles),
        (Predator, RED, numberOfPredators),
        (PreyAdult, GREEN, numberOfPreyAdults),
        (PreyOffspring, CYAN, numberOfPreyOffsprings),
        (Food, ORANGE, numberOfFoodObjects),
):
    for _ in range(0, howMany):
        factory(widthOfCell, heightOfCell, colour, grid)
class TeamPredator():
    """A predator team: one ``Predator`` plus a ``TeamQLearning`` instance.

    The team exposes the wrapped predator's ``actions`` and keeps its Q
    values and policy inside ``self.TeamQLearning``.
    """

    def __init__(self, Environment, myLocation):
        """Create the team's predator and learner.

        Args:
            Environment: stored on the instance and passed to ``Predator``.
            myLocation: passed to ``Predator``.
        """
        self.Environment = Environment

        # Initialize Q
        alpha = 0.3
        gamma = 0.7
        epsilon = 0.1

        # Initialize the predators in this team
        self.Predator = Predator(Environment, myLocation)
        self.actions = self.Predator.actions

        self.TeamQLearning = TeamQLearning(self, alpha, gamma, epsilon)

    def updateQ(self, s, a, o, s_prime, r):
        '''
        Update this team's Q and V.

        A linear program is solved to refresh ``TeamQLearning.policy[s]``;
        then ``TeamQLearning.updateQ(s, a, o, s_prime, r)`` is called with
        the arguments exactly as received.

        The loops below use their own variable names.  They previously
        reused ``a`` and ``o``, which overwrote the parameters, so the final
        update was performed with the last enumerated actions instead of the
        ones that were passed in.

        A ``grb.GurobiError`` raised while building or solving the model is
        reported on stdout and otherwise ignored.
        '''
        O = self.actions
        A = self.actions
        q_table = self.TeamQLearning.Q

        # Use linear programming to obtain optimal policy for this state
        try:
            # Create a new model
            m = grb.Model("MultiAgentMinimax")
            m.setParam("OutputFlag", 0)

            # Create one probability variable in [0, 1] per own action
            pi = dict()
            for own_action in A:
                pi[own_action] = m.addVar(0.0, 1.0,
                                          vtype=grb.GRB.CONTINUOUS,
                                          name=str(own_action))

            # Integrate new variables
            m.update()

            # Set objective
            m.setObjective(
                grb.LinExpr([(q_table[s][(own_action, other_action)],
                              pi[own_action])
                             for other_action in O
                             for own_action in A]),
                grb.GRB.MAXIMIZE)

            # Add constraint: Sum_a pi(a) = 1
            expr = grb.quicksum(m.getVars())
            m.addConstr(expr == 1, "Total probability")

            # Add one constraint per opposing action
            for other_action in O:
                expr = grb.LinExpr([(q_table[s][(own_action, other_action)],
                                     pi[own_action])
                                    for own_action in self.actions])
                m.addConstr(expr >= 0)

            m.optimize()

            for own_action in A:
                self.TeamQLearning.policy[s][own_action] = pi[own_action].x

        except grb.GurobiError:
            # Single-argument form prints the same text on Python 2 and 3.
            print('Error reported')

        # Update Q and V with the caller's actions
        self.TeamQLearning.updateQ(s, a, o, s_prime, r)

    def getJointActionEpsilonGreedy(self, s):
        """Sample an action epsilon-greedily from the policy for state ``s``.

        Every action gets probability ``epsilon / len(actions)``; the action
        returned by ``argmax(policy[s])`` additionally gets ``1 - epsilon``.
        The action is drawn by walking the cumulative distribution.
        """
        prob_actions = dict()
        uniform_epsilon = self.TeamQLearning.epsilon / (len(self.actions))

        for possible_a in self.actions:
            # Set probabilities of all actions uniformly
            prob_actions[possible_a] = uniform_epsilon

        best_a = argmax(self.TeamQLearning.policy[s])
        prob_actions[best_a] += 1 - self.TeamQLearning.epsilon

        # Return the first action whose cumulative probability reaches a
        # random number.
        random_number = random.random()
        cumulative_prob = 0.0
        chosen = None
        for chosen in self.actions:
            cumulative_prob += prob_actions[chosen]
            if cumulative_prob >= random_number:
                break
        # If rounding leaves the total just below the random number, the
        # last action is returned instead of falling through to None.
        return chosen

    def performAction(self, a):
        """Let the team's predator perform action ``a``."""
        self.Predator.performAction(a)

    def performJointAction(self, a):
        """Apply action ``a`` to each of ``numberOfPredators`` predators.

        NOTE(review): ``self.Predators`` is never assigned in this class
        (only ``self.Predator`` is); unless a caller sets it, this raises
        AttributeError.  Possibly ``self.Environment.Predators`` was meant --
        confirm.
        """
        # range() behaves like xrange() here on Python 2 and also exists on
        # Python 3.
        for n in range(self.Environment.numberOfPredators):
            self.Predators[n].performAction(a)

    def permutations(self, iterable, r=None):
        '''
        iterator <- permutations(iterable, r)

        Finds permutations of iterable of length r, with duplicate entries.
        ``r`` defaults to the number of items in ``iterable``.
        '''
        pool = tuple(iterable)
        n = len(pool)
        r = n if r is None else r
        # product(..., repeat=r) only yields index tuples of length r.
        for indices in product(range(n), repeat=r):
            yield tuple(pool[i] for i in indices)
sizeOfGrid = 15 sizeOfScreen = numOfGridsInARow * sizeOfGrid surface = pygame.display.set_mode((numOfGridsInARow * sizeOfGrid,numOfGridsInARow * sizeOfGrid)) numOfPreys = 1 numOfPredators = 1 numOfObstacles = 1 numOfFood = 1 worldMap = Map(numOfGridsInARow,sizeOfGrid,WHITE,surface,pygame) # fixed item should be placed first, since our grid is implemented in a stack manner obstacles = [Obstacle(worldMap,random.randrange(1,4),random.randrange(1,4),BROWN) for i in range(numOfObstacles)] # obstacles = [Obstacle(worldMap,3,1,BROWN) for i in range(numOfObstacles)] foods = [Food(worldMap,1,1,GREEN) for i in range(numOfFood)] predators = [Predator(worldMap,3,1,1,RED) for i in range(numOfPredators)] preys = [Prey(worldMap,2,1,1,YELLOW) for i in range(numOfPreys)] age = 1 learningAges = 1 preysEaten = [] foodEaten = [] while True: event = pygame.event.poll() if event.type == pygame.QUIT: pygame.quit() break surface.fill(WHITE) #update for every animat in the world