import math

import numpy as np
import pygame

# NeuralNetwork is the project's own feed-forward network class; the module
# path below is an assumption based on how Agent instantiates it.
from neural_network import NeuralNetwork


class Agent(pygame.sprite.Sprite):
    def __init__(self, initBrain, initEmpty, screen_width, screen_height, name='agent'):
        # Call the parent's constructor
        super().__init__()

        # note: initBrain is accepted but unused in this version of the class
        self.gravity = 0.6
        self.drag = 0.6
        self.lift = -15
        self.push = 15
        self.maxLim = 6
        self.minLim = -15
        self.velocity_y = 0
        self.velocity_x = 0
        self.radius = 25
        self.color = (0, 0, 0, 50)
        self.name = name

        self.image = pygame.Surface([self.radius, self.radius], pygame.SRCALPHA)
        self.image.fill(self.color)
        self.rect = self.image.get_rect()
        self.rect.x = 20
        self.rect.y = screen_height / 2

        self.timeSamplesExperianced = 0
        self.totalDistanceFromGapOverTime = 0
        self.fitness = 0
        self.avgDistFromGap = 0

        msLayeruUnits = [10, 7, 4]
        msActFunctions = ["relu", "softmax"]
        self.functional_system = NeuralNetwork(layer_units=msLayeruUnits,
                                               activation_func_list=msActFunctions)
        if not initEmpty:
            self.functional_system.init_layers(init_type="he_normal")
        else:
            self.functional_system.init_layers(init_type="zeros")

    def show(self):
        pass

    def think(self, closestBlock, screen_width, screen_height):
        inputs = []

        # input about the oncoming block object
        inputs.append(closestBlock.xPos / screen_width)
        inputs.append(closestBlock.top_block.rect.bottom / screen_height)
        inputs.append(closestBlock.bottom_block.rect.top / screen_height)
        inputs.append((closestBlock.xPos - self.rect.right) / screen_width)

        # input about the agent's current position
        inputs.append((screen_height - self.rect.bottom) / screen_height)  # distance from bottom of screen
        inputs.append(self.rect.bottom / screen_height)
        inputs.append(self.rect.top / screen_height)
        inputs.append(self.rect.right / screen_height)
        inputs.append(self.rect.left / screen_height)
        inputs.append(self.minMaxNormalise(self.velocity_y))

        inputs = np.array([inputs])
        prediction = self.functional_system.feed_foward(inputs=inputs)

        if prediction[0] > prediction[1]:
            self.actionUp()
        if prediction[2] > prediction[3]:
            self.actionHorizontal()

    def actionUp(self):
        self.velocity_y += self.lift

    def actionHorizontal(self):
        self.velocity_x += self.push

    def reset(self, screen_height):
        self.gravity = 0.6
        self.drag = 0.6
        self.lift = -15
        self.push = 5
        self.maxLim = 6
        self.minLim = -15
        self.velocity_y = 0
        self.velocity_x = 0
        self.radius = 25
        self.color = (0, 0, 0, 50)

        self.image = pygame.Surface([self.radius, self.radius], pygame.SRCALPHA)
        self.image.fill(self.color)
        self.rect = self.image.get_rect()
        self.rect.x = 10
        self.rect.y = screen_height / 2

        self.timeSamplesExperianced = 0
        self.totalDistanceFromGapOverTime = 0
        self.fitness = 0
        self.avgDistFromGap = 0

    def update(self, closestBlock, screen_height):
        self.velocity_y += self.gravity
        self.velocity_y *= 0.9
        self.rect.y += self.velocity_y

        self.velocity_x += self.drag
        self.velocity_x *= 0.1
        self.rect.x += self.velocity_x

        if self.velocity_y > self.maxLim:
            self.velocity_y = self.maxLim
        if self.velocity_y < self.minLim:
            self.velocity_y = self.minLim

        # if self.rect.bottom > screen_height:
        #     self.rect.bottom = screen_height
        #     self.velocity = 0
        #
        # elif self.rect.top < 5:
        #     self.rect.top = 0
        #     self.velocity = 0

        # penalise agents for their distance on the y-axis from the centre of
        # the gap between the blocks
        gap = closestBlock.bottom_block.rect.top - closestBlock.top_block.rect.bottom
        gapMid = closestBlock.top_block.rect.bottom + np.round(gap / 2)
        agentDistanceFromGap = np.floor(np.abs(self.rect.midright[1] - gapMid))
        self.totalDistanceFromGapOverTime = self.totalDistanceFromGapOverTime + agentDistanceFromGap
        self.timeSamplesExperianced = self.timeSamplesExperianced + 1

        self.fitness = self.fitness + 1

    def off_screen(self, screen_height, screen_width):
        if self.rect.top < 5:
            return True
        elif self.rect.bottom > screen_height:
            return True
        elif self.rect.left < 0:
            return True
        elif self.rect.right > screen_width:
            return True
        else:
            return False

    def minMaxNormalise(self, x):
        return (x - self.minLim) / (self.maxLim - self.minLim)

    def computeFitness(self):
        # penalise agent based on average distance from gap;
        # max() guards against a zero division if no frames were experienced
        impactFactor = 0.9  # scales the percentage of penalisation applied
        self.avgDistFromGap = np.floor(self.totalDistanceFromGapOverTime / max(self.timeSamplesExperianced, 1))
        self.fitness = self.fitness - np.floor(impactFactor * self.avgDistFromGap)
        if self.fitness < 0:
            self.fitness = 0
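# ---------------------------------------------------------------------------
# The Agent classes in this file only assume a small interface from the
# NeuralNetwork they import: the constructor used above, init_layers(), and
# feed_foward(). The sketch below is a minimal, hypothetical implementation
# of that interface, provided only for reference and standalone experiments;
# it is not the project's actual NeuralNetwork.
class _SketchNeuralNetwork:
    def __init__(self, layer_units, activation_func_list):
        # e.g. layer_units=[10, 7, 4] gives two weight matrices, each paired
        # with one activation from activation_func_list
        self.layer_units = layer_units
        self.activation_func_list = activation_func_list
        self.weights = []
        self.biases = []

    def init_layers(self, init_type="he_normal"):
        for fan_in, fan_out in zip(self.layer_units, self.layer_units[1:]):
            if init_type == "he_normal":
                # He-normal: draws scaled by sqrt(2 / fan_in), suited to relu
                w = np.random.randn(fan_in, fan_out) * np.sqrt(2.0 / fan_in)
            else:  # "zeros" leaves the network inert
                w = np.zeros((fan_in, fan_out))
            self.weights.append(w)
            self.biases.append(np.zeros(fan_out))

    def feed_foward(self, inputs):
        # returns a flat vector so callers can index prediction[0], [1], ...
        a = inputs
        for w, b, func in zip(self.weights, self.biases, self.activation_func_list):
            z = a @ w + b
            if func == "relu":
                a = np.maximum(0.0, z)
            elif func == "tanh":
                a = np.tanh(z)
            elif func == "softmax":
                e = np.exp(z - z.max(axis=1, keepdims=True))
                a = e / e.sum(axis=1, keepdims=True)
        return a.flatten()

# NeuralNetwork = _SketchNeuralNetwork  # uncomment to run this file standalone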
# Revised Agent: adds fuel with per-action depletion, joystick-style
# continuous control, per-axis velocity limits, random spawn ranges, and a
# rightward-progress fitness signal. It supersedes the class above.
class Agent(pygame.sprite.Sprite):
    def __init__(self, xPos, yPos, xPos_range, yPos_range, initEmpty,
                 vertical_fuel_depletion_rate=0.05,
                 horizontal_fuel_depletion_rate=0.05,
                 name='agent', color=(0, 0, 0, 50)):
        # Call the parent's constructor
        super().__init__()

        self.gravity = 0.0
        self.drag = 0.0
        self.lift = -10
        self.push = 2
        self.maxLim_y_velocity = 20
        self.minLim_y_velocity = -20
        self.maxLim_x_velocity = 4
        self.minLim_x_velocity = -4
        self.velocity_y = 0
        self.velocity_x = 0
        self.radius = 20
        self.color = color
        self.current_closest_block = None
        self.fuel = 1.0
        self.failure_meter = 0.0
        self.vertical_fuel_depletion_rate = vertical_fuel_depletion_rate
        self.horizontal_fuel_depletion_rate = horizontal_fuel_depletion_rate

        if xPos_range is not None:
            xPos = np.random.randint(xPos_range[0], xPos_range[1])
        if yPos_range is not None:
            yPos = np.random.randint(yPos_range[0], yPos_range[1])

        self.name = name
        self.image = pygame.Surface([self.radius, self.radius], pygame.SRCALPHA)
        self.image.fill(self.color)
        self.rect = self.image.get_rect()
        self.rect.x = xPos
        self.rect.y = yPos
        self.previous_xPos = self.rect.right
        self.starting_xPos = xPos
        self.starting_yPos = yPos

        self.timeSamplesExperianced = 1
        self.totalDistanceFromGapOverTime = 0
        self.fitness = 0
        self.avgDistFromGap = 0

        msLayeruUnits = [12, 7, 2]
        msActFunctions = ["relu", "tanh"]
        self.functional_system = NeuralNetwork(layer_units=msLayeruUnits,
                                               activation_func_list=msActFunctions)
        if not initEmpty:
            self.functional_system.init_layers(init_type="he_normal")
        else:
            self.functional_system.init_layers(init_type="zeros")

    def reset(self):
        self.velocity_y = 0
        self.current_closest_block = None
        self.fuel = 1.0
        self.rect.x = self.starting_xPos
        self.rect.y = self.starting_yPos
        self.previous_xPos = self.rect.center[0]
        self.timeSamplesExperianced = 1
        self.totalDistanceFromGapOverTime = 0
        self.fitness = 0
        self.avgDistFromGap = 0

    def think(self, active_blocks, screen_width, screen_height):
        # find the closest block still ahead of the agent for data input
        result = list(filter(lambda x: x.top_block.rect.right > self.rect.right, active_blocks))
        result.sort(key=lambda x: x.top_block.rect.right)

        if len(result) != 0:
            closest_block = result[0]

            inputs = []

            # input about the oncoming block object
            inputs.append(closest_block.xPos / screen_width)
            inputs.append(closest_block.top_block.rect.bottom / screen_height)
            inputs.append(closest_block.bottom_block.rect.top / screen_height)
            inputs.append((closest_block.xPos - self.rect.right) / screen_width)

            # input about the agent's current position
            inputs.append((screen_height - self.rect.bottom) / screen_height)  # distance from bottom of screen
            inputs.append(self.rect.bottom / screen_height)
            inputs.append(self.rect.top / screen_height)
            inputs.append(self.rect.right / screen_height)
            inputs.append(self.rect.left / screen_height)
            inputs.append(self.fuel)
            # inputs.append(self.vertical_fuel_depletion_rate)
            # inputs.append(self.horizontal_fuel_depletion_rate)
            inputs.append(self.minMaxNormalise(self.velocity_x,
                                               min=self.minLim_x_velocity,
                                               max=self.maxLim_x_velocity))
            inputs.append(self.minMaxNormalise(self.velocity_y,
                                               min=self.minLim_y_velocity,
                                               max=self.maxLim_y_velocity))

            inputs = np.array([inputs])
            prediction = self.functional_system.feed_foward(inputs=inputs)

            self.actionVertical(input=prediction[0], mode='joystick_control')
            self.actionHorizontal(input=prediction[1], mode='joystick_control')

            # if prediction[0] > prediction[1]:
            #     self.actionVertical()
            # if prediction[2] > prediction[3]:
            #     self.actionHorizontal()

            self.current_closest_block = closest_block

    def actionVertical(self, input=None, mode='discrete_control'):
        if mode == 'discrete_control':
            self.velocity_y += self.lift
        elif mode == 'joystick_control':
            self.velocity_y += self.lift * input
        else:
            raise ValueError('ERROR: Invalid action control entry')
        self.fuel -= self.vertical_fuel_depletion_rate
        self.color_gauge(self.vertical_fuel_depletion_rate)

    def actionHorizontal(self, input=None, mode='discrete_control'):
        if mode == 'discrete_control':
            self.velocity_x += self.push
        elif mode == 'joystick_control':
            self.velocity_x += self.push * input
        else:
            raise ValueError('ERROR: Invalid action control entry')
        self.fuel -= self.horizontal_fuel_depletion_rate
        self.color_gauge(self.horizontal_fuel_depletion_rate)

    def color_gauge(self, deduction):
        # shift the sprite's red channel towards 255 as fuel is burned
        self.failure_meter += deduction
        if self.failure_meter > 1:
            self.failure_meter = 0
        c = list(self.color)
        c[0] = self.failure_meter * 255
        self.color = tuple(c)
        self.image.fill(self.color)

    def update(self, screen_height):
        self.velocity_y += self.gravity
        self.velocity_y *= 0.9
        self.rect.y += self.velocity_y

        self.velocity_x += self.drag
        # self.velocity_x *= 0.9
        self.rect.x += self.velocity_x

        if self.velocity_y > self.maxLim_y_velocity:
            self.velocity_y = self.maxLim_y_velocity
        if self.velocity_y < self.minLim_y_velocity:
            self.velocity_y = self.minLim_y_velocity
        if self.velocity_x > self.maxLim_x_velocity:
            self.velocity_x = self.maxLim_x_velocity
        if self.velocity_x < self.minLim_x_velocity:
            self.velocity_x = self.minLim_x_velocity

        # penalise agents for their distance on the y-axis from the centre of
        # the gap between the blocks (skipped until think() has seen a block)
        if self.current_closest_block is not None:
            gap = self.current_closest_block.bottom_block.rect.top - self.current_closest_block.top_block.rect.bottom
            gapMid = self.current_closest_block.top_block.rect.bottom + np.round(gap / 2)
            agentDistanceFromGap = np.floor(np.abs(self.rect.midright[1] - gapMid))
            self.totalDistanceFromGapOverTime += agentDistanceFromGap
            self.timeSamplesExperianced += 1

        if self.rect.right > self.previous_xPos:
            # fitness only increases if the agent is moving right, towards the goal
            self.fitness += 1
        self.previous_xPos = self.rect.right

    def fuel_depleted(self):
        if self.fuel < 0:
            return True
        return False

    def off_screen(self, screen_height, screen_width):
        if self.rect.top < 5:
            return True
        elif self.rect.bottom > screen_height:
            return True
        elif self.rect.left < 0:
            return True
        elif self.rect.right > screen_width:
            return True
        else:
            return False

    def minMaxNormalise(self, x, min, max):
        return (x - min) / (max - min)

    def computeFitness(self):
        # raise raw fitness to the fourth power to widen the gap between good
        # and bad performers before applying the distance penalty
        self.fitness = math.pow(self.fitness, 4)
        impactFactor = 0.5  # scales the percentage of penalisation applied
        self.avgDistFromGap = np.floor(self.totalDistanceFromGapOverTime / self.timeSamplesExperianced)
        fitness_penalty = np.floor(impactFactor * self.avgDistFromGap)
        self.fitness -= fitness_penalty
        if self.fitness < 0:
            self.fitness = 0
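# ---------------------------------------------------------------------------
# Hedged usage sketch: how the revised Agent above is presumably driven each
# frame. The real game supplies block-pair objects exposing xPos, top_block,
# and bottom_block (inferred from the accesses in think()/update());
# _SketchBlockPair below is a hypothetical stand-in, not the project's class.
class _SketchBlockPair:
    def __init__(self, xPos, gap_top, gap_bottom, block_width, screen_height):
        self.xPos = xPos
        # top block spans from the ceiling down to gap_top,
        # bottom block from gap_bottom down to the floor
        self.top_block = pygame.sprite.Sprite()
        self.top_block.rect = pygame.Rect(xPos, 0, block_width, gap_top)
        self.bottom_block = pygame.sprite.Sprite()
        self.bottom_block.rect = pygame.Rect(xPos, gap_bottom, block_width,
                                             screen_height - gap_bottom)


if __name__ == "__main__":
    pygame.init()
    SCREEN_W, SCREEN_H = 800, 600

    agent = Agent(xPos=50, yPos=SCREEN_H // 2,
                  xPos_range=None, yPos_range=None, initEmpty=False)
    blocks = [_SketchBlockPair(xPos=400, gap_top=200, gap_bottom=350,
                               block_width=50, screen_height=SCREEN_H)]

    for _ in range(60):  # roughly one simulated second at 60 FPS
        agent.think(blocks, SCREEN_W, SCREEN_H)
        agent.update(SCREEN_H)
        if agent.off_screen(SCREEN_H, SCREEN_W) or agent.fuel_depleted():
            break

    agent.computeFitness()
    print(agent.name, agent.fitness)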