class WaterWorld(base.PyGameWrapper):
    """
    Based on Karpathy's WaterWorld in `REINFORCEjs`_.

    .. _REINFORCEjs: https://github.com/karpathy/reinforcejs

    The agent moves around the screen and collides with creeps. Touching a
    "GOOD" creep adds ``rewards["positive"]`` to the score, touching a "BAD"
    creep adds ``rewards["negative"]``. Every eaten creep is replaced by a
    new one of random type, and the game is over once no "GOOD" creeps
    remain.

    Parameters
    ----------
    width : int
        Screen width.

    height : int
        Screen height, recommended to be same dimension as width.

    num_creeps : int (default: 3)
        The number of creeps on the screen at once.

    """

    def __init__(self, width=48, height=48, num_creeps=3):

        actions = {
            "up": K_w,
            "left": K_a,
            "right": K_d,
            "down": K_s
        }

        # NOTE(review): self.width, self.height, self.rewards, self.rng and
        # self.screen are read below but never assigned in this class;
        # presumably the base class provides them -- confirm.
        base.PyGameWrapper.__init__(self, width, height, actions=actions)
        self.BG_COLOR = (255, 255, 255)
        self.N_CREEPS = num_creeps

        # The three CREEP_* lists below are indexed by creep type:
        # 0 -> "GOOD", 1 -> "BAD".
        self.CREEP_TYPES = ["GOOD", "BAD"]
        self.CREEP_COLORS = [(40, 140, 40), (150, 95, 95)]
        radius = percent_round_int(width, 0.047)
        self.CREEP_RADII = [radius, radius]
        self.CREEP_REWARD = [
            self.rewards["positive"],
            self.rewards["negative"]]
        self.CREEP_SPEED = 0.25 * width

        self.AGENT_COLOR = (60, 60, 140)
        self.AGENT_SPEED = 0.25 * width
        self.AGENT_RADIUS = radius
        self.AGENT_INIT_POS = (self.width / 2, self.height / 2)

        # Number of creeps of each type currently alive.
        self.creep_counts = {
            "GOOD": 0,
            "BAD": 0
        }

        # Movement requested by the key events of the current step.
        self.dx = 0
        self.dy = 0

        # Created lazily by init().
        self.player = None
        self.creeps = None

    def _handle_player_events(self):
        """Drain the pygame event queue and translate key presses into the
        requested movement ``(self.dx, self.dy)`` for this step.

        A QUIT event shuts pygame down and exits the process.
        """
        self.dx = 0
        self.dy = 0
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

            if event.type == pygame.KEYDOWN:
                key = event.key

                if key == self.actions["left"]:
                    self.dx -= self.AGENT_SPEED

                if key == self.actions["right"]:
                    self.dx += self.AGENT_SPEED

                if key == self.actions["up"]:
                    self.dy -= self.AGENT_SPEED

                if key == self.actions["down"]:
                    self.dy += self.AGENT_SPEED

    def _add_creep(self):
        """Spawn one creep of a random type at a random on-screen position
        and register it in ``self.creeps`` / ``self.creep_counts``.
        """
        creep_type = self.rng.choice([0, 1])

        # Keep the spawn point at least 1.5 radii away from every edge.
        # x is bounded by the width and y by the height, so non-square
        # screens are sampled correctly (previously both coordinates were
        # bounded by the height).
        margin = self.CREEP_RADII[creep_type] * 1.5
        low = [margin, margin]
        high = [self.width - margin, self.height - margin]

        # Re-sample while the candidate lies (almost) on top of the player.
        # NOTE(review): the threshold is a fixed 1.5 pixels and is not
        # scaled by the creep/agent radius -- confirm this is intended.
        # assumes self.rng is a numpy RandomState-like object whose
        # uniform() accepts array-like bounds -- TODO confirm
        dist = 0.0
        while dist < 1.5:
            pos = self.rng.uniform(low, high, size=2)
            dist = math.sqrt(
                (self.player.pos.x - pos[0])**2 +
                (self.player.pos.y - pos[1])**2)

        # NOTE(review): Creep is defined outside this block; the fourth,
        # fifth and last arguments are presumably direction, speed and
        # jitter -- verify against Creep's signature.
        creep = Creep(
            self.CREEP_COLORS[creep_type],
            self.CREEP_RADII[creep_type],
            pos,
            self.rng.choice([-1, 1], 2),
            self.rng.rand() * self.CREEP_SPEED,
            self.CREEP_REWARD[creep_type],
            self.CREEP_TYPES[creep_type],
            self.width,
            self.height,
            self.rng.rand()
        )

        self.creeps.add(creep)

        self.creep_counts[self.CREEP_TYPES[creep_type]] += 1

    def getGameState(self):
        """
        Gets a non-visual state representation of the game.

        Returns
        -------

        dict
            * player x position.
            * player y position.
            * player x velocity.
            * player y velocity.
            * player distance to each creep, grouped by creep type
              under ``"creep_dist"``.

        """

        state = {
            "player_x": self.player.pos.x,
            "player_y": self.player.pos.y,
            "player_velocity_x": self.player.vel.x,
            "player_velocity_y": self.player.vel.y,
            "creep_dist": {
                "GOOD": [],
                "BAD": []
            }
        }

        for c in self.creeps:
            dist = math.sqrt((self.player.pos.x - c.pos.x) **
                             2 + (self.player.pos.y - c.pos.y)**2)
            state["creep_dist"][c.TYPE].append(dist)

        return state

    def getScore(self):
        """Return the score accumulated since the last call to init()."""
        return self.score

    def game_over(self):
        """
            Return bool if the game has 'finished', i.e. no "GOOD" creeps
            are left.
        """
        return (self.creep_counts['GOOD'] == 0)

    def init(self):
        """
            Starts/Resets the game to its initial state
        """
        self.creep_counts = {"GOOD": 0, "BAD": 0}

        # Reuse the existing player on reset; only build it the first time.
        if self.player is None:
            self.player = Player(
                self.AGENT_RADIUS, self.AGENT_COLOR,
                self.AGENT_SPEED, self.AGENT_INIT_POS,
                self.width, self.height
            )

        else:
            self.player.pos = vec2d(self.AGENT_INIT_POS)
            self.player.vel = vec2d((0.0, 0.0))

        if self.creeps is None:
            self.creeps = pygame.sprite.Group()
        else:
            self.creeps.empty()

        for _ in range(self.N_CREEPS):
            self._add_creep()

        self.score = 0
        self.ticks = 0
        self.lives = -1

    def step(self, dt):
        """
            Perform one step of game emulation.

            ``dt`` is divided by 1000 before being handed to the sprites
            (presumably milliseconds converted to seconds).
        """
        dt /= 1000.0
        self.screen.fill(self.BG_COLOR)

        self.score += self.rewards["tick"]

        self._handle_player_events()
        self.player.update(self.dx, self.dy, dt)

        # dokill=True: creeps touched by the player leave the group.
        hits = pygame.sprite.spritecollide(self.player, self.creeps, True)
        for creep in hits:
            self.creep_counts[creep.TYPE] -= 1
            self.score += creep.reward
            # Replace every eaten creep so the population stays constant.
            self._add_creep()

        if self.creep_counts["GOOD"] == 0:
            self.score += self.rewards["win"]

        self.creeps.update(dt)

        self.player.draw(self.screen)
        self.creeps.draw(self.screen)
class WaterWorld(PyGameWrapper):
    """
    Based on Karpathy's WaterWorld in `REINFORCEjs`_.

    .. _REINFORCEjs: https://github.com/karpathy/reinforcejs

    The agent moves around the screen and collides with creeps. Touching a
    "GOOD" creep adds ``rewards["positive"]`` to the score, touching a "BAD"
    creep adds ``rewards["negative"]``. Every eaten creep is replaced by a
    new one of random type, and the game is over once no "GOOD" creeps
    remain.

    Parameters
    ----------
    width : int
        Screen width.

    height : int
        Screen height, recommended to be same dimension as width.

    num_creeps : int (default: 3)
        The number of creeps on the screen at once.

    """

    def __init__(self, width=48, height=48, num_creeps=3):

        actions = {
            "up": K_w,
            "left": K_a,
            "right": K_d,
            "down": K_s
        }

        # NOTE(review): self.width, self.height, self.rewards, self.rng and
        # self.screen are read below but never assigned in this class;
        # presumably the base class provides them -- confirm.
        PyGameWrapper.__init__(self, width, height, actions=actions)
        self.BG_COLOR = (255, 255, 255)
        self.N_CREEPS = num_creeps

        # The three CREEP_* lists below are indexed by creep type:
        # 0 -> "GOOD", 1 -> "BAD".
        self.CREEP_TYPES = ["GOOD", "BAD"]
        self.CREEP_COLORS = [(40, 140, 40), (150, 95, 95)]
        radius = percent_round_int(width, 0.02)
        self.CREEP_RADII = [radius, radius]
        self.CREEP_REWARD = [
            self.rewards["positive"],
            self.rewards["negative"]]
        self.CREEP_SPEED = 0.25 * width

        self.AGENT_COLOR = (60, 60, 140)
        self.AGENT_SPEED = 0.25 * width
        self.AGENT_RADIUS = radius
        self.AGENT_INIT_POS = (self.width / 2, self.height / 2)

        # Number of creeps of each type currently alive.
        self.creep_counts = {
            "GOOD": 0,
            "BAD": 0
        }

        # Movement requested by the key events of the current step.
        self.dx = 0
        self.dy = 0

        # Created lazily by init().
        self.player = None
        self.creeps = None

    def _handle_player_events(self):
        """Drain the pygame event queue and translate key presses into the
        requested movement ``(self.dx, self.dy)`` for this step.

        A QUIT event shuts pygame down and exits the process. Pressing
        ``u`` flips the ``display_screen`` and ``force_fps`` flags on
        ``self.ple``.
        """
        self.dx = 0
        self.dy = 0
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

            if event.type == pygame.KEYDOWN:
                key = event.key

                # pygame.K_u has the value 117, the literal used previously.
                if key == pygame.K_u:
                    # NOTE(review): self.ple is not assigned anywhere in
                    # this class; presumably the environment wrapper
                    # attaches itself before the game runs -- verify.
                    print("u pressed")
                    self.ple.display_screen = not self.ple.display_screen
                    self.ple.force_fps = not self.ple.force_fps

                if key == self.actions["left"]:
                    self.dx -= self.AGENT_SPEED

                if key == self.actions["right"]:
                    self.dx += self.AGENT_SPEED

                if key == self.actions["up"]:
                    self.dy -= self.AGENT_SPEED

                if key == self.actions["down"]:
                    self.dy += self.AGENT_SPEED

    def _add_creep(self):
        """Spawn one creep of a random type at a random on-screen position
        and register it in ``self.creeps`` / ``self.creep_counts``.
        """
        creep_type = self.rng.choice([0, 1])

        # Keep the spawn point at least 1.5 radii away from every edge.
        # x is bounded by the width and y by the height, so non-square
        # screens are sampled correctly (previously both coordinates were
        # bounded by the height).
        margin = self.CREEP_RADII[creep_type] * 1.5
        low = [margin, margin]
        high = [self.width - margin, self.height - margin]

        # Re-sample while the candidate lies (almost) on top of the player.
        # NOTE(review): the threshold is a fixed 1.5 pixels and is not
        # scaled by the creep/agent radius -- confirm this is intended.
        # assumes self.rng is a numpy RandomState-like object whose
        # uniform() accepts array-like bounds -- TODO confirm
        dist = 0.0
        while dist < 1.5:
            pos = self.rng.uniform(low, high, size=2)
            dist = math.sqrt(
                (self.player.pos.x - pos[0])**2 +
                (self.player.pos.y - pos[1])**2)

        # NOTE(review): Creep is defined outside this block; the fourth,
        # fifth and last arguments are presumably direction, speed and
        # jitter -- verify against Creep's signature.
        creep = Creep(
            self.CREEP_COLORS[creep_type],
            self.CREEP_RADII[creep_type],
            pos,
            self.rng.choice([-1, 1], 2),
            self.rng.rand() * self.CREEP_SPEED,
            self.CREEP_REWARD[creep_type],
            self.CREEP_TYPES[creep_type],
            self.width,
            self.height,
            self.rng.rand()
        )

        self.creeps.add(creep)

        self.creep_counts[self.CREEP_TYPES[creep_type]] += 1

    def getGameState(self):
        """
        Gets a non-visual state representation of the game.

        Returns
        -------

        dict
            * player x position.
            * player y position.
            * player x velocity.
            * player y velocity.
            * player distance to each creep (``"creep_dist"``).
            * ``[x, y]`` position of each creep (``"creep_pos"``).
            * ``[x, y]`` velocity of each creep, computed as
              direction times speed (``"creep_vel"``).

            The three creep entries are dicts keyed by creep type
            ("GOOD" / "BAD"), each holding one list item per creep.

        """

        state = {
            "player_x": self.player.pos.x,
            "player_y": self.player.pos.y,
            "player_velocity_x": self.player.vel.x,
            "player_velocity_y": self.player.vel.y,
            "creep_dist": {
                "GOOD": [],
                "BAD": []
            },
            "creep_pos": {
                "GOOD": [],
                "BAD": []
            },
            "creep_vel": {
                "GOOD": [],
                "BAD": []
            }
        }

        for c in self.creeps:
            dist = math.sqrt((self.player.pos.x - c.pos.x) **
                             2 + (self.player.pos.y - c.pos.y)**2)
            state["creep_dist"][c.TYPE].append(dist)
            state["creep_pos"][c.TYPE].append([c.pos.x, c.pos.y])
            state["creep_vel"][c.TYPE].append(
                [c.direction.x * c.speed, c.direction.y * c.speed])

        return state

    def getScore(self):
        """Return the score accumulated since the last call to init()."""
        return self.score

    def game_over(self):
        """
            Return bool if the game has 'finished', i.e. no "GOOD" creeps
            are left.
        """
        return (self.creep_counts['GOOD'] == 0)

    def init(self):
        """
            Starts/Resets the game to its initial state
        """
        self.creep_counts = {"GOOD": 0, "BAD": 0}

        # Reuse the existing player on reset; only build it the first time.
        if self.player is None:
            self.player = Player(
                self.AGENT_RADIUS, self.AGENT_COLOR,
                self.AGENT_SPEED, self.AGENT_INIT_POS,
                self.width, self.height
            )

        else:
            self.player.pos = vec2d(self.AGENT_INIT_POS)
            self.player.vel = vec2d((0.0, 0.0))

        if self.creeps is None:
            self.creeps = pygame.sprite.Group()
        else:
            self.creeps.empty()

        for _ in range(self.N_CREEPS):
            self._add_creep()

        self.score = 0
        self.ticks = 0
        self.lives = -1

    def step(self, dt):
        """
            Perform one step of game emulation.

            ``dt`` is divided by 1000 before being handed to the sprites
            (presumably milliseconds converted to seconds).
        """
        dt /= 1000.0
        self.screen.fill(self.BG_COLOR)

        self.score += self.rewards["tick"]

        self._handle_player_events()
        self.player.update(self.dx, self.dy, dt)

        # dokill=True: creeps touched by the player leave the group.
        hits = pygame.sprite.spritecollide(self.player, self.creeps, True)
        for creep in hits:
            self.creep_counts[creep.TYPE] -= 1
            self.score += creep.reward
            # Replace every eaten creep so the population stays constant.
            self._add_creep()

        if self.creep_counts["GOOD"] == 0:
            self.score += self.rewards["win"]

        self.creeps.update(dt)

        self.player.draw(self.screen)
        self.creeps.draw(self.screen)
class PuckWorld(base.Game):
    """
    Based on Karpathy's PuckWorld in `REINFORCEjs`_.

    .. _REINFORCEjs: https://github.com/karpathy/reinforcejs

    The score falls with the agent's distance to a stationary "good" creep
    and falls further while the agent is inside the outer radius of a
    "bad" creep that is steered towards the agent every step.

    Parameters
    ----------
    width : int
        Screen width.

    height : int
        Screen height, recommended to be same dimension as width.

    """

    def __init__(self, width=64, height=64):

        # NOTE(review): self.width, self.height, self.screen_dim,
        # self.rewards, self.rng and self.screen are used by this class but
        # not assigned here; presumably the base class provides them.
        base.Game.__init__(
            self, width, height,
            actions={
                "up": K_w,
                "left": K_a,
                "right": K_d,
                "down": K_s
            }
        )

        # Appearance and speed of the chasing creep.
        self.CREEP_BAD = {
            "radius_center": percent_round_int(width, 0.047),
            "radius_outer": percent_round_int(width, 0.265),
            "color_center": (110, 45, 45),
            "color_outer": (150, 95, 95),
            "speed": 0.05 * width
        }

        # Appearance of the target creep.
        self.CREEP_GOOD = {
            "radius": percent_round_int(width, 0.047),
            "color": (40, 140, 40)
        }

        self.AGENT_COLOR = (60, 60, 140)
        self.AGENT_SPEED = 0.2 * width
        self.AGENT_RADIUS = percent_round_int(width, 0.047)
        # The agent starts 1.5 radii in from the top-left corner.
        corner_offset = self.AGENT_RADIUS * 1.5
        self.AGENT_INIT_POS = (corner_offset, corner_offset)

        self.BG_COLOR = (255, 255, 255)

        # Movement requested by the key events of the current step.
        self.dx = 0
        self.dy = 0
        self.ticks = 0

    def _handle_player_events(self):
        """Drain the pygame event queue and turn key presses into the
        requested movement ``(self.dx, self.dy)`` for this step.

        A QUIT event shuts pygame down and exits the process.
        """
        self.dx = 0.0
        self.dy = 0.0

        for evt in pygame.event.get():
            if evt.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

            if evt.type != pygame.KEYDOWN:
                continue

            pressed = evt.key

            if pressed == self.actions["left"]:
                self.dx -= self.AGENT_SPEED

            if pressed == self.actions["right"]:
                self.dx += self.AGENT_SPEED

            if pressed == self.actions["up"]:
                self.dy -= self.AGENT_SPEED

            if pressed == self.actions["down"]:
                self.dy += self.AGENT_SPEED

    def getGameState(self):
        """
        Gets a non-visual state representation of the game.

        Returns
        -------

        dict
            * player x position.
            * player y position.
            * players x velocity.
            * players y velocity.
            * good creep x position.
            * good creep y position.
            * bad creep x position.
            * bad creep y position.

            See code for structure.

        """
        agent = self.player
        target = self.good_creep
        chaser = self.bad_creep

        return {
            "player_x": agent.pos.x,
            "player_y": agent.pos.y,
            "player_velocity_x": agent.vel.x,
            "player_velocity_y": agent.vel.y,
            "good_creep_x": target.pos.x,
            "good_creep_y": target.pos.y,
            "bad_creep_x": chaser.pos.x,
            "bad_creep_y": chaser.pos.y
        }

    def getScore(self):
        """Return the score accumulated since the last call to init()."""
        return self.score

    def game_over(self):
        """
            Return bool if the game has 'finished'; this game never ends
            on its own, so the answer is always False.
        """
        return False

    def _rngCreepPos(self):
        """Draw a random ``(x, y)`` position for the good creep, keeping
        it a few radii away from the screen edges."""
        radius = self.CREEP_GOOD['radius']
        lower = radius * 3
        inset = radius * 2.5

        # x is drawn before y.
        x_pos = self.rng.uniform(lower, self.width - inset)
        y_pos = self.rng.uniform(lower, self.height - inset)
        return (x_pos, y_pos)

    def init(self):
        """
            Starts/Resets the game to its initial state
        """

        self.player = Player(
            self.AGENT_RADIUS,
            self.AGENT_COLOR,
            self.AGENT_SPEED,
            self.AGENT_INIT_POS,
            self.width,
            self.height
        )

        # The good creep is created with speed 0.0 and reward 1.0; only
        # the last argument is labelled (jitter) in the original code.
        self.good_creep = Creep(
            self.CREEP_GOOD['color'],
            self.CREEP_GOOD['radius'],
            self._rngCreepPos(),
            (1, 1),
            0.0,
            1.0,
            "GOOD",
            self.width,
            self.height,
            0.0  # jitter
        )

        self.bad_creep = PuckCreep(
            (self.width, self.height),
            self.CREEP_BAD,
            self.screen_dim[0] * 0.75,
            self.screen_dim[1] * 0.75
        )

        self.creeps = pygame.sprite.Group()
        self.creeps.add(self.good_creep, self.bad_creep)

        self.score = 0
        self.ticks = 0
        self.lives = -1

    def step(self, dt):
        """
            Perform one step of game emulation.

            ``dt`` is divided by 1000 before being handed to the sprites
            (presumably milliseconds converted to seconds).
        """
        dt = dt / 1000.0
        self.ticks += 1
        self.screen.fill(self.BG_COLOR)

        self.score += self.rewards["tick"]

        self._handle_player_events()
        self.player.update(self.dx, self.dy, dt)

        agent_pos = self.player.pos

        # Offset and distance from the good creep to the agent.
        good_dx = agent_pos.x - self.good_creep.pos.x
        good_dy = agent_pos.y - self.good_creep.pos.y
        dist_to_good = math.sqrt(good_dx * good_dx + good_dy * good_dy)

        # Offset and distance from the bad creep to the agent.
        bad_dx = agent_pos.x - self.bad_creep.pos.x
        bad_dy = agent_pos.y - self.bad_creep.pos.y
        dist_to_bad = math.sqrt(bad_dx * bad_dx + bad_dy * bad_dy)

        # Base reward is the negated distance to the good creep; being
        # inside the bad creep's outer radius adds a penalty that grows
        # the closer the agent is to its centre.
        outer_radius = self.CREEP_BAD['radius_outer']
        reward = -dist_to_good
        if dist_to_bad < outer_radius:
            reward += 2.0 * (dist_to_bad - outer_radius) / float(outer_radius)

        self.score += reward

        # Every 500 ticks the good creep jumps to a new random position.
        if self.ticks % 500 == 0:
            new_x, new_y = self._rngCreepPos()
            self.good_creep.pos.x = new_x
            self.good_creep.pos.y = new_y

        # Unit vector from the bad creep towards the agent (zero when they
        # coincide, which also avoids dividing by zero).
        if dist_to_bad == 0.0:
            ndx = 0.0
            ndy = 0.0
        else:
            ndx = bad_dx / dist_to_bad
            ndy = bad_dy / dist_to_bad

        self.bad_creep.update(ndx, ndy, dt)
        self.good_creep.update(dt)

        self.player.draw(self.screen)
        self.creeps.draw(self.screen)