class WaterWorld(base.PyGameWrapper):
    """
    Based on Karpathy's WaterWorld in `REINFORCEjs`_.

    .. _REINFORCEjs: https://github.com/karpathy/reinforcejs

    The player moves around the screen and collides with "GOOD" and "BAD"
    creeps. Every creep that is hit is removed, its reward is added to the
    score and a new creep of a random type is spawned in its place. The game
    is over once no "GOOD" creeps are left.

    Parameters
    ----------
    width : int
        Screen width.

    height : int
        Screen height, recommended to be same dimension as width.

    num_creeps : int (default: 3)
        The number of creeps on the screen at once.
    """

    def __init__(self,
                 width=48,
                 height=48,
                 num_creeps=3):

        # Movement keys: W/A/S/D.
        actions = {
            "up": K_w,
            "left": K_a,
            "right": K_d,
            "down": K_s
        }

        base.PyGameWrapper.__init__(self, width, height, actions=actions)
        self.BG_COLOR = (255, 255, 255)
        self.N_CREEPS = num_creeps

        # The CREEP_* lists are parallel: index 0 is "GOOD", index 1 is "BAD".
        self.CREEP_TYPES = ["GOOD", "BAD"]
        self.CREEP_COLORS = [(40, 140, 40), (150, 95, 95)]
        # Radius is derived from the screen width (see percent_round_int).
        radius = percent_round_int(width, 0.047)
        self.CREEP_RADII = [radius, radius]
        self.CREEP_REWARD = [
            self.rewards["positive"],
            self.rewards["negative"]]
        self.CREEP_SPEED = 0.25 * width

        self.AGENT_COLOR = (60, 60, 140)
        self.AGENT_SPEED = 0.25 * width
        self.AGENT_RADIUS = radius
        self.AGENT_INIT_POS = (self.width / 2, self.height / 2)

        # Number of live creeps per type; game_over() checks the "GOOD" count.
        self.creep_counts = {
            "GOOD": 0,
            "BAD": 0
        }

        # Movement requested by the player for the current step.
        self.dx = 0
        self.dy = 0

        # Created lazily in init().
        self.player = None
        self.creeps = None

    def _handle_player_events(self):
        """
        Drain the pygame event queue and translate key presses into the
        movement request (``self.dx``, ``self.dy``) for this step.

        A ``pygame.QUIT`` event shuts pygame down and exits the process.
        """
        self.dx = 0
        self.dy = 0
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

            if event.type == pygame.KEYDOWN:
                key = event.key

                if key == self.actions["left"]:
                    self.dx -= self.AGENT_SPEED

                if key == self.actions["right"]:
                    self.dx += self.AGENT_SPEED

                if key == self.actions["up"]:
                    self.dy -= self.AGENT_SPEED

                if key == self.actions["down"]:
                    self.dy += self.AGENT_SPEED

    def _add_creep(self):
        """
        Spawn one creep of a random type at a random position, add it to
        ``self.creeps`` and bump the matching entry in ``self.creep_counts``.
        """
        creep_type = self.rng.choice([0, 1])

        # Keep spawn points away from the screen border. x is bounded by the
        # width and y by the height so that non-square screens are covered
        # correctly.
        margin = self.CREEP_RADII[creep_type] * 1.5
        low = (margin, margin)
        high = (self.width - margin, self.height - margin)

        pos = (0, 0)
        dist = 0.0

        # Resample until the spawn point is at least 1.5 px from the player.
        # NOTE(review): the threshold is an absolute pixel distance, not a
        # multiple of the radius, so a creep can still overlap the player --
        # confirm whether that is intended.
        while dist < 1.5:
            pos = self.rng.uniform(low, high)
            dist = math.sqrt(
                (self.player.pos.x - pos[0])**2 + (self.player.pos.y - pos[1])**2)

        creep = Creep(
            self.CREEP_COLORS[creep_type],
            self.CREEP_RADII[creep_type],
            pos,
            self.rng.choice([-1, 1], 2),
            self.rng.rand() * self.CREEP_SPEED,
            self.CREEP_REWARD[creep_type],
            self.CREEP_TYPES[creep_type],
            self.width,
            self.height,
            self.rng.rand()
        )

        self.creeps.add(creep)

        self.creep_counts[self.CREEP_TYPES[creep_type]] += 1

    def getGameState(self):
        """
        Gets a non-visual state representation of the game.

        Returns
        -------

        dict
            * player x position.
            * player y position.
            * player x velocity.
            * player y velocity.
            * player distance to each creep, grouped by creep type
              under ``"creep_dist"``.

        """

        state = {
            "player_x": self.player.pos.x,
            "player_y": self.player.pos.y,
            "player_velocity_x": self.player.vel.x,
            "player_velocity_y": self.player.vel.y,
            "creep_dist": {
                "GOOD": [],
                "BAD": []
            }
        }

        for c in self.creeps:
            # Euclidean distance between the player and this creep.
            dist = math.sqrt((self.player.pos.x - c.pos.x) **
                             2 + (self.player.pos.y - c.pos.y)**2)
            state["creep_dist"][c.TYPE].append(dist)

        return state

    def getScore(self):
        """
            Return the current score.
        """
        return self.score

    def game_over(self):
        """
            Return True once no "GOOD" creeps are left on the screen.
        """
        return (self.creep_counts['GOOD'] == 0)

    def init(self):
        """
            Starts/Resets the game to its initial state
        """
        self.creep_counts = {"GOOD": 0, "BAD": 0}

        if self.player is None:
            self.player = Player(
                self.AGENT_RADIUS, self.AGENT_COLOR,
                self.AGENT_SPEED, self.AGENT_INIT_POS,
                self.width, self.height
            )

        else:
            # Reuse the existing player sprite; only reset its motion state.
            self.player.pos = vec2d(self.AGENT_INIT_POS)
            self.player.vel = vec2d((0.0, 0.0))

        if self.creeps is None:
            self.creeps = pygame.sprite.Group()
        else:
            self.creeps.empty()

        for _ in range(self.N_CREEPS):
            self._add_creep()

        self.score = 0
        self.ticks = 0
        self.lives = -1

    def step(self, dt):
        """
            Perform one step of game emulation.

            ``dt`` is divided by 1000 before being handed to the sprites
            (presumably milliseconds -> seconds; confirm against the caller).
        """
        dt /= 1000.0
        self.screen.fill(self.BG_COLOR)

        self.score += self.rewards["tick"]

        self._handle_player_events()
        self.player.update(self.dx, self.dy, dt)

        # Colliding creeps are removed from the group (dokill=True); each one
        # is scored and replaced by a freshly spawned creep.
        hits = pygame.sprite.spritecollide(self.player, self.creeps, True)
        for creep in hits:
            self.creep_counts[creep.TYPE] -= 1
            self.score += creep.reward
            self._add_creep()

        if self.creep_counts["GOOD"] == 0:
            self.score += self.rewards["win"]

        self.creeps.update(dt)

        self.player.draw(self.screen)
        self.creeps.draw(self.screen)
# Example #2
# 0
class WaterWorld(PyGameWrapper):
    """
    Based on Karpathy's WaterWorld in `REINFORCEjs`_.

    .. _REINFORCEjs: https://github.com/karpathy/reinforcejs

    The player moves around the screen and collides with "GOOD" and "BAD"
    creeps. Every creep that is hit is removed, its reward is added to the
    score and a new creep of a random type is spawned in its place. The game
    is over once no "GOOD" creeps are left.

    Parameters
    ----------
    width : int
        Screen width.

    height : int
        Screen height, recommended to be same dimension as width.

    num_creeps : int (default: 3)
        The number of creeps on the screen at once.
    """

    def __init__(self,
                 width=48,
                 height=48,
                 num_creeps=3):

        # Movement keys: W/A/S/D.
        actions = {
            "up": K_w,
            "left": K_a,
            "right": K_d,
            "down": K_s
        }

        PyGameWrapper.__init__(self, width, height, actions=actions)
        self.BG_COLOR = (255, 255, 255)
        self.N_CREEPS = num_creeps

        # The CREEP_* lists are parallel: index 0 is "GOOD", index 1 is "BAD".
        self.CREEP_TYPES = ["GOOD", "BAD"]
        self.CREEP_COLORS = [(40, 140, 40), (150, 95, 95)]
        # Radius is derived from the screen width (see percent_round_int).
        radius = percent_round_int(width, 0.02)
        self.CREEP_RADII = [radius, radius]
        self.CREEP_REWARD = [
            self.rewards["positive"],
            self.rewards["negative"]]
        self.CREEP_SPEED = 0.25 * width

        self.AGENT_COLOR = (60, 60, 140)
        self.AGENT_SPEED = 0.25 * width
        self.AGENT_RADIUS = radius
        self.AGENT_INIT_POS = (self.width / 2, self.height / 2)

        # Number of live creeps per type; game_over() checks the "GOOD" count.
        self.creep_counts = {
            "GOOD": 0,
            "BAD": 0
        }

        # Movement requested by the player for the current step.
        self.dx = 0
        self.dy = 0

        # Created lazily in init().
        self.player = None
        self.creeps = None

    def _handle_player_events(self):
        """
        Drain the pygame event queue and translate key presses into the
        movement request (``self.dx``, ``self.dy``) for this step.

        A ``pygame.QUIT`` event shuts pygame down and exits the process.
        Pressing "u" toggles ``display_screen`` and ``force_fps`` on the
        object stored in ``self.ple``, if one has been attached.
        """
        self.dx = 0
        self.dy = 0
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

            if event.type == pygame.KEYDOWN:
                key = event.key

                if key == pygame.K_u:
                    print("u pressed")
                    # `ple` is not assigned anywhere in this class; it is
                    # expected to be attached from outside. Skip the toggle
                    # instead of crashing when it is missing.
                    ple = getattr(self, "ple", None)
                    if ple is not None:
                        ple.display_screen = not ple.display_screen
                        ple.force_fps = not ple.force_fps

                if key == self.actions["left"]:
                    self.dx -= self.AGENT_SPEED

                if key == self.actions["right"]:
                    self.dx += self.AGENT_SPEED

                if key == self.actions["up"]:
                    self.dy -= self.AGENT_SPEED

                if key == self.actions["down"]:
                    self.dy += self.AGENT_SPEED

    def _add_creep(self):
        """
        Spawn one creep of a random type at a random position, add it to
        ``self.creeps`` and bump the matching entry in ``self.creep_counts``.
        """
        creep_type = self.rng.choice([0, 1])

        # Keep spawn points away from the screen border. x is bounded by the
        # width and y by the height so that non-square screens are covered
        # correctly.
        margin = self.CREEP_RADII[creep_type] * 1.5
        low = (margin, margin)
        high = (self.width - margin, self.height - margin)

        pos = (0, 0)
        dist = 0.0

        # Resample until the spawn point is at least 1.5 px from the player.
        # NOTE(review): the threshold is an absolute pixel distance, not a
        # multiple of the radius, so a creep can still overlap the player --
        # confirm whether that is intended.
        while dist < 1.5:
            pos = self.rng.uniform(low, high)
            dist = math.sqrt(
                (self.player.pos.x - pos[0])**2 + (self.player.pos.y - pos[1])**2)

        creep = Creep(
            self.CREEP_COLORS[creep_type],
            self.CREEP_RADII[creep_type],
            pos,
            self.rng.choice([-1, 1], 2),
            self.rng.rand() * self.CREEP_SPEED,
            self.CREEP_REWARD[creep_type],
            self.CREEP_TYPES[creep_type],
            self.width,
            self.height,
            self.rng.rand()
        )

        self.creeps.add(creep)

        self.creep_counts[self.CREEP_TYPES[creep_type]] += 1

    def getGameState(self):
        """
        Gets a non-visual state representation of the game.

        Returns
        -------

        dict
            * player x position.
            * player y position.
            * player x velocity.
            * player y velocity.
            * player distance to each creep (``"creep_dist"``).
            * [x, y] position of each creep (``"creep_pos"``).
            * [x, y] velocity of each creep (``"creep_vel"``).

            The three creep entries are grouped by creep type.

        """

        state = {
            "player_x": self.player.pos.x,
            "player_y": self.player.pos.y,
            "player_velocity_x": self.player.vel.x,
            "player_velocity_y": self.player.vel.y,
            "creep_dist": {
                "GOOD": [],
                "BAD": []
            },
            "creep_pos": {
                "GOOD": [],
                "BAD": []
            },
            "creep_vel": {
                "GOOD": [],
                "BAD": []
            }
        }

        for c in self.creeps:
            # Euclidean distance between the player and this creep.
            dist = math.sqrt((self.player.pos.x - c.pos.x) **
                             2 + (self.player.pos.y - c.pos.y)**2)
            state["creep_dist"][c.TYPE].append(dist)
            state["creep_pos"][c.TYPE].append([c.pos.x, c.pos.y])
            # Velocity is the creep's direction scaled by its speed.
            state["creep_vel"][c.TYPE].append(
                [c.direction.x * c.speed, c.direction.y * c.speed])
        return state

    def getScore(self):
        """
            Return the current score.
        """
        return self.score

    def game_over(self):
        """
            Return True once no "GOOD" creeps are left on the screen.
        """
        return (self.creep_counts['GOOD'] == 0)

    def init(self):
        """
            Starts/Resets the game to its initial state
        """
        self.creep_counts = {"GOOD": 0, "BAD": 0}

        if self.player is None:
            self.player = Player(
                self.AGENT_RADIUS, self.AGENT_COLOR,
                self.AGENT_SPEED, self.AGENT_INIT_POS,
                self.width, self.height
            )

        else:
            # Reuse the existing player sprite; only reset its motion state.
            self.player.pos = vec2d(self.AGENT_INIT_POS)
            self.player.vel = vec2d((0.0, 0.0))

        if self.creeps is None:
            self.creeps = pygame.sprite.Group()
        else:
            self.creeps.empty()

        for _ in range(self.N_CREEPS):
            self._add_creep()

        self.score = 0
        self.ticks = 0
        self.lives = -1

    def step(self, dt):
        """
            Perform one step of game emulation.

            ``dt`` is divided by 1000 before being handed to the sprites
            (presumably milliseconds -> seconds; confirm against the caller).
        """
        dt /= 1000.0
        self.screen.fill(self.BG_COLOR)

        self.score += self.rewards["tick"]

        self._handle_player_events()
        self.player.update(self.dx, self.dy, dt)

        # Colliding creeps are removed from the group (dokill=True); each one
        # is scored and replaced by a freshly spawned creep.
        hits = pygame.sprite.spritecollide(self.player, self.creeps, True)
        for creep in hits:
            self.creep_counts[creep.TYPE] -= 1
            self.score += creep.reward
            self._add_creep()

        if self.creep_counts["GOOD"] == 0:
            self.score += self.rewards["win"]

        self.creeps.update(dt)

        self.player.draw(self.screen)
        self.creeps.draw(self.screen)
class PuckWorld(base.Game):
    """
    Based on Karpathy's PuckWorld in `REINFORCEjs`_.

    .. _REINFORCEjs: https://github.com/karpathy/reinforcejs

    Each step the score is reduced by the player's distance to the good
    creep, with an extra penalty while the player is inside the bad creep's
    outer radius. The good creep is moved to a new random position every
    500 ticks. The game never reports itself as finished.

    Parameters
    ----------
    width : int
        Screen width.

    height : int
        Screen height, recommended to be same dimension as width.

    """
    def __init__(self,
        width=64,
        height=64):

        # Movement keys: W/A/S/D.
        key_bindings = {
            "up": K_w,
            "left": K_a,
            "right": K_d,
            "down": K_s
        }
        base.Game.__init__(self, width, height, actions=key_bindings)

        self.BG_COLOR = (255, 255, 255)

        # Settings for the bad creep: a small centre disc plus a larger
        # outer radius that step() uses as the penalty zone.
        self.CREEP_BAD = dict(
            radius_center=percent_round_int(width, 0.047),
            radius_outer=percent_round_int(width, 0.265),
            color_center=(110, 45, 45),
            color_outer=(150, 95, 95),
            speed=0.05*width,
        )

        # Settings for the good creep.
        self.CREEP_GOOD = dict(
            radius=percent_round_int(width, 0.047),
            color=(40, 140, 40),
        )

        self.AGENT_COLOR = (60, 60, 140)
        self.AGENT_SPEED = 0.2*width
        self.AGENT_RADIUS = percent_round_int(width, 0.047)
        # Same offset on both axes for the starting position.
        start_offset = self.AGENT_RADIUS*1.5
        self.AGENT_INIT_POS = (start_offset, start_offset)

        # Per-step movement request and step counter.
        self.dx = 0
        self.dy = 0
        self.ticks = 0

    def _handle_player_events(self):
        """
        Consume pending pygame events and turn key presses into the
        movement request (``self.dx``, ``self.dy``) for the current step.

        A ``pygame.QUIT`` event shuts pygame down and exits the process.
        """
        self.dx = 0.0
        self.dy = 0.0

        # (action name, attribute to adjust, direction sign), checked in
        # this order for every key press.
        moves = (
            ("left", "dx", -1),
            ("right", "dx", 1),
            ("up", "dy", -1),
            ("down", "dy", 1),
        )

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

            if event.type != pygame.KEYDOWN:
                continue

            pressed = event.key
            for action, axis, sign in moves:
                if pressed == self.actions[action]:
                    setattr(self, axis,
                            getattr(self, axis) + sign * self.AGENT_SPEED)

    def getGameState(self):
        """
        Gets a non-visual state representation of the game.

        Returns
        -------

        dict
            * player x position.
            * player y position.
            * players x velocity.
            * players y velocity.
            * good creep x position.
            * good creep y position.
            * bad creep x position.
            * bad creep y position.

            See code for structure.

        """
        player = self.player
        good = self.good_creep
        bad = self.bad_creep

        return {
            "player_x": player.pos.x,
            "player_y": player.pos.y,
            "player_velocity_x": player.vel.x,
            "player_velocity_y": player.vel.y,
            "good_creep_x": good.pos.x,
            "good_creep_y": good.pos.y,
            "bad_creep_x": bad.pos.x,
            "bad_creep_y": bad.pos.y
        }

    def getScore(self):
        """
            Return the current score.
        """
        return self.score

    def game_over(self):
        """
            Return bool if the game has 'finished' (always False here)
        """
        return False

    def _rngCreepPos(self):
        """
            Draw a random (x, y) position for the good creep, kept away
            from the screen edges by a multiple of its radius.
        """
        good_radius = self.CREEP_GOOD['radius']
        near_margin = good_radius*3
        far_margin = good_radius*2.5
        x = self.rng.uniform(near_margin, self.width-far_margin)
        y = self.rng.uniform(near_margin, self.height-far_margin)
        return ( x, y )

    def init(self):
        """
            Starts/Resets the game to its initial state
        """
        self.player = Player(
            self.AGENT_RADIUS,
            self.AGENT_COLOR,
            self.AGENT_SPEED,
            self.AGENT_INIT_POS,
            self.width,
            self.height
        )

        good_cfg = self.CREEP_GOOD
        self.good_creep = Creep(
            good_cfg['color'],
            good_cfg['radius'],
            self._rngCreepPos(),
            (1,1),
            0.0,
            1.0,
            "GOOD",
            self.width,
            self.height,
            0.0 #jitter
        )

        # The last two arguments are 75% of each screen dimension.
        self.bad_creep = PuckCreep(
            (self.width, self.height),
            self.CREEP_BAD,
            self.screen_dim[0]*0.75,
            self.screen_dim[1]*0.75
        )

        self.creeps = pygame.sprite.Group()
        for sprite in (self.good_creep, self.bad_creep):
            self.creeps.add(sprite)

        self.score = 0
        self.ticks = 0
        self.lives = -1

    def step(self, dt):
        """
            Perform one step of game emulation.

            ``dt`` is divided by 1000 before being handed to the sprites
            (presumably milliseconds -> seconds; confirm against the caller).
        """
        seconds = dt / 1000.0
        self.ticks += 1
        self.screen.fill(self.BG_COLOR)

        self.score += self.rewards["tick"]

        self._handle_player_events()
        self.player.update(self.dx, self.dy, seconds)

        player_pos = self.player.pos

        # Distance from the player to the good creep.
        good_dx = player_pos.x-self.good_creep.pos.x
        good_dy = player_pos.y-self.good_creep.pos.y
        dist_to_good = math.sqrt(good_dx*good_dx + good_dy*good_dy)

        # Offset and distance from the bad creep to the player.
        bad_dx = player_pos.x-self.bad_creep.pos.x
        bad_dy = player_pos.y-self.bad_creep.pos.y
        dist_to_bad = math.sqrt(bad_dx*bad_dx + bad_dy*bad_dy)

        # Base reward is the negative distance to the good creep; inside the
        # bad creep's outer radius an extra (negative) term is added.
        outer_radius = self.CREEP_BAD['radius_outer']
        reward = -dist_to_good
        if dist_to_bad < outer_radius:
            reward += 2.0*(dist_to_bad - outer_radius) / float(outer_radius)

        self.score += reward

        # Relocate the good creep every 500 ticks.
        if self.ticks % 500 == 0:
            new_x, new_y = self._rngCreepPos()
            self.good_creep.pos.x = new_x
            self.good_creep.pos.y = new_y

        # Unit vector from the bad creep towards the player; zero when the
        # two coincide, which avoids dividing by zero.
        if dist_to_bad == 0.0:
            ndx = 0.0
            ndy = 0.0
        else:
            ndx = bad_dx/dist_to_bad
            ndy = bad_dy/dist_to_bad

        self.bad_creep.update(ndx, ndy, seconds)
        self.good_creep.update(seconds)

        self.player.draw(self.screen)
        self.creeps.draw(self.screen)