예제 #1
0
from life import Life
import pygame
import numpy as np
import time
import sys
import scipy.ndimage

# Board geometry: a square grid of `rows` x `cols` cells, each drawn as a
# `px`-pixel square.
px = 20
rows = cols = 8
height, width = px * rows, px * cols

# Create the board and place an initial set of blue tiles on it.
life = Life(rows=rows, cols=cols)
life.accept_tiles(
    [
        (4, 4), (4, 6), (3, 3), (3, 4), (5, 5),
        (6, 6), (7, 7), (6, 7), (5, 7),
    ],
    'blue',
)

# Open a window exactly as large as the board, with input grabbing disabled.
pygame.init()
size = (width, height)
screen = pygame.display.set_mode(size)
pygame.event.set_grab(False)

# Integer codes for the five cell states (values 0 through 4, in this order).
BLANK, LIVE_RED, DEAD_RED, LIVE_BLUE, DEAD_BLUE = range(5)

# Three-component colour arrays (presumably RGB, for drawing cells -- confirm
# against the rendering code).
blank = np.array((200, 200, 200))
blue = np.array((0, 0, 150))
red = np.array((150, 0, 0))
blueish = np.array((100, 100, 250))
예제 #2
0
class Game:
    """Turn-based red-versus-blue game played on a ``Life`` board.

    Red moves first. A turn consists of ``tiles_per_move`` accepted tile
    placements; once they are made the turn passes to the other colour and
    the board is advanced one step with ``Life.advance_state``.
    """

    def __init__(self):
        self.life = Life()
        self.reset()

    def reset(self):
        """Clear the board and restore the start-of-game bookkeeping."""
        self.life.clean()
        self.turn = 'red'
        # Number of tiles the current player has placed during this turn.
        self.move_tile_counter = 0
        self.tiles_per_move = 3
        # True until the respective colour has had a placement accepted;
        # while True, the opening rule of is_legal_move applies to it.
        self.first_red_move = True
        self.first_blue_move = True

    def _cell_values(self):
        """Yield the value of every board cell, scanning row by row."""
        for i in range(self.life.rows):
            for j in range(self.life.cols):
                yield self.life.get_cell_value((i, j))

    def is_done(self):
        """
        Return True when the game is over.

        The game can only end once both colours have placed at least one
        tile. After that there are two conditions for a completed game:
        1. All tiles are colored
        2. A color has no live tiles
        """
        # Neither end condition counts before both players have moved, so
        # the board does not need to be scanned in that case.
        if self.first_red_move or self.first_blue_move:
            return False

        blank_count = 0
        live_red_count = 0
        live_blue_count = 0
        for cell in self._cell_values():
            if cell == BLANK:
                blank_count += 1
            elif cell == LIVE_RED:
                live_red_count += 1
            elif cell == LIVE_BLUE:
                live_blue_count += 1

        return blank_count == 0 or live_red_count == 0 or live_blue_count == 0

    def score(self):
        """Return ``(red, blue)``: the number of cells of each colour.

        Both live and dead cells of a colour count towards its score.
        """
        red = 0
        blue = 0
        for cell in self._cell_values():
            if cell in (LIVE_RED, DEAD_RED):
                red += 1
            elif cell in (LIVE_BLUE, DEAD_BLUE):
                blue += 1
        return red, blue

    def winner(self):
        """Return ``'red'`` if red holds strictly more cells, else ``'blue'``.

        Note that an equal score is reported as a win for blue.
        """
        red, blue = self.score()
        return 'red' if red > blue else 'blue'

    def accept_move(self, cell):
        """Place a tile for the player on turn if the move is legal.

        Illegal moves are reported on stdout and leave the game untouched.
        After the ``tiles_per_move``-th accepted tile of a turn, the turn
        passes to the other colour and the board is advanced one step.

        Returns True if the tile was placed, False otherwise.
        """
        if not self.is_legal_move(cell, self.turn, debug=True):
            return False

        self.life.accept_tiles([cell], self.turn)
        self.move_tile_counter += 1

        if self.turn == 'red':
            self.first_red_move = False
        else:
            self.first_blue_move = False

        if self.move_tile_counter == self.tiles_per_move:
            self.turn = 'blue' if self.turn == 'red' else 'red'
            self.life.advance_state()
            self.move_tile_counter = 0

        return True

    def is_legal_move(self, cell, player, debug=False):
        """
        Return True if ``player`` may place a tile on ``cell``.

        All moves must be made on dead or blank tiles.

        Your first tile can be placed anywhere as long as it doesn't border
        an opponent's live tile.

        After that, a legal move is one in which your chosen tile is a
        neighbor of a live tile you control.

        When ``debug`` is true, the reason for a rejection is printed,
        attributed to ``player`` (the colour the check was made for).
        """
        if is_live(self.life.get_cell_value(cell)):
            if debug:
                print(f'{player}, {cell}, attempted placement on live tile')
            return False

        red_neighbors, blue_neighbors = self.life.neighbors(cell)
        if player == 'red':
            ally_neighbors = red_neighbors
            opponent_neighbors = blue_neighbors
        else:
            ally_neighbors = blue_neighbors
            opponent_neighbors = red_neighbors

        is_opening_move = ((self.first_red_move and player == 'red')
                           or (self.first_blue_move and player == 'blue'))

        if is_opening_move:
            if opponent_neighbors > 0:
                if debug:
                    print(
                        f'{player}, {cell}, attempted placement next to opponent on first move'
                    )
                return False
            return True

        if ally_neighbors > 0:
            return True
        if debug:
            print(
                f'{player}, {cell}, attempted placement away from ally on non-starting move'
            )
        return False

    def encoded(self):
        """Encode the game from the point of view of the player on turn.

        Returns a tuple ``(encoded_board, encoded_move_tile_count)``:

        * ``encoded_board`` is a bool array of shape ``(rows, cols, 5)``
          holding a one-hot vector per cell over the channels
          ``[blank, ally alive, ally dead, enemy alive, enemy dead]``.
          A cell whose value matches none of the five known states is left
          all-False.
        * ``encoded_move_tile_count`` is a one-hot bool array of length 3
          for ``move_tile_counter`` equal to 0, 1, or anything else.
        """
        # Channel indices of the per-cell one-hot vector.
        blank, ally_alive, ally_dead, enemy_alive, enemy_dead = range(5)

        red_is_ally = self.turn == 'red'
        blue_is_ally = self.turn == 'blue'

        encoded_board = np.zeros((self.life.rows, self.life.cols, 5),
                                 dtype='bool')
        for i in range(self.life.rows):
            for j in range(self.life.cols):
                # Read each cell once and reuse the value for every test.
                cell = self.life.get_cell_value((i, j))
                if cell == BLANK:
                    channel = blank
                elif cell == LIVE_RED:
                    channel = ally_alive if red_is_ally else enemy_alive
                elif cell == LIVE_BLUE:
                    channel = ally_alive if blue_is_ally else enemy_alive
                elif cell == DEAD_RED:
                    channel = ally_dead if red_is_ally else enemy_dead
                elif cell == DEAD_BLUE:
                    channel = ally_dead if blue_is_ally else enemy_dead
                else:
                    continue
                encoded_board[i, j, channel] = True

        # Counter values other than 0 and 1 all share the last slot.
        slot = self.move_tile_counter if self.move_tile_counter in (0, 1) else 2
        encoded_move_tile_count = np.zeros(3, dtype='bool')
        encoded_move_tile_count[slot] = True

        return (encoded_board, encoded_move_tile_count)
예제 #3
0
class Environment:
    """Single-player episodic environment in which an agent places blue tiles.

    An episode lasts at most ``rounds_per_episode`` rounds. Each round is
    made of ``moves_per_round`` actions (a tile placement or a pass), after
    which the board is advanced one step with ``Life.advance_state``.
    Actions are integers: ``row * cols + col`` selects a cell and
    ``rows * cols`` means "pass".
    """

    def __init__(self, rounds_per_episode=16, display=False):
        self.life = Life(display=display)
        self.rows = self.life.rows
        self.cols = self.life.cols
        self.color = 'blue'

        self.rounds_per_episode = rounds_per_episode
        self.moves_per_round = 3  # TODO clean variable name like this in two_player

        # Inputs and outputs for our models
        self.x1_shape = (self.rows, self.cols, 3)  # board state
        self.x2_shape = (self.moves_per_round, )  # move tile counter
        # One extra slot so the state after the final round can be encoded.
        self.x3_shape = (self.rounds_per_episode + 1, )  # round counter
        self.y_shape = (self.rows * self.cols + 1,
                        )  # actions, one additional for "passing"

        self.nb_actions = self.rows * self.cols + 1

        self.reset()

    @staticmethod
    def _one_hot(size, index):
        """Return a bool vector of length ``size`` with only ``index`` set."""
        vector = np.zeros(size, dtype='bool')
        vector[index] = True
        return vector

    def reset(self):
        """Start a new episode and return its initial state."""
        # these variables are reset every episode
        self.episode_round_n = 0
        self.episode_action_n = 0
        self.move_tile_counter = 0
        self.first_move = True

        self.life.clean()
        return self.state()

    def encode_action(self, cell):
        """Return the one-hot bool vector (shape ``y_shape``) for ``cell``.

        ``cell`` is a ``(row, col)`` pair, or None for "pass", which is
        encoded in the last slot.
        """
        action = np.zeros(self.y_shape, dtype='bool')
        if cell is None:
            action[self.cols * self.rows] = 1
        else:
            i, j = cell
            action[i * self.cols + j] = 1
        return action

    def decode_action(self, action):
        """Turn an action index into the cell to play, or None for a pass.

        The pass action and every illegal placement decode to None (an
        illegal move means pass). Only the selected cell is checked for
        legality, so decoding does not scan the whole board.

        Raises IndexError if ``action`` is outside ``0 .. rows * cols``.
        """
        pass_action = self.rows * self.cols
        if not 0 <= action <= pass_action:
            raise IndexError(
                f'action {action} is out of range for {pass_action + 1} actions')

        if action == pass_action:
            return None

        cell = (action // self.cols, action % self.cols)
        if not self.is_legal_move(cell):
            return None  # illegal move means pass
        return cell

    def encode_board(self):
        """ Returns encoded board state with shape (rows, cols, 3,)

        Each cell is one-hot encoded over ``[blank, live blue, dead blue]``;
        a cell holding any other value is left all-False.
        """
        # 3 possible cell states
        blank = np.array([1, 0, 0], dtype='bool')
        alive = np.array([0, 1, 0], dtype='bool')
        dead = np.array([0, 0, 1], dtype='bool')

        encoded_board = np.zeros(self.x1_shape, dtype='bool')
        for i in range(self.life.rows):
            for j in range(self.life.cols):
                v = self.life.get_cell_value((i, j))
                if v == self.life.BLANK:
                    encoded_board[i][j] = blank
                elif v == self.life.LIVE_BLUE:
                    encoded_board[i][j] = alive
                elif v == self.life.DEAD_BLUE:
                    encoded_board[i][j] = dead
        return encoded_board

    def encode_move_tile_counter(self):
        """ Returns encoded form of how many moves have occurred this round """
        return self._one_hot(self.moves_per_round, self.move_tile_counter)

    def encode_round_counter(self):
        """ Returns encoded form of how many rounds have occurred this episode """
        return self._one_hot(self.rounds_per_episode + 1, self.episode_round_n)

    def state(self):
        """ Returns (board_state, move_tile_counter, round_counter) as a tuple """
        return (self.encode_board(), self.encode_move_tile_counter(),
                self.encode_round_counter())

    def step(self, action):
        """Apply one action and return ``(state, reward, done, info)``.

        An illegal or "pass" action places no tile but still consumes one of
        the round's moves. When the round's last move is made, the board is
        advanced and the round counter incremented. ``reward`` is
        ``Life.count_colored()`` on the step that ends the episode and 0
        otherwise; ``info`` is always an empty dict.
        """
        cell = self.decode_action(action)
        if cell is not None:  # cell being None would mean the agent "passed"
            self.life.accept_tiles([cell], self.color)
            self.first_move = False

        self.move_tile_counter += 1
        self.episode_action_n += 1

        if self.move_tile_counter == self.moves_per_round:
            self.life.advance_state()
            self.move_tile_counter = 0
            self.episode_round_n += 1

        state = self.state()
        done = self.is_done()
        reward = self.life.count_colored() if done else 0

        return (state, reward, done, {})

    def is_done(self):
        """Return True once the episode is over.

        The episode ends when all rounds have been played, or when a tile
        has been placed at some point and ``Life.count_live_total()`` has
        dropped to zero.
        """
        if self.episode_round_n == self.rounds_per_episode:
            return True
        if self.life.count_live_total() == 0 and not self.first_move:
            return True
        return False

    def is_legal_move(self, cell):
        """
        Return True if ``cell`` (a ``(row, col)`` pair or None) may be played.

        Passing (None) is always legal, and a live tile can never be
        played on.

        On the first tile placement on the first round of an episode,
        the tile can be legally placed anywhere.

        After that, a legal tile placement must neighbor a live tile.
        """
        if cell is None:
            return True
        if is_live(self.life.get_cell_value(cell)):
            return False
        if self.first_move:
            return True
        return self.life.count_live_neighbors(cell) != 0

    def legal_move_mask(self):
        """Return a flat 0/1 mask of length ``rows * cols + 1`` over actions.

        Entry ``row * cols + col`` is 1 when that cell may be played; the
        final entry (pass) is always 1.
        """
        cell_flags = np.array(
            [
                self.is_legal_move((i, j))
                for i in range(self.rows)
                for j in range(self.cols)
            ],
            dtype='bool',
        )
        # Appending an integer array keeps the result's integer dtype.
        return np.append(cell_flags, np.array([1]))
예제 #4
0
class Environment:
    """Single-player environment for blue that records every step it takes.

    An episode lasts at most ``rounds_per_episode`` rounds of
    ``moves_per_round`` actions each. Every accepted action stores the
    state it was taken in and the action itself, so that the whole episode
    can be retrieved afterwards with ``get_episode_memories``. Actions are
    integers: ``row * cols + col`` selects a cell, ``rows * cols`` passes.
    """

    def __init__(self, rounds_per_episode=8, display=False):
        self.life = Life(display=display)
        self.rows = self.life.rows
        self.cols = self.life.cols
        self.color = 'blue'

        self.rounds_per_episode = rounds_per_episode
        self.moves_per_round = 3 # TODO clean variable name like this in two_player

        # Inputs and outputs for our models
        self.x1_shape = (self.rows, self.cols, 3) # board state
        self.x2_shape = (self.moves_per_round,) # move tile counter
        self.x3_shape = (self.rounds_per_episode,) # round counter
        self.model_board_shape = (self.rows, self.cols*2, 3) # concatenated boards shape
        self.y_shape = (self.rows*self.cols + 1,) # actions, one additional for "passing"

        self.reset()

    def reset(self):
        """Start a new episode: reset counters, memory buffers and the board."""
        # these variables are reset every episode
        self.episode_round_n = 0
        self.episode_action_n = 0
        self.move_tile_counter = 0
        self.first_move = True

        # One memory slot per action that can be taken in an episode.
        max_per_ep = self.rounds_per_episode*self.moves_per_round
        self.board_states = np.zeros((max_per_ep,) + self.x1_shape, dtype='bool')
        self.move_tile_counters = np.zeros((max_per_ep,) + self.x2_shape, dtype='bool')
        self.round_counters = np.zeros((max_per_ep,) + self.x3_shape, dtype='bool')
        self.actions = np.zeros((max_per_ep,) + self.y_shape, dtype='bool')
        self.final_board = np.zeros(self.x1_shape, dtype='bool')

        self.life.clean()

    def encode_action(self, cell):
        """Return the one-hot bool vector (shape ``y_shape``) for ``cell``.

        ``cell`` is a ``(row, col)`` pair, or None for "pass", which is
        encoded in the last slot.
        """
        action = np.zeros(self.y_shape, dtype='bool')
        if cell is None:
            action[self.cols*self.rows] = 1
        else:
            i, j = cell
            action[i*self.cols + j] = 1
        return action

    def decode_action(self, action):
        """Return the ``(row, col)`` cell for an action index, or None for pass."""
        if action == self.rows*self.cols:
            return None
        return (action // self.cols, action % self.cols)

    def board_state(self):
        """Return the board as a bool array of shape ``x1_shape``.

        Each cell is one-hot encoded over ``[blank, live blue, dead blue]``;
        a cell holding any other value is left all-False.
        """
        # 3 possible cell states
        blank = np.array([1, 0, 0], dtype='bool')
        alive = np.array([0, 1, 0], dtype='bool')
        dead = np.array([0, 0, 1], dtype='bool')

        encoded_board = np.zeros(self.x1_shape, dtype='bool')
        for i in range(self.life.rows):
            for j in range(self.life.cols):
                v = self.life.get_cell_value((i, j))
                if v == self.life.BLANK:
                    encoded_board[i][j] = blank
                elif v == self.life.LIVE_BLUE:
                    encoded_board[i][j] = alive
                elif v == self.life.DEAD_BLUE:
                    encoded_board[i][j] = dead
        return encoded_board

    def state(self):
        """ Returns (board_state, move_tile_counter, round_counter)

        The two counters are one-hot bool vectors. The round vector has
        ``rounds_per_episode`` slots, so this raises IndexError once
        ``episode_round_n`` has reached ``rounds_per_episode``.
        """
        encoded_move_tile_counter = np.zeros(self.moves_per_round, dtype='bool')
        encoded_move_tile_counter[self.move_tile_counter] = True

        # encoded round_number, which tells you how many rounds have occurred this episode
        encoded_round_counter = np.zeros(self.rounds_per_episode, dtype='bool')
        encoded_round_counter[self.episode_round_n] = True

        return (self.board_state(), encoded_move_tile_counter, encoded_round_counter)

    def step(self, action):
        """Apply ``action`` if it is legal; do nothing at all otherwise.

        A legal action (a placement or a pass) is recorded together with
        the state it was taken in, and consumes one of the round's moves.
        When the round's last move is made, the board is advanced and the
        round counter incremented. Always returns None.
        """
        cell = self.decode_action(action)
        if not self.is_legal_move(cell):
            return

        # for replay experience
        self.memorize_state()
        self.memorize_action(cell)

        if cell is not None: # cell being None would mean the agent "passed"
            self.life.accept_tiles([cell], self.color)
            self.first_move = False

        self.move_tile_counter += 1
        self.episode_action_n += 1

        if self.move_tile_counter == self.moves_per_round:
            self.life.advance_state()
            self.move_tile_counter = 0
            self.episode_round_n += 1

    def is_done(self):
        """Return True once the episode is over.

        The episode ends when all rounds have been played, or when a tile
        has been placed at some point and ``Life.count_live_total()`` has
        dropped to zero. As a side effect, the final board is stored
        whenever True is returned.
        """
        out_of_rounds = self.episode_round_n == self.rounds_per_episode
        if out_of_rounds or (self.life.count_live_total() == 0
                             and not self.first_move):
            self.memorize_final_board()
            return True
        return False

    def is_legal_move(self, cell):
        """
        Return True if ``cell`` (a ``(row, col)`` pair or None) may be played.

        Passing (None) is always legal, and a live tile can never be
        played on.

        On the first tile placement on the first round of an episode,
        the tile can be legally placed anywhere.

        After that, a legal tile placement must neighbor a live tile.
        """
        if cell is None:
            return True
        if is_live(self.life.get_cell_value(cell)):
            return False
        if self.first_move:
            return True
        return self.life.count_live_neighbors(cell) != 0

    def legal_move_mask(self):
        """Return a flat 0/1 mask of length ``rows * cols + 1`` over actions.

        Entry ``row * cols + col`` is 1 when that cell may be played; the
        final entry (pass) is always 1.
        """
        legal_move_mask = np.zeros((self.rows, self.cols), dtype='bool')
        for i in range(self.rows):
            for j in range(self.cols):
                legal_move_mask[i][j] = self.is_legal_move((i, j))
        flattened = np.ravel(legal_move_mask)
        # Appending an integer array keeps the result's integer dtype.
        return np.append(flattened, np.array([1]))

    def memorize_final_board(self):
        """Store the current encoded board as the episode's final board."""
        self.final_board = self.board_state()

    def memorize_action(self, cell):
        """Store the encoded action in the slot of the current step."""
        i = self.episode_action_n
        self.actions[i] = self.encode_action(cell)

    def memorize_state(self):
        """Store the encoded current state in the slot of the current step."""
        i = self.episode_action_n
        encoded_board, encoded_move_tile_counter, encoded_round_counter = self.state()
        self.board_states[i] = encoded_board
        self.move_tile_counters[i] = encoded_move_tile_counter
        self.round_counters[i] = encoded_round_counter

    def get_episode_memories(self):
        """Return the recorded steps of the episode so far.

        Returns ``(board_states, move_tile_counters, round_counters,
        actions)``, each holding one entry per recorded step. Every entry
        of ``board_states`` is the board at that step with the stored final
        board appended along the column axis, i.e. it has shape
        ``model_board_shape``.
        """
        # drop the unused slots first so only real steps are processed
        n = self.episode_action_n
        recorded_boards = self.board_states[:n]
        move_tile_counters = self.move_tile_counters[:n]
        round_counters = self.round_counters[:n]
        actions = self.actions[:n]

        # put the actual and final board states as a single 'image'
        final_board = np.asarray(self.final_board, dtype='bool')
        final_board_expanded = np.broadcast_to(
            final_board, (len(recorded_boards),) + final_board.shape)
        board_states = np.concatenate((recorded_boards, final_board_expanded), axis=2)

        return (board_states, move_tile_counters, round_counters, actions)