# Module-level setup: builds a small Life board, opens a pygame window sized
# to it, and defines the cell-state codes and colour triples used below.
from life import Life
import pygame
import numpy as np
import time
import sys
import scipy.ndimage

# Board geometry: the window is `cols * px` wide and `rows * px` high.
# presumably `px` is the on-screen edge length of one cell in pixels -- TODO confirm
px = 20
rows = 8
cols = 8
height = px * rows
width = px * cols

# Board seeded with an initial set of 'blue' tiles.
life = Life(rows=rows, cols=cols)
life.accept_tiles([(4, 4), (4, 6), (3, 3), (3, 4), (5, 5), (6, 6), (7, 7), (6, 7), (5, 7)], 'blue')

# NOTE: these calls run at import time, so importing this module initialises
# pygame and creates a display surface.
pygame.init()
size = width, height
screen = pygame.display.set_mode(size)
pygame.event.set_grab(False)

# Integer codes for the five possible cell states.
BLANK = 0
LIVE_RED = 1
DEAD_RED = 2
LIVE_BLUE = 3
DEAD_BLUE = 4

# Three-component colour values.
# presumably RGB triples used when drawing cells -- TODO confirm against the renderer
blank = np.array([200, 200, 200])
blue = np.array([0, 0, 150])
red = np.array([150, 0, 0])
blueish = np.array([100, 100, 250])
class Game:
    """Two-player (red vs. blue) game played on a ``Life`` board.

    A turn consists of ``tiles_per_move`` tile placements by the current
    player; after the last placement the board is advanced one step and the
    turn passes to the other colour.

    NOTE(review): the methods rely on module-level names that are not defined
    inside this class: ``Life``, ``np``, the cell-state constants ``BLANK``,
    ``LIVE_RED``, ``DEAD_RED``, ``LIVE_BLUE``, ``DEAD_BLUE`` and a helper
    ``is_live(cell_value)``. Confirm ``is_live`` is defined or imported.
    """

    def __init__(self):
        """Create a default ``Life`` board and put the game in its start state."""
        self.life = Life()
        self.reset()

    def reset(self):
        """Clear the board and restore the start-of-game bookkeeping."""
        self.life.clean()
        self.turn = 'red'
        self.move_tile_counter = 0
        self.tiles_per_move = 3
        self.first_red_move = True
        self.first_blue_move = True

    def is_done(self):
        """Return True when the game is finished.

        There are two conditions for a completed game.
        1. All tiles are colored
        2. A color has no live tiles

        Neither condition counts until both players have placed their first
        tile.
        """
        # Until both colours have moved, the game cannot be over, so there is
        # no need to scan the board.
        if self.first_red_move or self.first_blue_move:
            return False

        blank_count = 0
        live_red_count = 0
        live_blue_count = 0
        for i in range(self.life.rows):
            for j in range(self.life.cols):
                cell = self.life.get_cell_value((i, j))
                if cell == BLANK:
                    blank_count += 1
                elif cell == LIVE_RED:
                    live_red_count += 1
                elif cell == LIVE_BLUE:
                    live_blue_count += 1

        return blank_count == 0 or live_red_count == 0 or live_blue_count == 0

    def score(self):
        """Return ``(red, blue)``: the number of cells (live or dead) per colour."""
        red = 0
        blue = 0
        for i in range(self.life.rows):
            for j in range(self.life.cols):
                cell = self.life.get_cell_value((i, j))
                if cell == LIVE_RED or cell == DEAD_RED:
                    red += 1
                elif cell == LIVE_BLUE or cell == DEAD_BLUE:
                    blue += 1
        return red, blue

    def winner(self):
        """Return the colour with the higher score.

        Returns 'red' only when red strictly leads; a tie is reported as
        'blue'.
        """
        red, blue = self.score()
        return 'red' if red > blue else 'blue'

    def accept_move(self, cell):
        """Place a tile for the current player on ``cell`` if that is legal.

        After ``tiles_per_move`` accepted placements the turn switches, the
        board is advanced one step and the per-turn counter restarts.

        Returns True when the tile was placed, False when the move was
        rejected (the reason is printed, since legality is checked with
        ``debug=True``).
        """
        if not self.is_legal_move(cell, self.turn, debug=True):
            return False

        self.life.accept_tiles([cell], self.turn)
        self.move_tile_counter += 1

        if self.turn == 'red':
            self.first_red_move = False
        else:
            self.first_blue_move = False

        if self.move_tile_counter == self.tiles_per_move:
            self.turn = 'blue' if self.turn == 'red' else 'red'
            self.life.advance_state()
            self.move_tile_counter = 0
        return True

    def is_legal_move(self, cell, player, debug=False):
        """Return whether ``player`` may place a tile on ``cell``.

        All moves must be made on dead or blank tiles. Your first tile can be
        placed anywhere as long as it doesn't border an opponent's live tile.
        After that, a legal move is one in which your chosen tile is a
        neighbor of a live tile you control.

        When ``debug`` is true, the reason for a rejection is printed.
        """
        if is_live(self.life.get_cell_value(cell)):
            if debug:
                # Report the player being checked, which may differ from
                # ``self.turn`` when legality is queried for the other side.
                print(f'{player}, {cell}, attempted placement on live tile')
            return False

        red_neighbors, blue_neighbors = self.life.neighbors(cell)
        if player == 'red':
            ally_neighbors = red_neighbors
            opponent_neighbors = blue_neighbors
        else:
            ally_neighbors = blue_neighbors
            opponent_neighbors = red_neighbors

        is_first_move = ((self.first_red_move and player == 'red')
                         or (self.first_blue_move and player == 'blue'))
        if is_first_move:
            if opponent_neighbors > 0:
                if debug:
                    print(
                        f'{player}, {cell}, attempted placement next to opponent on first move'
                    )
                return False
            return True

        if ally_neighbors > 0:
            return True
        if debug:
            print(
                f'{player}, {cell}, attempted placement away from ally on non-starting move'
            )
        return False

    def encoded(self):
        """Return the game state one-hot encoded from the current player's view.

        Returns a tuple ``(encoded_board, encoded_move_tile_count)``:

        * ``encoded_board`` is a boolean array of shape ``(rows, cols, 5)``
          whose last axis is ``[blank, ally alive, ally dead, enemy alive,
          enemy dead]``; a cell whose value matches none of the five state
          constants stays all-False.
        * ``encoded_move_tile_count`` is a boolean vector of length 3 with
          slot 0 set when ``move_tile_counter`` is 0, slot 1 when it is 1 and
          slot 2 otherwise.
        """
        player = self.turn
        # Channel indices on the last axis of the encoded board.
        blank, ally_alive, ally_dead, enemy_alive, enemy_dead = range(5)

        encoded_board = np.zeros((self.life.rows, self.life.cols, 5), dtype='bool')
        for i in range(self.life.rows):
            for j in range(self.life.cols):
                # Read the cell once instead of once per comparison.
                value = self.life.get_cell_value((i, j))
                if value == BLANK:
                    channel = blank
                elif value == LIVE_RED:
                    channel = ally_alive if player == 'red' else enemy_alive
                elif value == LIVE_BLUE:
                    channel = ally_alive if player == 'blue' else enemy_alive
                elif value == DEAD_RED:
                    channel = ally_dead if player == 'red' else enemy_dead
                elif value == DEAD_BLUE:
                    channel = ally_dead if player == 'blue' else enemy_dead
                else:
                    continue
                encoded_board[i, j, channel] = True

        # Any counter other than 0 or 1 maps to the last slot.
        slot = self.move_tile_counter if self.move_tile_counter in (0, 1) else 2
        encoded_move_tile_count = np.zeros(3, dtype='bool')
        encoded_move_tile_count[slot] = True

        return (encoded_board, encoded_move_tile_count)
class Environment:
    """Single-player episodic environment over a ``Life`` board.

    The agent always plays ``'blue'``. An episode lasts at most
    ``rounds_per_episode`` rounds; each round is ``moves_per_round`` actions
    (tile placements or passes) followed by one ``advance_state`` of the
    board. Actions are integers in ``[0, rows * cols]``: ``row * cols + col``
    selects a cell and ``rows * cols`` means "pass".

    NOTE(review): relies on the module-level names ``Life``, ``np`` and an
    ``is_live(cell_value)`` helper that is not defined inside this class.
    """

    def __init__(self, rounds_per_episode=16, display=False):
        """Create the board, derive the model input/output shapes and reset."""
        self.life = Life(display=display)
        self.rows = self.life.rows
        self.cols = self.life.cols
        self.color = 'blue'
        self.rounds_per_episode = rounds_per_episode
        self.moves_per_round = 3  # TODO clean variable name like this in two_player

        # Inputs and outputs for our models
        self.x1_shape = (self.rows, self.cols, 3)  # board state
        self.x2_shape = (self.moves_per_round, )  # move tile counter
        self.x3_shape = (self.rounds_per_episode + 1, )  # round counter
        self.y_shape = (self.rows * self.cols + 1, )  # actions, one additional for "passing"
        self.nb_actions = self.rows * self.cols + 1
        self.reset()

    def reset(self):
        """Start a new episode and return its initial ``state()``."""
        # these variables are reset every episode
        self.episode_round_n = 0
        self.episode_action_n = 0
        self.move_tile_counter = 0
        self.first_move = True
        self.life.clean()
        return self.state()

    def encode_action(self, cell):
        """Return the one-hot boolean action vector for ``cell``.

        ``cell`` is a ``(row, col)`` pair, or None for "pass" (the last slot).
        """
        action = np.zeros(self.y_shape, dtype='bool')
        if cell is None:
            action[self.cols * self.rows] = 1
        else:
            i, j = cell
            action[i * self.cols + j] = 1
        return action

    def decode_action(self, action):
        """Translate an action index into a ``(row, col)`` cell or None.

        None is returned both for the explicit pass action and for an
        illegal placement (an illegal move means pass).

        Raises:
            IndexError: if ``action`` is outside ``[0, rows * cols]``.
                Negative indices are rejected explicitly, because NumPy-style
                wrap-around would otherwise decode them into a bogus cell.
        """
        pass_action = self.rows * self.cols
        if not 0 <= action <= pass_action:
            raise IndexError(
                f'action {action} is out of range for {pass_action + 1} actions')
        if action == pass_action:
            return None

        cell = (action // self.cols, action % self.cols)
        # Only the chosen cell is checked; building the whole legal-move mask
        # just to look up one entry is unnecessary.
        if not self.is_legal_move(cell):
            return None  # illegal move means pass
        return cell

    def encode_board(self):
        """ Returns encoded board state with shape (rows, cols, 3,)

        The last axis is ``[blank, live blue, dead blue]``; cells holding any
        other value are left all-False.
        """
        # 3 possible cell states, as channel indices on the last axis
        blank, alive, dead = range(3)

        encoded_board = np.zeros(self.x1_shape, dtype='bool')
        for i in range(self.life.rows):
            for j in range(self.life.cols):
                v = self.life.get_cell_value((i, j))
                if v == self.life.BLANK:
                    encoded_board[i, j, blank] = True
                elif v == self.life.LIVE_BLUE:
                    encoded_board[i, j, alive] = True
                elif v == self.life.DEAD_BLUE:
                    encoded_board[i, j, dead] = True
        return encoded_board

    def encode_move_tile_counter(self):
        """ Returns encoded form of how many moves have occurred this round """
        encoded_move_tile_counter = np.zeros(self.moves_per_round, dtype='bool')
        encoded_move_tile_counter[self.move_tile_counter] = True
        return encoded_move_tile_counter

    def encode_round_counter(self):
        """ Returns encoded form of how many rounds have occurred this episode

        The vector has ``rounds_per_episode + 1`` slots so the terminal round
        count (``episode_round_n == rounds_per_episode``) is representable.
        """
        encoded_round_counter = np.zeros(self.rounds_per_episode + 1, dtype='bool')
        encoded_round_counter[self.episode_round_n] = True
        return encoded_round_counter

    def state(self):
        """ Returns the tuple (board_state, move_tile_counter, round_counter)"""
        return (self.encode_board(),
                self.encode_move_tile_counter(),
                self.encode_round_counter())

    def step(self, action):
        """Apply ``action`` and return ``(state, reward, done, info)``.

        A pass or illegal action still consumes one of the round's moves.
        The reward is 0 until the episode ends, at which point it is
        ``life.count_colored()``. ``info`` is always an empty dict.
        """
        cell = self.decode_action(action)
        if cell is not None:
            # cell being None would mean the agent "passed"
            self.life.accept_tiles([cell], self.color)
            self.first_move = False

        self.move_tile_counter += 1
        self.episode_action_n += 1
        if self.move_tile_counter == self.moves_per_round:
            self.life.advance_state()
            self.move_tile_counter = 0
            self.episode_round_n += 1

        state = self.state()
        done = self.is_done()
        reward = self.life.count_colored() if done else 0
        return (state, reward, done, {})

    def is_done(self):
        """Return True when the round limit is hit or no live cells remain.

        The "no live cells" condition only applies after the first tile has
        been placed.
        """
        if self.episode_round_n == self.rounds_per_episode:
            return True
        if self.life.count_live_total() == 0 and not self.first_move:
            return True
        return False

    def is_legal_move(self, cell):
        """Return whether placing a tile on ``cell`` is legal (None = pass).

        On the first tile placement on the first round of an episode, the
        tile can be legally placed anywhere. After that, a legal tile
        placement must neighbor a live tile. Placing on a live tile is never
        legal; passing always is.
        """
        if cell is None:
            return True
        if is_live(self.life.get_cell_value(cell)):
            return False
        if self.first_move:
            return True
        return self.life.count_live_neighbors(cell) != 0

    def legal_move_mask(self):
        """Return a flat mask of length ``rows * cols + 1`` over all actions.

        Entry ``row * cols + col`` is truthy when that placement is legal;
        the final entry (pass) is always 1. Appending the integer 1 promotes
        the boolean mask to an integer array.
        """
        legal_move_mask = np.zeros((self.rows, self.cols), dtype='bool')
        for i in range(self.rows):
            for j in range(self.cols):
                legal_move_mask[i][j] = self.is_legal_move((i, j))
        flattened = np.ravel(legal_move_mask)
        return np.append(flattened, np.array([1]))
class Environment:
    """Single-player Life environment that records an episode for replay.

    The agent always plays ``'blue'``. An episode lasts at most
    ``rounds_per_episode`` rounds of ``moves_per_round`` actions each. Every
    legal action is stored together with the state it was taken in, and the
    final board is stored when ``is_done`` first reports the episode over.
    Actions are integers: ``row * cols + col`` selects a cell and
    ``rows * cols`` means "pass".

    NOTE(review): relies on the module-level names ``Life``, ``np`` and an
    ``is_live(cell_value)`` helper that is not defined inside this class.
    """

    def __init__(self, rounds_per_episode=8, display=False):
        """Create the board, derive the model input/output shapes and reset."""
        self.life = Life(display=display)
        self.rows = self.life.rows
        self.cols = self.life.cols
        self.color = 'blue'
        self.rounds_per_episode = rounds_per_episode
        self.moves_per_round = 3  # TODO clean variable name like this in two_player

        # Inputs and outputs for our models
        self.x1_shape = (self.rows, self.cols, 3)  # board state
        self.x2_shape = (self.moves_per_round,)  # move tile counter
        self.x3_shape = (self.rounds_per_episode,)  # round counter
        self.model_board_shape = (self.rows, self.cols*2, 3)  # concatenated boards shape
        self.y_shape = (self.rows*self.cols + 1,)  # actions, one additional for "passing"
        self.reset()

    def reset(self):
        """Start a new episode: clear counters, replay buffers and the board."""
        # these variables are reset every episode
        self.episode_round_n = 0
        self.episode_action_n = 0
        self.move_tile_counter = 0
        self.first_move = True

        # One replay slot per action that can be taken in an episode.
        max_per_ep = self.rounds_per_episode*self.moves_per_round
        self.board_states = np.zeros((max_per_ep,) + self.x1_shape, dtype='bool')
        self.move_tile_counters = np.zeros((max_per_ep,) + self.x2_shape, dtype='bool')
        self.round_counters = np.zeros((max_per_ep,) + self.x3_shape, dtype='bool')
        self.actions = np.zeros((max_per_ep,) + self.y_shape, dtype='bool')
        self.final_board = np.zeros(self.x1_shape, dtype='bool')

        self.life.clean()

    def encode_action(self, cell):
        """Return the one-hot boolean action vector for ``cell``.

        ``cell`` is a ``(row, col)`` pair, or None for "pass" (the last slot).
        """
        action = np.zeros(self.y_shape, dtype='bool')
        if cell is None:
            action[self.cols*self.rows] = 1
        else:
            i, j = cell
            action[i*self.cols + j] = 1
        return action

    def decode_action(self, action):
        """Translate an action index into a ``(row, col)`` cell, or None for pass.

        No legality or range checking is done here; ``step`` checks legality.
        """
        if action == self.rows*self.cols:
            return None
        return (action // self.cols, action % self.cols)

    def board_state(self):
        """Return the board one-hot encoded with shape ``(rows, cols, 3)``.

        The last axis is ``[blank, live blue, dead blue]``; cells holding any
        other value are left all-False.
        """
        # 3 possible cell states, as channel indices on the last axis
        blank, alive, dead = range(3)

        encoded_board = np.zeros(self.x1_shape, dtype='bool')
        for i in range(self.life.rows):
            for j in range(self.life.cols):
                v = self.life.get_cell_value((i, j))
                if v == self.life.BLANK:
                    encoded_board[i, j, blank] = True
                elif v == self.life.LIVE_BLUE:
                    encoded_board[i, j, alive] = True
                elif v == self.life.DEAD_BLUE:
                    encoded_board[i, j, dead] = True
        return encoded_board

    def state(self):
        """ Returns (board_state, move_tile_counter, round_counter)

        Both counters are one-hot boolean vectors. The round vector has
        exactly ``rounds_per_episode`` slots, so this raises IndexError once
        ``episode_round_n`` has reached ``rounds_per_episode`` (i.e. after the
        final round has completed).
        """
        encoded_move_tile_counter = np.zeros(self.moves_per_round, dtype='bool')
        encoded_move_tile_counter[self.move_tile_counter] = True

        # encoded round_number, which tells you how many rounds have occurred this episode
        encoded_round_counter = np.zeros(self.rounds_per_episode, dtype='bool')
        encoded_round_counter[self.episode_round_n] = True

        return (self.board_state(), encoded_move_tile_counter, encoded_round_counter)

    def step(self, action):
        """Apply ``action`` if it is legal; illegal actions are ignored.

        A legal action (including a pass) is recorded in the replay buffers
        together with the state it was taken in, and consumes one of the
        round's moves. After ``moves_per_round`` legal actions the board is
        advanced and the round counter incremented. Returns None.
        """
        cell = self.decode_action(action)
        if not self.is_legal_move(cell):
            return

        # for replay experience
        self.memorize_state()
        self.memorize_action(cell)

        if cell is not None:
            # cell being None would mean the agent "passed"
            self.life.accept_tiles([cell], self.color)
            self.first_move = False

        self.move_tile_counter += 1
        self.episode_action_n += 1
        if self.move_tile_counter == self.moves_per_round:
            self.life.advance_state()
            self.move_tile_counter = 0
            self.episode_round_n += 1
        return

    def is_done(self):
        """Return True when the round limit is hit or no live cells remain.

        The "no live cells" condition only applies after the first tile has
        been placed. Side effect: whenever the episode is done, the current
        board is stored as the episode's final board.
        """
        out_of_rounds = self.episode_round_n == self.rounds_per_episode
        if out_of_rounds or (self.life.count_live_total() == 0 and not self.first_move):
            self.memorize_final_board()
            return True
        return False

    def is_legal_move(self, cell):
        """Return whether placing a tile on ``cell`` is legal (None = pass).

        On the first tile placement on the first round of an episode, the
        tile can be legally placed anywhere. After that, a legal tile
        placement must neighbor a live tile. Placing on a live tile is never
        legal; passing always is.
        """
        if cell is None:
            return True
        if is_live(self.life.get_cell_value(cell)):
            return False
        if self.first_move:
            return True
        return self.life.count_live_neighbors(cell) != 0

    def legal_move_mask(self):
        """Return a flat mask of length ``rows * cols + 1`` over all actions.

        Entry ``row * cols + col`` is truthy when that placement is legal;
        the final entry (pass) is always 1. Appending the integer 1 promotes
        the boolean mask to an integer array.
        """
        legal_move_mask = np.zeros((self.rows, self.cols), dtype='bool')
        for i in range(self.rows):
            for j in range(self.cols):
                legal_move_mask[i][j] = self.is_legal_move((i, j))
        flattened = np.ravel(legal_move_mask)
        return np.append(flattened, np.array([1]))

    def memorize_final_board(self):
        """Store the current encoded board as the episode's final board."""
        self.final_board = self.board_state()

    def memorize_action(self, cell):
        """Store the encoded action for the current action slot."""
        self.actions[self.episode_action_n] = self.encode_action(cell)

    def memorize_state(self):
        """Store the current encoded state in the current action slot."""
        i = self.episode_action_n
        encoded_board, encoded_move_tile_counter, encoded_round_counter = self.state()
        self.board_states[i] = encoded_board
        self.move_tile_counters[i] = encoded_move_tile_counter
        self.round_counters[i] = encoded_round_counter

    def get_episode_memories(self):
        """Return the recorded episode, trimmed to the actions actually taken.

        Returns ``(board_states, move_tile_counters, round_counters,
        actions)``, each with ``episode_action_n`` leading entries. Every
        entry of ``board_states`` is the board at that step with the final
        board concatenated along the column axis, giving per-entry shape
        ``(rows, cols * 2, 3)``.
        """
        # remove unused trailing slots before doing any work
        n = self.episode_action_n
        recorded_boards = self.board_states[:n]

        # put the actual and final board states as a single 'image'; the
        # final board is a read-only broadcast view, copied once by concatenate
        final_board_expanded = np.broadcast_to(self.final_board, recorded_boards.shape)
        board_states = np.concatenate((recorded_boards, final_board_expanded), axis=2)

        move_tile_counters = self.move_tile_counters[:n]
        round_counters = self.round_counters[:n]
        actions = self.actions[:n]

        return (board_states, move_tile_counters, round_counters, actions)