def splice_genomes(genomes, init_cell, weighted_probs, genome_len, max_attempts=1000): is_done = False new_genome = [] attempt = 0 while not is_done: attempt += 1 parent_1, parent_2 = choice(a=range(len(genomes)), size=2, replace=False, p=weighted_probs) genome0 = genomes[parent_1] genome1 = genomes[parent_2] new_genome = [] for i in range(0, genome_len, 2): new_genome.append(genome0[i]) new_genome.append(genome1[i + 1]) board = Isolation().result(init_cell) valid_genome = True for action in new_genome: board = Isolation(board=board.board, ply_count=board.ply_count + 1, locs=board.locs) if action not in board.actions(): valid_genome = False break board = board.result(action) is_done = valid_genome and (len(new_genome) == genome_len) is_done = True if attempt >= max_attempts else is_done return new_genome
class BaseCustomPlayerTest(unittest.TestCase): def setUp(self): self.time_limit = 150 self.move_0_state = Isolation() self.move_1_state = self.move_0_state.result(choice(self.move_0_state.actions())) self.move_2_state = self.move_1_state.result(choice(self.move_1_state.actions())) terminal_state = self.move_2_state while not terminal_state.terminal_test(): terminal_state = terminal_state.result(choice(terminal_state.actions())) self.terminal_state = terminal_state
def build_book(book, num_rounds=100): for num in range(num_rounds): state = Isolation() states = [] while state.ply_count <= 3: action = random.choice(state.actions()) player = state.player() states.append((state, player, action)) state = state.result(action) while not state.terminal_test(): action = alpha_beta(state, state.player()) player = state.player() state = state.result(action) win_0 = state.utility(0) > 0 win_1 = state.utility(1) > 0 assert win_0 != win_1 for s in states: state = s[0] player = s[1] action = s[2] if win_0: if player == 0: book[state][action] += 1 else: book[state][action] += -1 else: if player == 0: book[state][action] += -1 else: book[state][action] += 1 return book
def build_rand_genomes(n_genomes, init_cell, genome_len=4): assert (init_cell <= 114) and (init_cell >= 0), "Invalid opening cell value" genomes = [] for i in range(n_genomes): genome = [] board = Isolation().result(init_cell) for j in range(genome_len): try: board = Isolation(board=board.board, ply_count=board.ply_count + 1, locs=board.locs) next_action = random.choice(board.actions()) genome.append(next_action) board = board.result(next_action) except Exception as e: # try random sample from previous genomes, otherwise skip genome if len(genomes) > 0: genome = random.choice(genomes) else: break if len(genome) == genome_len: genomes.append(genome) return genomes
# locs , action # ( first, second) int # int int # Count the number of wins for key, value in first_n.items(): if key in second_n: print(key, "is in both") for key, value in second_n.items(): if key in first_n: print(key, "is in both") start_state = Isolation() for action in start_state.actions(): first_state = start_state.result(action) first_state_string = '{} {} {}'.format(first_state.board, first_state.locs[0], first_state.locs[1]) for action_s in first_state.actions(): # print(action,action_s) second_state = first_state.result(action_s) second_state_string = '{} {} {}'.format(second_state.board, second_state.locs[0], second_state.locs[1]) for action_t in second_state.actions(): # print(action,action_s) third_state = second_state.result(action_t) third_state_string = '{} {} {}'.format(third_state.board, third_state.locs[0], third_state.locs[1]) for action_f in third_state.actions(): # print(action,action_s) state = third_state.result(action_f) string = '{} {} {}'.format(state.board, state.locs[0], state.locs[1])
def _simulation(self, state: Isolation, leaf_player_id) -> float: while True: if state.terminal_test(): return state.utility(leaf_player_id) state = state.result(random.choice(state.actions()))
class GenomeTester: def __init__(self, init_cell, genome, search_depth): assert (init_cell <= 114) and (init_cell >= 0), "Invalid opening cell value" self.init_cell = init_cell self.board = Isolation() self.player0_moves = 0 self.player1_moves = 0 self.genome = genome self.search_depth = search_depth self.active_player = 0 self.move_history = [] def run(self): ############################ ####### mini max ########## def minimax(state, depth, player_id): def min_value(state, depth, player_id): if state.terminal_test(): return state.utility(player_id) if depth <= 0: return score(state, player_id) value = float("inf") for action in state.actions(): value = min( value, max_value(state.result(action), depth - 1, player_id)) return value def max_value(state, depth, player_id): if state.terminal_test(): return state.utility(player_id) if depth <= 0: return score(state, player_id) value = float("-inf") for action in state.actions(): value = max( value, min_value(state.result(action), depth - 1, player_id)) return value return max( state.actions(), key=lambda x: min_value(state.result(x), depth - 1, player_id)) def score(state, player_id): own_loc = state.locs[player_id] opp_loc = state.locs[1 - player_id] own_liberties = state.liberties(own_loc) opp_liberties = state.liberties(opp_loc) return len(own_liberties) - len(opp_liberties) ####### mini max ########## ############################ if self.player0_moves == 0: self.board = self.board.result(self.init_cell) if self.player1_moves == 0: self.board = self.board.result(random.choice(self.board.actions())) while not self.board.terminal_test(): if self.active_player == 0: if self.player0_moves < len(self.genome): next_move = self.genome[self.player0_moves] if next_move not in self.board.actions(): # move is most likely blocked (not as bad as a loss) #return self.genome, NEG_INF_INT next_move = minimax(self.board, self.search_depth, player_id=0) else: next_move = minimax(self.board, self.search_depth, player_id=0) self.player0_moves += 1 self.active_player = 1 else: next_move = minimax(self.board, self.search_depth, player_id=1) self.player1_moves += 1 self.active_player = 0 self.board = self.board.result(next_move) if self.player0_moves < len(self.genome): self.move_history.append(next_move) player0_score = self.board.utility(player_id=0) if player0_score < 0: # lost return self.genome, float("-inf") elif player0_score == 0: # game didnt finish return self.genome, NEG_INF_INT else: return self.genome, -1.0 * (self.player0_moves + self.player1_moves)
def run_one_generation(self, do_mutate=True): #print("Running generation", self.gen_count, "for cell", str(self.init_cell) + # "..."); sys.stdout.flush() ## Generate results for parent generation pool = ThreadPool(N_THREADS) results = pool.imap( lambda x: GenomeTester(self.init_cell, x, self.search_depth).run(), self.genomes) pool.close() pool.join() results = sorted(results, key=lambda x: x[1]) win_count = sum(map(lambda x: x[1] > NEG_INF_INT, results)) #print("Generation " + str(self.gen_count) + " won {:.1f}% of matches".format( # 100. * win_count / len(self.genomes)) + " (opt score " + str(results[-1][1]) + ")...") #sys.stdout.flush() ## Throw away lowest genomes n_deprecated = int(self.learning_rate * len(self.genomes)) results = results[n_deprecated:] #print("Throwing away", str(n_deprecated), "worst genomes..."); sys.stdout.flush() ## Create child generation def splice_genomes(genomes, init_cell, weighted_probs, genome_len, max_attempts=1000): is_done = False new_genome = [] attempt = 0 while not is_done: attempt += 1 parent_1, parent_2 = choice(a=range(len(genomes)), size=2, replace=False, p=weighted_probs) genome0 = genomes[parent_1] genome1 = genomes[parent_2] new_genome = [] for i in range(0, genome_len, 2): new_genome.append(genome0[i]) new_genome.append(genome1[i + 1]) board = Isolation().result(init_cell) valid_genome = True for action in new_genome: board = Isolation(board=board.board, ply_count=board.ply_count + 1, locs=board.locs) if action not in board.actions(): valid_genome = False break board = board.result(action) is_done = valid_genome and (len(new_genome) == genome_len) is_done = True if attempt >= max_attempts else is_done return new_genome # calculate sampling probabilties weighted_probs = list( map( lambda x: 1.0 / (-1 * x[1]) if x[1] > NEG_INF_INT else 1.0 / (-1 * NEG_INF_INT), results)) prob_factor = 1 / sum(weighted_probs) weighted_probs = [prob_factor * p for p in weighted_probs] self.genomes = list(map(lambda x: x[0], results)).copy() # splice genes on threads pool = ThreadPool(N_THREADS) spliced_genomes = pool.imap( lambda x: splice_genomes(self.genomes, self.init_cell, weighted_probs, self.genome_len), range(n_deprecated)) pool.close() pool.join() spliced_genomes = list(spliced_genomes) # add spliced genes to next gene pool n_spliced_genomes = 0 while (len(self.genomes) < self.n_genomes) and spliced_genomes: new_genome = spliced_genomes.pop() if new_genome: self.genomes.append(new_genome) n_spliced_genomes += 1 #print("Adding", str(n_spliced_genomes), "to genome pool..."); sys.stdout.flush() # mutate gene pool for next generation if do_mutate: n_mutations = int(self.mutate_rate * len(self.genomes)) #print("Mutating", n_mutations, "genomes in pool...") mutate_idxs = choice(a=range(len(self.genomes)), size=n_mutations, replace=False) for mutate_idx in mutate_idxs: try: genome = self.genomes[mutate_idx].copy() genome_idx = random.randint(0, self.genome_len - 1) board = Isolation().result(self.init_cell) for i in range(genome_idx): action = genome[i] board = Isolation(board=board.board, ply_count=board.ply_count + 1, locs=board.locs) board = board.result(action) for i in range(genome_idx, len(genome)): board = Isolation(board=board.board, ply_count=board.ply_count + 1, locs=board.locs) mutation_action = random.choice(board.actions()) board = board.result(mutation_action) genome[i] = mutation_action self.genomes[mutate_idx] = genome[0:self.genome_len] except Exception as e: pass # do nothing if mutation fails self.gen_count += 1 return 100. * win_count / len(self.genomes), results