def basic_move(self):
    """Choose the next direction with the plain (non-pruning) minimax search.

    Every action in ``self.ACTIONS`` is tried on a deep copy of
    ``self.board``. Each legal action is scored with ``self.basic_run`` at
    ``self.max_depth``, starting with the minimizing side to move, and the
    highest-scoring action wins (the first one on ties).

    :return: a direction taken from ``self.ACTIONS``. If no action is legal,
        or none scores above ``-inf``, a uniformly random action is returned.
    """
    best_move = None
    max_value = -np.inf

    # The caller is the max player: compare the search value of each legal
    # direction and keep the best one.
    for action in self.ACTIONS:
        board_copy = cp.deepcopy(self.board)
        if not can_move(board_copy, action):
            continue
        # Apply the action to the copy (presumably slide, merge, slide again
        # -- confirm against move/add_up_v2).
        move(board_copy, action)
        add_up_v2(board_copy, action)
        move(board_copy, action)

        value = self.basic_run(board_copy, self.max_depth, False)
        if value > max_value:
            max_value = value
            best_move = action

    if best_move is None:
        # np.random.randint excludes its upper bound, so use the number of
        # actions as the bound; a hard-coded 3 could never pick the last one.
        return self.ACTIONS[np.random.randint(len(self.ACTIONS))]
    return best_move
def basic_run(self, board, max_depth, is_max):
    """Return the minimax value of ``board`` without any pruning.

    :param board: grid to evaluate; it is never modified, every child is
        built on a deep copy.
    :param max_depth: remaining plies to search; ``0`` evaluates ``board``
        directly with ``self.eval``.
    :param is_max: ``True`` for the move-choosing side (tries every action in
        ``self.ACTIONS``), ``False`` for the tile-placing side (tries a ``2``
        and a ``4`` in every cell returned by ``find_empty_cells``).
    :return: ``self.eval`` of the board at a leaf, otherwise the max/min of
        the children's values.
    """
    if max_depth == 0 or check_end(board):
        return self.eval(board)

    expanded = False

    if is_max:
        best_value = -np.inf
        for action in self.ACTIONS:
            board_copy = cp.deepcopy(board)
            if not can_move(board_copy, action):
                continue
            move(board_copy, action)
            add_up_v2(board_copy, action)
            move(board_copy, action)
            expanded = True
            best_value = max(
                best_value, self.basic_run(board_copy, max_depth - 1, False))
    else:
        best_value = np.inf
        for cell in find_empty_cells(board):
            for tile in (2, 4):
                board_copy = cp.deepcopy(board)
                board_copy[cell[0]][cell[1]] = tile
                expanded = True
                best_value = min(
                    best_value, self.basic_run(board_copy, max_depth - 1, True))

    if not expanded:
        # No child exists (no legal action / no empty cell): score the board
        # itself rather than leaking the +/-inf sentinel to the parent, where
        # it would dominate every real evaluation.
        return self.eval(board)
    return best_value
def alpha_beta_run(self, board, max_depth, alpha, beta, is_max):
    """Return the minimax value of ``board`` using alpha-beta pruning.

    :param board: grid to evaluate; it is never modified, every child is
        built on a deep copy.
    :param max_depth: remaining plies to search; ``0`` evaluates ``board``
        directly with ``self.eval``.
    :param alpha: best value the maximizing side can already guarantee.
    :param beta: best value the minimizing side can already guarantee.
    :param is_max: ``True`` for the move-choosing side (tries every action in
        ``self.ACTIONS``), ``False`` for the tile-placing side (tries a ``2``
        and a ``4`` in every cell returned by ``find_empty_cells``).
    :return: ``self.eval`` of the board at a leaf, otherwise the (possibly
        cut-off) max/min of the children's values.
    """
    # Stop at the depth limit or when the game is over. The terminal test
    # must NOT be negated: `not check_end(board)` would score every live
    # board immediately and the search would never look ahead.
    if max_depth == 0 or check_end(board):
        return self.eval(board)

    expanded = False

    if is_max:
        best_value = -np.inf
        # Children are built one at a time so a cutoff skips the remaining
        # deep copies.
        for action in self.ACTIONS:
            board_copy = cp.deepcopy(board)
            if not can_move(board_copy, action):
                continue
            move(board_copy, action)
            add_up_v2(board_copy, action)
            move(board_copy, action)
            expanded = True
            best_value = max(
                best_value,
                self.alpha_beta_run(board_copy, max_depth - 1, alpha, beta, False))
            if best_value >= beta:
                # The minimizing parent already has a better option.
                return best_value
            alpha = max(alpha, best_value)
    else:
        best_value = np.inf
        for cell in find_empty_cells(board):
            for tile in (2, 4):
                board_copy = cp.deepcopy(board)
                board_copy[cell[0]][cell[1]] = tile
                expanded = True
                best_value = min(
                    best_value,
                    self.alpha_beta_run(board_copy, max_depth - 1, alpha, beta, True))
                if best_value <= alpha:
                    # The maximizing parent already has a better option.
                    return best_value
                beta = min(beta, best_value)

    if not expanded:
        # No child exists (no legal action / no empty cell): score the board
        # itself rather than returning the +/-inf sentinel.
        return self.eval(board)
    return best_value
def train_nn():
    """Play one game on a 4x4 board, updating the value model after each move.

    Each turn, every legal action in ``Actions`` is applied to a deep copy of
    the board. The resulting grids are encoded with ``make_input`` and scored
    by ``P``, and the action maximizing ``2 * merge_score + P(grid)`` is
    played on the real board (the first one on ties). After each turn
    ``vchange`` is called with the previously chosen grid and the value just
    selected.

    NOTE(review): ``P`` is presumably the value network's prediction function
    and ``vchange`` presumably moves the stored value of a grid toward the
    given target -- confirm against their definitions.
    NOTE(review): when the loop ends through ``check_end`` the last chosen
    grid never receives an update -- confirm this is intended.

    :return: tuple ``(game_len, find_max_cell(board), curr_score, board)``:
        number of moves played, the result of ``find_max_cell`` on the final
        board, the accumulated ``add_up_v2`` score, and the final board.
    """
    board = make_board(4)
    initial_two(board)
    print_board(board)

    curr_score = 0
    game_len = 0
    last_grid = None

    while not check_end(board):
        # Collect (resulting board, action, merge score) for each legal action.
        board_list = []
        for action in Actions:
            board_copy = cp.deepcopy(board)
            if can_move(board_copy, action):
                move(board_copy, action)
                score = add_up_v2(board_copy, action)
                move(board_copy, action)
                board_list.append((board_copy, action, score))

        if not board_list:
            # check_end says the game is live but nothing can be played: the
            # board would never change again, so apply the zero-value update
            # once and stop instead of looping forever.
            if last_grid is not None:
                vchange(last_grid, 0)
            break

        boards = np.array(
            [make_input(grid) for grid, _, _ in board_list], dtype=floatX)
        p = P(boards).flatten()
        game_len += 1
        print(game_len)

        best_move = -1
        best_v = None
        best_grid = None
        for i, (_, action, score) in enumerate(board_list):
            v = 2 * score + p[i]
            if best_v is None or v > best_v:
                best_v = v
                best_move = action
                best_grid = boards[i]

        # Play the chosen action on the real board, then add a new tile.
        move(board, best_move)
        curr_score += add_up_v2(board, best_move)
        move(board, best_move)
        simple_add_num(board)
        print_board(board)

        if last_grid is not None:
            vchange(last_grid, best_v)
        last_grid = best_grid

    return game_len, find_max_cell(board), curr_score, board