Beispiel #1
0
 def basic_move(self):
     """
     Choose the next direction with the basic (non-pruning) search.

     Each action in ``self.ACTIONS`` accepted by ``can_move`` is played on a
     deep copy of ``self.board`` (``move``, ``add_up_v2``, ``move``) and the
     resulting position is scored by ``self.basic_run`` with ``is_max=False``
     and a depth budget of ``self.max_depth``.

     :return: the action with the strictly highest score (the earliest such
         action on ties); if no action was selected, a uniformly random
         entry of ``self.ACTIONS``
     """
     best_move = None
     max_value = -np.inf
     for action in self.ACTIONS:
         board_copy = cp.deepcopy(self.board)
         if not can_move(board_copy, action):
             continue
         move(board_copy, action)
         add_up_v2(board_copy, action)
         move(board_copy, action)
         value = self.basic_run(board_copy, self.max_depth, False)
         if value > max_value:
             max_value = value
             best_move = action
     if best_move is None:
         # randint's upper bound is exclusive, so size the range from the
         # action list itself; a hard-coded (0, 3) could never pick the
         # fourth action.
         return self.ACTIONS[np.random.randint(len(self.ACTIONS))]
     return best_move
Beispiel #2
0
    def basic_run(self, board, max_depth, is_max):
        """
        Score ``board`` with a depth-limited minimax search (no pruning).

        :param board: position to score; successors are built on deep copies
        :param max_depth: remaining search depth; 0 means evaluate right away
        :param is_max: truthy when the moving player is to act, falsy when a
            new tile (2 or 4) is placed on an empty cell
        :return: ``self.eval(board)`` at a leaf, otherwise the max (or min)
            of the successors' scores; ``-np.inf`` / ``np.inf`` when a
            non-leaf node has no successors
        """
        if max_depth == 0 or check_end(board):
            return self.eval(board)

        successors = []

        if not is_max:
            # Tile placement: every empty cell receives a 2, then a 4.
            for cell in find_empty_cells(board):
                for tile in (2, 4):
                    spawned = cp.deepcopy(board)
                    spawned[cell[0]][cell[1]] = tile
                    successors.append(spawned)
            lowest = np.inf
            for successor in successors:
                lowest = min(lowest,
                             self.basic_run(successor, max_depth - 1, True))
            return lowest

        # Player move: keep only the actions that can_move accepts.
        for action in self.ACTIONS:
            candidate = cp.deepcopy(board)
            if not can_move(candidate, action):
                continue
            move(candidate, action)
            add_up_v2(candidate, action)
            move(candidate, action)
            successors.append(candidate)
        highest = -np.inf
        for successor in successors:
            highest = max(highest,
                          self.basic_run(successor, max_depth - 1, False))
        return highest
Beispiel #3
0
    def alpha_beta_run(self, board, max_depth, alpha, beta, is_max):
        """
        Score ``board`` with depth-limited minimax and alpha-beta cutoffs.

        :param board: position to score; children are built on deep copies
        :param max_depth: remaining search depth; 0 means evaluate right away
        :param alpha: best score the maximizing side is already assured of
        :param beta: best score the minimizing side is already assured of
        :param is_max: truthy when the moving player is to act, falsy when a
            new tile (2 or 4) is placed on an empty cell
        :return: ``self.eval(board)`` at a leaf, otherwise the best child
            score found before a cutoff; ``-np.inf`` / ``np.inf`` when a
            non-leaf node has no children
        """
        # Stop at the depth limit or when the game is over. The finished-game
        # test must not be negated: evaluating every *unfinished* board here
        # would prevent the search from ever looking ahead.
        if max_depth == 0 or check_end(board):
            return self.eval(board)

        if is_max:
            best_value = -np.inf
            # Children are built one at a time so that branches removed by a
            # beta cutoff are never copied.
            for action in self.ACTIONS:
                child = cp.deepcopy(board)
                if not can_move(child, action):
                    continue
                move(child, action)
                add_up_v2(child, action)
                move(child, action)
                best_value = max(
                    best_value,
                    self.alpha_beta_run(child, max_depth - 1, alpha, beta,
                                        False))
                if best_value >= beta:
                    return best_value
                alpha = max(alpha, best_value)
            return best_value

        best_value = np.inf
        for cell in find_empty_cells(board):
            # Each empty cell yields two children: a 2 first, then a 4.
            for tile in (2, 4):
                child = cp.deepcopy(board)
                child[cell[0]][cell[1]] = tile
                best_value = min(
                    best_value,
                    self.alpha_beta_run(child, max_depth - 1, alpha, beta,
                                        True))
                if best_value <= alpha:
                    return best_value
                beta = min(beta, best_value)
        return best_value
Beispiel #4
0
def train_nn():
    """
    Play one game, choosing each move greedily and updating the model online.

    On every turn each playable action is applied to a deep copy of the
    board, the resulting boards are encoded with ``make_input`` and scored by
    ``P``, and the action maximizing ``2 * merge_score + P(board)`` is played
    on the real board. The encoded board chosen on the previous turn is then
    passed to ``vchange`` together with the current best value (presumably a
    training step toward that target -- confirm against ``vchange``).

    :return: tuple ``(game_len, find_max_cell(board), curr_score, board)``
        where ``game_len`` counts the moves played and ``curr_score`` sums
        the values returned by ``add_up_v2`` for those moves
    """
    board = make_board(4)
    initial_two(board)
    print_board(board)
    curr_score = 0
    game_len = 0
    last_grid = None
    while not check_end(board):
        candidates = []
        for action in Actions:
            board_copy = cp.deepcopy(board)
            if can_move(board_copy, action):
                move(board_copy, action)
                score = add_up_v2(board_copy, action)
                move(board_copy, action)
                candidates.append((board_copy, action, score))

        if not candidates:
            # Nothing is playable although check_end() has not fired, so the
            # board can no longer change and another pass would loop forever.
            # Apply the terminal target of 0 to the previous board and stop.
            if last_grid is not None:
                vchange(last_grid, 0)
            break

        boards = np.array([make_input(grid) for grid, _, _ in candidates],
                          dtype=floatX)
        p = P(boards).flatten()
        game_len += 1
        print(game_len)

        best_move = None
        best_v = None
        best_grid = None
        for i, (_, action, score) in enumerate(candidates):
            v = 2 * score + p[i]
            if best_v is None or v > best_v:
                best_v = v
                best_move = action
                best_grid = boards[i]

        move(board, best_move)
        curr_score += add_up_v2(board, best_move)
        move(board, best_move)
        simple_add_num(board)
        print_board(board)

        if last_grid is not None:
            vchange(last_grid, best_v)
        last_grid = best_grid

    return game_len, find_max_cell(board), curr_score, board