Example #1
Depth-limited minimax with alpha-beta pruning for the Checkers game: each legal move is tried on a copy of the position, and the alpha/beta bounds cut off branches the opponent would never allow.
import sys
import numpy as np

def min_max_alpha_beta(game, turn, max_depth, alpha=-sys.float_info.max, beta=sys.float_info.max):
    # Minimax search with alpha-beta pruning: turn > 0 maximizes, turn < 0
    # minimizes, and max_depth limits the recursion.
    best_score_move = None
    game.available_moves()
    moves = game.moves

    if not moves:
        # No legal moves: score the position as neutral.
        return 0, None

    for move in moves:
        # Play the move on a copy so the caller's game is left untouched.
        new_game = Checkers()
        new_game.board_state = np.array(game.board_state)  # copy, not a shared reference
        new_game.turn = game.turn
        new_game.moves_queen_with_out_capture = game.moves_queen_with_out_capture
        new_game.move(move)

        winner = new_game.win
        if winner != 0:
            # Terminal position: score it directly instead of searching deeper.
            score = winner * 10000
        elif max_depth <= 1:
            score = evaluate(new_game)
        else:
            score, _ = min_max_alpha_beta(new_game, -turn, max_depth - 1, alpha, beta)

        if turn > 0:
            if score > alpha:
                alpha = score
                best_score_move = move
        else:
            if score < beta:
                beta = score
                best_score_move = move
        if alpha >= beta:
            # Prune: the opponent would never allow this branch.
            break
    return (alpha if turn > 0 else beta), best_score_move
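
The Checkers class and the evaluate heuristic called above are defined elsewhere and not shown. A minimal sketch of evaluate, assuming board_state is a NumPy-compatible grid of signed piece values (positive for the maximizing player, negative for the opponent; this encoding is an assumption, not taken from the example):

import numpy as np

def evaluate(game):
    # Hypothetical material-count heuristic from the maximizing player's
    # point of view. Assumes pieces are stored as signed numbers in
    # game.board_state (e.g. +1/-1 for men, +2/-2 for queens) -- the real
    # encoding is not shown above.
    board = np.asarray(game.board_state, dtype=float)
    return float(board.sum())

Any evaluation that returns higher scores for positions favoring the turn > 0 player can be dropped into the search unchanged.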
Example #2
Pre-fills an experience-replay buffer with (action, state, reward) transitions from random play against a minimax opponent, then runs the training episodes, tracking reward, epsilon, and a 100-episode running average.
        model2.set_session(session)
        session.run(tf.global_variables_initializer())
        GAME.reset()

        # Fill the replay buffer with MIN_EXPERIENCES transitions before training starts.
        wins = 0
        for i in range(MIN_EXPERIENCES):
            if GAME.win != 0:
                # The previous game ended; start a fresh one before generating moves.
                GAME.reset()
            GAME.available_moves()
            move = random_play(GAME)
            action = encoding_move(move)
            GAME.move(move)
            if GAME.win == 0:
                # Let the minimax opponent reply on a copy of the position.
                new_GAME = Checkers()
                new_GAME.board_state = np.array(GAME.board_state)
                new_GAME.turn = GAME.turn
                new_GAME.moves_queen_with_out_capture = GAME.moves_queen_with_out_capture
                move = min_max.min_max_player(new_GAME, new_GAME.turn)
                GAME.move(move)
            reward = GAME.win
            experience_replay_buffer.add_experince(action, GAME.board_state, reward)

        t0 = datetime.now()
        for i in range(num_episodes):
            total_t, episode_reward, duration, num_steps_in_episode, time_per_step, epsilon = play_one(
                total_t, experience_replay_buffer, model1, model2, epsilon)
            episode_rewards[i] = episode_reward

            # Average reward over the last 100 episodes (i - 99 .. i inclusive).
            last_100_avg = episode_rewards[max(0, i - 99):i + 1].mean()
            print("Episode:", i, "Duration:", duration, "Num steps:",