Example no. 1
from multiprocessing import Manager, Pool


def speedy_minimax(state,
                   depth,
                   θ,
                   searched_states=None,
                   first=False,
                   memoised_states=None):
    if state.training_terminal_test():
        return state.utility(), searched_states
    if depth == 0:
        return H(Φ(state, memoised_states), θ), searched_states

    maxEval = -INF
    # On the root call, set up multiprocessing sharing the memoised states dict
    if first:
        with Manager() as m:
            d = m.dict(memoised_states)
            children = [(state.result(a), depth - 1, θ, searched_states, False, d)
                        for a in state.actions()]
            with Pool(PROCESSES) as p:
                results = p.starmap(speedy_minimax, children)
            memoised_states.update(dict(d))

        evals = [res[0] for res in results]
        maxEval = max(evals)
        sets = [res[1] for res in results]
        for s in sets:
            searched_states.update(s)
    else:
        for a in state.actions():
            child = state.result(a)
            #child.board *= -1
            maxEval = max(
                maxEval, -speedy_minimax(child,
                                         depth - 1,
                                         θ,
                                         searched_states,
                                         memoised_states=memoised_states)[0])

    if searched_states is not None:
        # Store the state's hash, its V(s) and H(s)
        features = Φ(state, memoised_states)
        searched_states.add(
            (hash(state), maxEval, H(features, θ), features.tostring(), depth))
    return maxEval, searched_states
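
A hedged root-call sketch (assumed, not part of the original snippet), showing how speedy_minimax could be kicked off with first=True so the children are evaluated in a process pool; it mirrors the MULTI branch of Example no. 8 below:

def run_root_search(root_state, depth, θ):
    # Hypothetical driver (assumption, not in the original source).
    searched_states = set()   # filled with (hash, V(s), H(s), features, depth) tuples
    memoised_states = {}      # shared feature cache, refreshed after the Pool joins
    value, searched_states = speedy_minimax(root_state, depth, θ,
                                            searched_states, first=True,
                                            memoised_states=memoised_states)
    return value, searched_states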
Example no. 2
def negamax(state, alpha, beta, depth, θ, memoised_states=None):
    if state.training_terminal_test():
        return state.utility(train=True)
    if depth == 0:
        if memoised_states:
            return H(Φ(state, memoised_states), θ)
        return H(Φ(state), θ)

    v = -4 * INF
    for a in state.actions():
        child = state.result(a)
        v = max(v,
                -negamax(child, -beta, -alpha, depth - 1, θ, memoised_states))
        if v >= beta:
            return v
        alpha = max(alpha, v)
    return v
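
A short usage sketch (not part of the original snippet), assuming the State API (actions()/result()) used throughout these examples; it mirrors the root move-selection loop in Example no. 8:

def pick_move(state, depth, θ, memoised_states=None):
    # Root of the negamax search: negate each child's value and keep the
    # action with the best fail-soft score for the player to move.
    best_action, alpha, beta = None, -10 * INF, 10 * INF
    for a in state.actions():
        child = state.result(a)
        value = -negamax(child, -beta, -alpha, depth - 1, θ, memoised_states)
        if value > alpha:
            alpha = value
            best_action = a
    return best_action, alpha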
Example no. 3
def minimax(state, depth, θ, searched_states=None):
    if state.stages_terminal_test():
        return state.utility()
    if depth == 0:
        return H(Φ(state), θ)

    maxEval = -INF
    for a in state.actions():
        child = state.result(a)
        #child.board *= -1
        maxEval = max(maxEval, -minimax(child, depth - 1, θ, searched_states))

    if searched_states is not None:
        # Store the state, its V(s) and H(s)
        features = Φ(state)
        searched_states.append(
            (state, maxEval, H(features, θ), features, depth))
    return maxEval
Example no. 4
def negamax(state, alpha, beta, depth, θ, memoised_states=None):
    if state.training_terminal_test():
        return state.utility(train_end=True)
    if depth == 0:
        if memoised_states:
            return H(Φ(state, memoised_states), θ)
        return H(Φ(state), θ)

    v = -INF * 10
    for a in state.actions():
        child = state.result(a)
        nmax = -negamax(child, -beta, -alpha, depth - 1, θ, memoised_states)
        v = max(v, nmax)
        alpha = max(alpha, v)
        if alpha >= beta:
            return v

    return v
Example no. 5
def min_value(state,
              alpha,
              beta,
              depth,
              θ,
              searched_states,
              memoised_features=None):
    if state.training_terminal_test():
        return state.utility(train=True), -state.utility(train=True)

    v0, v1 = 4 * INF, -4 * INF
    # we assume the whole alpha-beta search starts with an even depth,
    # so depth == 1 means this is the last min_value call
    if depth == 1:
        v1 = H(Φ(state, memoised_features), θ)

    for a in state.actions():
        child = state.result(a)
        pot_v0, pot_v1 = max_value(child, alpha, beta, depth - 1, θ,
                                   searched_states, memoised_features)
        v1 = max(v1, pot_v1)
        v0 = min(v0, pot_v0)
        if v0 <= alpha:
            if depth > 1:
                # update searched_states
                # v0 is a U_BOUND, v1 is a L_BOUND
                features = Φ(state, memoised_features)
                searched_states.append(
                    (state, v1, L_BOUND, H(features, θ), features, depth))
            return v0, v1
        beta = min(beta, v0)
    if depth > 1:
        # update searched states
        # v0 is EXACT, v1 is EXACT
        features = Φ(state, memoised_features)
        searched_states.append(
            (state, v1, EXACT, H(features, θ), features, depth))
    return v0, v1
Example no. 6
    def negamax(self, state, alpha, beta, depth, θ):
        if state.terminal_test():
            return state.utility()
        if depth == 0:
            return H(Φ(state), θ)

        v = -INF
        for a in state.actions():
            child = state.result(a)
            v = max(v, -self.negamax(child, -beta, -alpha, depth - 1, θ))
            if v >= beta:
                return v
            alpha = max(alpha, v)

        return v
Example no. 7
def max_value(state,
              alpha,
              beta,
              depth,
              θ,
              searched_states,
              memoised_features=None):
    if state.training_terminal_test():
        return state.utility(train=True), -state.utility(train=True)
    if depth == 0:
        v0 = H(Φ(state, memoised_features), θ)
        return v0, -INF * 4

    v0, v1 = -4 * INF, 4 * INF
    for a in state.actions():
        child = state.result(a)
        pot_v0, pot_v1 = min_value(child, alpha, beta, depth - 1, θ,
                                   searched_states, memoised_features)
        v1 = min(v1, pot_v1)
        v0 = max(v0, pot_v0)
        if v0 >= beta:
            if depth > 1:
                # update searched states
                # v0 is a L_BOUND, v1 is a U_BOUND
                features = Φ(state, memoised_features)
                searched_states.append(
                    (state, v0, L_BOUND, H(features, θ), features, depth))
            return v0, v1
        alpha = max(alpha, v0)
    if depth > 1:
        # update searched states
        # v0 is EXACT, v1 is EXACT
        features = Φ(state, memoised_features)
        searched_states.append(
            (state, v0, EXACT, H(features, θ), features, depth))
    return v0, v1
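
Example no. 8 drives this max_value/min_value pair through an alpha_beta_train helper that is not shown on this page; the sketch below is only a guess at its shape, assuming the root is a max node and that searched_states collects the tuples appended above:

def alpha_beta_train_sketch(state, θ, searched_states, depth, memoised_features=None):
    # Hypothetical driver: run the bounded search from the root (a max node);
    # max_value/min_value append every interior state they finish, tagged
    # EXACT or L_BOUND, into searched_states for the later weight update.
    return max_value(state, -4 * INF, 4 * INF, depth, θ,
                     searched_states, memoised_features)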
Example no. 8
import gc

import numpy as np


def tree_strap_train(θo, θd, θm, θe, depth=TRAIN_DEPTH):
    state = State()
    #memoised_features = {} if MULTI else None

    memoised_features = {}
    # Number of random opening turns for this training game, sampled from the
    # weighted pool below (entries multiplied by 0 are currently disabled)
    random_turns = np.random.choice([0] * 0 + [2] * 0 + [6] * 2 + [8] * 4 +
                                    [16] * 4 + [32] * 8)
    # See if each player will use book
    X_use_book = np.random.choice([0, 0, 0, 1])
    O_use_book = np.random.choice([0, 0, 0, 1])

    while (not state.training_terminal_test()):
        print(f'Turn number {state.turn}')
        print(state)
        print()
        if state.stage[0] == OPN:
            θ = θo
        elif state.stage[0] == DEV:
            θ = θd
        elif state.stage[0] == MID:
            θ = θm
        else:
            θ = θe
            #depth = 2*TRAIN_DEPTH

        if ((state.turn % 2 and X_use_book) or
            (not state.turn % 2 and O_use_book)) and (str(state.board)
                                                      in opening_book):
            state = state.result(tuple(opening_book[str(state.board)]))

        elif state.turn < random_turns:
            # Play a uniformly random action during the opening turns
            actions = state.actions(False)
            state = state.result(actions[np.random.choice(len(actions))])
        else:
            if MULTI:
                searched_states = set()
                V = speedy_minimax(state,
                                   depth,
                                   θ,
                                   searched_states,
                                   first=True,
                                   memoised_states=memoised_features)[0]
            elif not AB_TRAIN:
                searched_states = []
                V = negamax(state, -10 * INF, 10 * INF, depth, θ,
                            memoised_features)

            if AB_TRAIN:
                searched_states = []
                alpha_beta_train(state, θ, searched_states, TRAIN_DEPTH,
                                 memoised_features)
                ab_weight_updates(searched_states, θ, depth, α, λ, MAX_CHANGE)
            else:
                Δθ = np.zeros(num_features)
                #for s, vs, hs, features, d in searched_states:
                #    # updates should only happen for states that match the player to play
                #    if not d % 2:
                #        features = np.frombuffer(features)
                #        #𝛿 = V(s) - H(features, θ)
                #        𝛿 = vs - hs
                #        Δθ += α*𝛿*features*λ**(depth-d)
                if V != 0:
                    features = Φ(state, memoised_features)
                    h = H(features, θ)
                    𝛿 = V - h
                    Δθ += α * 𝛿 * features

                for i in range(num_features):
                    if Δθ[i] > MAX_CHANGE:
                        Δθ[i] = MAX_CHANGE
                    elif Δθ[i] < -MAX_CHANGE:
                        Δθ[i] = -MAX_CHANGE
                θ += Δθ

            best_action = None
            alpha, beta, v = -4 * INF, 4 * INF, -4 * INF
            for a in state.actions():
                child = state.result(a)
                nmax = -negamax(child, -beta, -alpha, depth - 1, θ,
                                memoised_features)
                if nmax > alpha:
                    alpha = nmax
                    best_action = a

            state = state.result(best_action)
            print(alpha)

    print('Terminal State:')
    print(state)
    memoised_features = None
    gc.collect()
    return θo, θd, θm, θe
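
The commented-out loop inside tree_strap_train describes the per-state TreeStrap update; the helper below is a hedged reconstruction of it (not the author's ab_weight_updates), assuming searched_states holds the (hash, V(s), H(s), feature-bytes, depth) tuples produced by speedy_minimax in Example no. 1:

import numpy as np


def treestrap_update_sketch(searched_states, θ, depth, α, λ, max_change):
    # Move θ towards the search value V(s) of every stored state; states far
    # from the root (small remaining depth d) are discounted by λ ** (depth - d),
    # as in the commented-out loop inside tree_strap_train above.
    Δθ = np.zeros_like(θ)
    for s_hash, vs, hs, features, d in searched_states:
        if not d % 2:                  # only plies where the same player is to move
            features = np.frombuffer(features)
            𝛿 = vs - hs                # error between search value and heuristic
            Δθ += α * 𝛿 * features * λ ** (depth - d)
    np.clip(Δθ, -max_change, max_change, out=Δθ)   # cap the per-weight step
    θ += Δθ
    return θ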