Example #1
def main():
    # Assumes State, negamax2, INF, θ, and a root state `s` are defined
    # at module level elsewhere in this file.
    # Flip the sign of every piece so the side to move is positive.
    s1 = State(s.board * -1)

    print()
    print(s1)
    print()

    depth = 2

    # Root-level negamax with an alpha-beta window: negate the child's
    # value, flip the window, and keep the action that raises alpha.
    best_action = None
    alpha, beta = -4 * INF, 4 * INF
    for a in s1.actions():
        child = s1.result(a)
        nmax = -negamax2(child, -beta, -alpha, depth - 1, θ)
        if nmax > alpha:
            alpha = nmax
            best_action = a

    print(best_action)
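
The root loop negates the child's value and swaps the (alpha, beta) window because of the negamax identity: in a zero-sum game, the value of a state for the side to move is the maximum over actions of the negated value of the resulting state. A minimal, self-contained sketch of the same pattern on a hand-built tree (nothing from this module is assumed; leaf numbers are values for the side to move at that leaf):

INF = float('inf')

# Toy tree: a node is either a numeric leaf (value for the side to move
# there) or a list of child nodes.
TREE = [
    [3, -2],   # action 0
    [1, 1],    # action 1
    [-5, 4],   # action 2
]

def toy_negamax(node, alpha, beta):
    if not isinstance(node, list):
        return node
    v = -INF
    for child in node:
        v = max(v, -toy_negamax(child, -beta, -alpha))
        if v >= beta:          # fail-soft beta cutoff
            return v
        alpha = max(alpha, v)
    return v

# Root selection, mirroring main(): one negation and window flip per ply.
best_action, alpha, beta = None, -INF, INF
for a, child in enumerate(TREE):
    v = -toy_negamax(child, -beta, -alpha)
    if v > alpha:
        alpha, best_action = v, a

print(best_action, alpha)   # -> 1 1: action 1 guarantees value 1

Action 1 wins the root because the opponent's best reply still leaves us +1; the search of action 2's subtree is cut off after its first leaf, since that leaf already proves the line worse than the incumbent.
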
Example #2
class BasePlayer:
    def __init__(self, colour):
        # Running move-clock, seeded with a small safety buffer.
        self.time = timedelta(seconds=0.01)
        t = datetime.now()
        self.colour = colour
        self.state = State()
        self.n_v_two = None
        self.depth = DEPTH
        # Stage-specific evaluation weights: opening, development,
        # midgame, endgame.
        self.θo = np.load(path + 'o16.npy')
        self.θd = np.load(path + 'd16.npy')
        self.θm = np.load(path + 'm16.npy')
        self.θe = np.load(path + 'e16.npy')
        self.time += datetime.now() - t

    def action(self):
        t = datetime.now()
        # Consult the opening book for the first few turns.
        if self.state.turn < 8:
            try:
                if str(self.state.board) in opening_book:
                    self.time += datetime.now() - t
                    return tuple(opening_book[str(self.state.board)])
            except Exception:
                # Fall through to the search if the book lookup fails.
                pass

        # n v two endgame: the opponent is down to two pieces in total
        # and we still have more than two.
        if (self.state.board[self.state.board < 0].sum() == -2
                and self.state.board[self.state.board > 0].sum() > 2):
            if self.n_v_two is None:
                self.n_v_two = NvTwo()
            return self.format_action(self.n_v_two.move(self.state))

        # n v one endgame, or the opponent holed up in a single stack.
        if (len(self.state.board[self.state.board < 0]) == 1
                and self.state.board[self.state.board > 0].sum() > 1):
            return self.format_action(n_v_one(self.state))

        # Pick the weight vector for the current game stage.
        if self.state.stage[0] == OPN:
            θ = self.θo
        elif self.state.stage[0] == DEV:
            θ = self.θd
        elif self.state.stage[0] == MID:
            θ = self.θm
        else:
            θ = self.θe

        # Time management: almost out of clock, play the first legal
        # action; merely running low, drop to a one-ply search.
        if timedelta(seconds=59.6) < self.time:
            self.time += datetime.now() - t
            return self.format_action(self.state.actions()[0])

        if timedelta(seconds=56) < self.time:
            self.depth = 1

        depth = self.depth

        # Root-level alpha-beta negamax over our legal actions.
        best_action = None
        alpha, beta = -4 * INF, 4 * INF
        for a in self.state.actions():
            child = self.state.result(a)
            nmax = -self.negamax(child, -beta, -alpha, depth - 1, θ)
            if nmax > alpha:
                alpha = nmax
                best_action = a
        self.time += datetime.now() - t
        return self.format_action(best_action)

    def update(self, colour, action):
        t = datetime.now()
        self.state = self.state.result(action)
        self.time += datetime.now() - t

    def format_action(self, action):
        # Normalise an internal action tuple into the referee's format.
        if action[0] == BOOM:
            _, orig = action
            return (BOOM, orig)
        _, n, orig, dest = action
        return (MOVE, n, orig, dest)

    def negamax(self, state, alpha, beta, depth, θ):
        # Fail-soft negamax with alpha-beta pruning; at the horizon,
        # fall back to the evaluation H over the feature vector Φ.
        if state.terminal_test():
            return state.utility()
        if depth == 0:
            return H(Φ(state), θ)

        v = -INF
        for a in state.actions():
            child = state.result(a)
            v = max(v, -self.negamax(child, -beta, -alpha, depth - 1, θ))
            if v >= beta:  # beta cutoff: the opponent avoids this line
                return v
            alpha = max(alpha, v)

        return v
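
The `if v >= beta` test makes this a fail-soft cutoff: interior nodes may return a bound rather than an exact value, but with a full initial window the value computed at the root is unchanged. A self-contained property check of that claim on random trees (nothing from State, H, or Φ is used):

import random

INF = float('inf')

def random_tree(depth, branch=3):
    # Nested-list game tree with uniform random leaf values.
    if depth == 0:
        return random.randint(-10, 10)
    return [random_tree(depth - 1, branch) for _ in range(branch)]

def negamax_plain(node):
    if not isinstance(node, list):
        return node
    return max(-negamax_plain(c) for c in node)

def negamax_ab(node, alpha, beta):
    if not isinstance(node, list):
        return node
    v = -INF
    for c in node:
        v = max(v, -negamax_ab(c, -beta, -alpha))
        if v >= beta:
            return v
        alpha = max(alpha, v)
    return v

for _ in range(100):
    t = random_tree(4)
    assert negamax_plain(t) == negamax_ab(t, -INF, INF)
print('alpha-beta agrees with plain negamax at the root')
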
Example #3
def main2():
    # Flip the board so the opponent becomes the (positive) side to
    # move, then list the legal actions from their perspective.
    s1 = State(s.board * -1)
    print(s1.actions())
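
This works because the board encoding is colour-symmetric: the side to move is positive, the opponent negative, so negating every entry swaps the two roles. A small numpy sketch of the indexing idioms the endgame tests in the other examples rely on; the board here is a hypothetical 1-D array purely for illustration (the real shape lives inside State):

import numpy as np

board = np.array([2, 0, -1, 0, -1, 3])  # positive = ours, negative = theirs

# Opponent has exactly two pieces in total (the "n v two" test):
print(board[board < 0].sum() == -2)      # True

# Opponent occupies exactly one square (the "n v one" test counts
# occupied cells, not pieces):
print(len(board[board < 0]) == 1)        # False: two separate stacks

# Negating the board swaps the two colours' roles:
flipped = board * -1
print(flipped[flipped > 0].sum() == -board[board < 0].sum())  # True
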
Example #4
class BasePlayer:
    def __init__(self, colour):
        self.colour = colour
        self.state = State()
        self.n_v_two = None
        # Stage-specific evaluation weights: opening, development,
        # midgame, endgame.
        self.θo = np.load(path + 'w_opn-ab3.npy')
        self.θd = np.load(path + 'w_dev-ab3.npy')
        self.θm = np.load(path + 'w_mid-ab3.npy')
        self.θe = np.load(path + 'w_end-ab3.npy')

    def action(self):
        # Consult the opening book for the first few turns.
        if self.state.turn < 8:
            try:
                if str(self.state.board) in opening_book:
                    return tuple(opening_book[str(self.state.board)])
            except Exception:
                # A book miss this early is unexpected; fail loudly.
                print('Failed to get opening move')
                raise

        # n v two endgame: the opponent is down to two pieces in total
        # and we still have more than two.
        if (self.state.board[self.state.board < 0].sum() == -2
                and self.state.board[self.state.board > 0].sum() > 2):
            if self.n_v_two is None:
                self.n_v_two = NvTwo()
            return self.format_action(self.n_v_two.move(self.state))

        # n v one endgame.
        if (len(self.state.board[self.state.board < 0]) == 1
                and self.state.board[self.state.board > 0].sum() > 1):
            return self.format_action(n_v_one(self.state))

        depth = DEPTH
        # Pick the weight vector for the current game stage; search
        # twice as deep in the endgame.
        if self.state.stage[0] == OPN:
            θ = self.θo
        elif self.state.stage[0] == DEV:
            θ = self.θd
        elif self.state.stage[0] == MID:
            θ = self.θm
        else:
            θ = self.θe
            depth = 2 * depth

        # Root-level alpha-beta negamax over our legal actions.
        best_action = None
        alpha, beta = -4 * INF, 4 * INF
        for a in self.state.actions():
            child = self.state.result(a)
            nmax = -self.negamax(child, -beta, -alpha, depth - 1, θ)
            if nmax > alpha:
                alpha = nmax
                best_action = a

        return self.format_action(best_action)

    def update(self, colour, action):
        self.state = self.state.result(action)

    def format_action(self, action):
        # Normalise an internal action tuple into the referee's format.
        if action[0] == BOOM:
            _, orig = action
            return (BOOM, orig)
        _, n, orig, dest = action
        return (MOVE, n, orig, dest)

    def negamax(self, state, alpha, beta, depth, θ):
        # Fail-soft negamax with alpha-beta pruning; at the horizon,
        # fall back to the evaluation H over the feature vector Φ.
        if state.terminal_test():
            return state.utility()
        if depth == 0:
            return H(Φ(state), θ)

        v = -INF
        for a in state.actions():
            child = state.result(a)
            v = max(v, -self.negamax(child, -beta, -alpha, depth - 1, θ))
            if v >= beta:  # beta cutoff: the opponent avoids this line
                return v
            alpha = max(alpha, v)

        return v
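
H and Φ themselves are defined elsewhere in the source. Since each stage's θ is loaded as a flat .npy weight vector, one plausible reading of H(Φ(state), θ) is a linear evaluation, i.e. a dot product between a feature vector and the stage's weights. The sketch below illustrates that reading only; Φ_sketch's features are invented for the example and are not the project's actual feature map:

import numpy as np

def Φ_sketch(board):
    """Hypothetical feature map: material for each side, stack counts."""
    ours, theirs = board[board > 0], board[board < 0]
    return np.array([ours.sum(), -theirs.sum(), len(ours), len(theirs)])

def H_sketch(φ, θ):
    """Linear evaluation: weighted sum of features."""
    return float(φ @ θ)

board = np.array([2, 0, -1, 0, -1, 3])
θ = np.array([1.0, -1.0, 0.1, -0.1])
print(H_sketch(Φ_sketch(board), θ))   # 5 - 2 + 0.2 - 0.2 = 3.0

Under this reading, training a player per stage amounts to fitting four weight vectors, which matches the four .npy files loaded in __init__.
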