Example #1
def alphabeta_batch_hist(V, F, env, hist, depth, alpha, beta):
    """alpha_beta_batch with added memory (dynamic programming) 
    params:
        hist: history of observed states
    """
    if depth < 1:
        S = F(env.current_state)
        return None, map_side_to_int(env.get_turn()) * V(S)
    as_pairs = env.get_as_pairs()
    if len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    # avoid repetition
    as_pairs = [(a, s) for (a, s) in as_pairs if s not in hist]
    if len(as_pairs) == 0:
        as_pairs = env.get_as_pairs()
    if depth == 1:
        # no filtering here: indices into values must stay aligned with as_pairs
        S = np.array([F(s) for (a, s) in as_pairs])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))
        values = map_side_to_int(env.get_turn()) * V(S)
        index = np.argmax(values)
        return as_pairs[index][0], values[index, 0]
    else:
        act = None
        for (a, s) in as_pairs:
            env = Environment(s)
            score = -alphabeta_batch_hist(V, F, env, hist + [s], depth - 1,
                                          -beta, -alpha)[1]
            if score >= beta:
                return a, beta
            elif score > alpha:
                alpha = score
                act = a
        return act, alpha
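
A hedged usage sketch (V, F, and Environment as in the other snippets; env.hist is the repetition dict that appears in the perform_action snippet further down):

hist = list(env.hist)  # states already seen in the game (assumed attribute)
a, score = alphabeta_batch_hist(V, F, env, hist, depth=3,
                                alpha=float('-inf'), beta=float('inf'))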
Example #2
def recursive_eval(env, board, A, agents, start_turn, S):
    """yielding an score to evaluate an agent compared to the ground truth.
    Based on the metrics discussed in the thesis document."""
    assert board.epd() == env.current_state
    S.append(env.current_state)
    th_wdl = map_side_to_int(
        start_turn == env.get_turn()) * tablebases.TB.probe_wdl(
            board)  # 1 -> winning, 0 -> drawing, -1 -> losing
    th_dtm = np.abs(tablebases.TB.probe_dtm(board))
    if board.is_game_over(claim_draw=True):
        if board.result() == '1-0':
            if start_turn:
                wdl = 1
            else:
                wdl = -1
        elif board.result() == '0-1':
            if start_turn:
                wdl = -1
            else:
                wdl = 1
        else:
            wdl = 0
        return wdl, 0, wdl, 0, env.current_state
    else:
        a, _, _ = agents[env.get_turn()].play(env)
        board.push_uci(a)
        _, _, wdl, dtm, s = recursive_eval(env, board, A, agents, start_turn,
                                           S)
        A.append((th_wdl, th_dtm, wdl, dtm + 1, s))
        return th_wdl, th_dtm, wdl, dtm + 1, env.current_state
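
A hedged usage sketch (tablebases, the agents dict keyed by side to move, and the white_agent/black_agent names are assumptions based on the call sites above); A collects one metric tuple per ply as the recursion unwinds:

import chess

A, S = [], []
board = chess.Board.from_epd(env.current_state)[0]
recursive_eval(env, board, A, {True: white_agent, False: black_agent},
               env.get_turn(), S)
# each entry of A is (th_wdl, th_dtm, wdl, dtm, leaf_state)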
Example #3
def alphabeta_native(V, F, env, depth, alpha, beta):
    """
    minimax with alpha beta pruning
    params:
        V: value function
        F: method to transform data into features
        env: environment (chess position)
        depth: depth of search
        alpha
        beta
    returns:
        max_a: best action
        max_score: score of this action
    """
    as_pairs = env.get_as_pairs()
    if depth == 0 or len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    else:
        act = None
        for (a, s) in as_pairs:
            env = Environment(s)
            score = -alphabeta_native(V, F, env, depth - 1, -beta, -alpha)[1]
            if score >= beta:
                return a, beta
            elif score > alpha:
                alpha = score
                act = a
        return act, alpha
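
All of these snippets use a helper map_side_to_int that is not included in the excerpts. A minimal sketch consistent with its call sites, assuming the python-chess convention that the side to move is a boolean (True for white):

def map_side_to_int(side):
    # True (white) -> +1, False (black) -> -1; this convention is what
    # makes the negamax sign flips in the searches work out
    return 1 if side else -1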
Example #4
def minimax(V, F, env, depth):
    """
    plain minimax without optimizations (negamax formulation)
    params:
        V: value function
        F: method to transform data into features
        env: environment (chess position)
        depth: depth of search
    returns:
        max_a: best action
        max_score: score of this action
    """
    as_pairs = env.get_as_pairs()
    if depth == 0 or len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    else:
        max_a = None
        max_score = -np.inf  # a None sentinel only compares correctly in Python 2
        for (a, s) in as_pairs:
            env = Environment(state=s)
            score = -minimax(V, F, env, depth - 1)[1]
            if score > max_score:
                max_score = score
                max_a = a
        return max_a, max_score
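
A hedged usage sketch with toy stand-ins for V and F. The shape contract implied by the batch variants is that F maps one state to a (1, d) feature row and V maps an (N, d) batch to an (N, 1) column of values; start_epd and the Environment constructor are assumptions:

import numpy as np

V = lambda S: np.tanh(np.sum(S, axis=-1, keepdims=True))  # toy value net
F = lambda s: np.random.rand(1, 64)                       # toy featurizer

env = Environment(start_epd)
a, score = minimax(V, F, env, depth=2)
a, score = alphabeta_native(V, F, env, depth=4, alpha=-np.inf, beta=np.inf)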
Example #5
    def reward(self, a):
        """
        returns the reward for action a, from the mover's point of view
        (sign-flipped per color, so e.g. black gets a positive reward where
        white would have gotten a negative one)
        """
        mr = 0
        dr = 0
        rr = 0

        if a is None:
            self.draw()

        try:
            result = self.engine.get_move_result(a)
        except Exception:
            self.draw()
            print a
            raise
        if result == ChessMovesEngine.WHITE_WIN or result == ChessMovesEngine.BLACK_WIN:
            mr = self.mate_r
        elif result == ChessMovesEngine.DRAW:
            dr = self.draw_r
        elif self.use_tb:
            s_n = self.engine.moves[a][0]
            if sum([int(c.isalpha()) for c in s_n]) <= (5 + 1):
                mr = self.mate_r * tb.probe_result(s_n)
        # rr is never applied here: the repetition penalty is added in
        # perform_action instead (see that snippet below), so this is a no-op
        if self.hist.get(self.current_state, 0) > 1:
            pass
        s = mr + dr + self.move_r
        # sign-flipped: after the move it is the opponent's turn, but the
        # reward belongs to the player who just moved
        return map_side_to_int(self.get_turn()) * s
Example #6
def alphabeta_outcome(sp, a, s, depth, alpha, beta):
    """
    alpha-beta pruning on a ground-truth outcome (negamax form)
    params:
        sp: previous state
        a: action
        s: current state
        depth: depth of search
        alpha
        beta
    returns:
        max_a: best action
        max_score: score of this action
    """
    if depth < 1:
        env = Environment(sp)
        env.perform_action(a)
        o = map_side_to_int(env.get_turn()) * env.int_outcome()
        return None, o
    env = Environment(s)
    as_pairs = env.get_as_pairs()
    if len(as_pairs) == 0:
        env = Environment(sp)
        env.perform_action(a)
        o = map_side_to_int(env.get_turn()) * env.int_outcome()
        return None, o
    if depth == 1:
        # the 0.5 per-ply discount makes faster wins outrank slower ones
        outcomes = [
            0.5 * map_side_to_int(env.get_turn()) * env.action_outcome(an)
            for (an, sn) in as_pairs
        ]
        best = np.argmax(np.array(outcomes))
        best_o = outcomes[best]
        return as_pairs[best][0], best_o
    act = None
    for (a, sn) in as_pairs:
        score = -0.5 * alphabeta_outcome(s, a, sn, depth - 1, -beta, -alpha)[1]
        if score >= beta:
            return a, beta
        elif score > alpha:
            alpha = score
            act = a
    return act, alpha
Example #7
def probe_result(epd):
    '''
    return 1 if white wins, 0 if draw, -1 if black wins
    returns None if not in tablebases
    '''
    # at most 5 piece letters plus the side-to-move letter in the EPD
    if sum([int(c.isalpha()) for c in epd]) <= (5 + 1):
        board = chess.Board.from_epd(epd)[0]
        turn = board.turn
        return map_side_to_int(turn) * TB.probe_wdl(board)
    else:
        return None
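
TB is a module-level tablebase object not shown in these excerpts; since both probe_wdl and probe_dtm are called on it elsewhere, it is presumably an opened python-chess Gaviota tablebase. A hedged setup and usage sketch (the path and position are just examples):

import chess
import chess.gaviota

TB = chess.gaviota.open_tablebase("/path/to/gaviota")  # assumed setup

print probe_result("7k/8/8/8/8/8/8/KQ6 w - -")  # KQ vs K, expect 1 (white wins)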
Example #8
def alphabeta_dtm(sp, a, s, depth, alpha, beta):
    """
    alpha-beta pruning on a ground-truth DTM (distance to mate)
    params:
        sp: previous state
        a: action
        s: current state
        depth: depth of search
        alpha
        beta
    returns:
        max_a: best action
        max_score: score of this action
    """
    if depth == 0:
        ep = Environment(sp)
        return None, -map_side_to_int(ep.get_turn()) * ep.action_outcome(a)
    elif depth == 1:
        e = Environment(s)
        as_pairs = e.get_as_pairs()
        outcomes = [
            -0.5 * map_side_to_int(e.get_turn()) * e.action_outcome(an)
            for (an, sn) in as_pairs
        ]
        max_o = max(outcomes)
        best = outcomes.index(max_o)  # first index of the best outcome
        return as_pairs[best][0], max_o
    else:
        best_an = None
        e = Environment(s)
        as_pairs = e.get_as_pairs()
        for (an, sn) in as_pairs:
            score = -0.5 * alphabeta_dtm(s, an, sn, depth - 1, -beta,
                                         -alpha)[1]
            if score >= beta:
                return an, beta
            elif score > alpha:
                alpha = score
                best_an = an
        return best_an, alpha
Example #9
def alphabeta_batch(V, F, env, depth, alpha, beta):
    """
    alpha beta pruning on a batch of positions 
    params:
        V: value function
        F: methode to transform data into features
        env: batch of environments (chess positions)
        depth: depth of search
        alpha
        beta
    returns:
        max_a: best action
        max_score: score of this action
    """
    if depth < 1:
        S = F(env.current_state)
        return None, map_side_to_int(env.get_turn()) * V(S)
    as_pairs = env.get_as_pairs()
    if len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    if depth == 1:
        S = np.array([F(s) for (a, s) in as_pairs])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))
        values = map_side_to_int(env.get_turn()) * V(S)
        index = np.argmax(values)
        return as_pairs[index][0], values[index, 0]
    else:
        act = None
        for (a, s) in as_pairs:
            env = Environment(s)
            score = -alphabeta_batch(V, F, env, depth - 1, -beta, -alpha)[1]
            if score >= beta:
                return a, beta
            elif score > alpha:
                alpha = score
                act = a
        return act, alpha
Example #10
def alphabeta_zobtrans(V, F, trans, env, z, depth, alpha, beta):
    """some doodling around with a self written zobrist hash function, did not
    perform as good as with the python hash function for dictionaries"""
    as_pairs = env.get_as_pairs()
    st = env.current_state
    if len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    if z in trans:
        if trans[z]['depth'] >= depth:
            return trans[z]['move'], trans[z]['score']
        else:
            "change order of lookup in favour of pv"
            ind = [a for (a, s) in as_pairs].index(trans[z]['move'])
            as_pairs[0], as_pairs[ind] = as_pairs[ind], as_pairs[0]
    if depth == 1:
        S = np.array([F(s) for (a, s) in as_pairs])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))
        values = map_side_to_int(env.get_turn()) * V(S)
        index = np.argmax(values)
        trans_add_entry(trans, z, depth, values[index, 0], as_pairs[index][0])
        return as_pairs[index][0], values[index, 0]
    else:
        act = None
        for (a, s) in as_pairs:
            zn = new_zobrist(z, st, a)
            env = Environment(s)
            score = -alphabeta_zobtrans(V, F, trans, env, zn, depth - 1, -beta,
                                        -alpha)[1]
            if score >= beta:
                return a, beta
            elif score > alpha:
                alpha = score
                act = a
        trans_add_entry(trans, z, depth, alpha, act)
        return act, alpha
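
Both transposition-table variants rely on a helper trans_add_entry that is not included in the excerpts. A minimal sketch consistent with how the entries are read back (dict entries with 'depth', 'score' and 'move' keys; deeper results overwrite shallower ones):

def trans_add_entry(trans, key, depth, score, move):
    # keep the deepest available search result for this key
    old = trans.get(key)
    if old is None or old['depth'] <= depth:
        trans[key] = {'depth': depth, 'score': score, 'move': move}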
Example #11
def alphabeta_batch_hist_leaf(V, F, env, hist, depth, alpha, beta):
    """like alphabeta_batch_hist, but additionally returns the leaf state
    whose evaluation produced the score"""
    if depth < 1:
        S = F(env.current_state)
        return None, map_side_to_int(env.get_turn()) * V(S), env.current_state
    as_pairs = env.get_as_pairs()
    if len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(
            env.current_state)), env.current_state
    # avoid repetition
    as_pairs = [(a, s) for (a, s) in as_pairs if s not in hist]
    if len(as_pairs) == 0:
        as_pairs = env.get_as_pairs()
    if depth == 1:
        # no filtering here: indices into values must stay aligned with as_pairs
        S = np.array([F(s) for (a, s) in as_pairs])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))
        values = map_side_to_int(env.get_turn()) * V(S)
        index = np.argmax(values)
        return as_pairs[index][0], values[index, 0], as_pairs[index][1]
    else:
        act = None
        best_leaf = None
        for (a, s) in as_pairs:
            env = Environment(s)
            _, score, leaf = alphabeta_batch_hist_leaf(V, F, env, hist,
                                                       depth - 1, -beta,
                                                       -alpha)
            score = -score
            if score >= beta:
                return a, beta, leaf
            elif score > alpha:
                alpha = score
                act = a
                best_leaf = leaf
        return act, alpha, best_leaf
Example #12
def alphabeta_trans(V, F, trans, env, depth, alpha, beta):
    """alpha-beta with a transposition table keyed directly by the state
    string (cf. alphabeta_zobtrans above)"""
    as_pairs = env.get_as_pairs()
    if len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    if env.current_state in trans:
        s = env.current_state
        if trans[s]['depth'] >= depth:
            return trans[s]['move'], trans[s]['score']
        else:
            "change order of lookup in favour of pv"
            ind = [a for (a, s) in as_pairs].index(trans[s]['move'])
            as_pairs[0], as_pairs[ind] = as_pairs[ind], as_pairs[0]
    if depth == 1:
        S = np.array([F(s) for (a, s) in as_pairs])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))
        values = map_side_to_int(env.get_turn()) * V(S)
        index = np.argmax(values)
        trans_add_entry(trans, env.current_state, depth, values[index, 0],
                        as_pairs[index][0])
        return as_pairs[index][0], values[index, 0]
    else:
        root_state = env.current_state  # env is rebound inside the loop below
        act = None
        for (a, s) in as_pairs:
            env = Environment(s)
            score = -alphabeta_trans(V, F, trans, env, depth - 1, -beta,
                                     -alpha)[1]
            if score >= beta:
                trans_add_entry(trans, root_state, depth, beta, a)
                return a, beta
            elif score > alpha:
                alpha = score
                act = a
        trans_add_entry(trans, root_state, depth, alpha, act)
        return act, alpha
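
A hedged usage sketch: the table is an ordinary dict that persists across calls, so successive searches can reuse earlier results:

trans = {}  # transposition table keyed by the state string
a, score = alphabeta_trans(V, F, trans, env, depth=4,
                           alpha=float('-inf'), beta=float('inf'))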
Example #13
    def perform_action(self, a):
        """
        action is a uci move
        go to the next state
        returns:
            (reward, next state)
        """
        r = self.reward(a)
        self.engine.do_move(a)
        self.current_state = self.engine.board
        if self.current_state in self.hist:
            # repetition penalty for revisiting a position
            r += map_side_to_int(self.get_turn()) * self.repet_r
            self.hist[self.current_state] += 1
        else:
            self.hist[self.current_state] = 1
        self.actions = self.engine.get_moves()
        return r, self.current_state
Example #14
    def get_av_pairs(self, env):
        """
        get action value (AV) pairs corresponding with Environment 
        """
        as_pairs = env.get_as_pairs()
        # need to take into account that it's a zero sum game
        # invert value if black
        S = [t[1] for t in as_pairs]
        N = len(S)
        S = np.array([faster_featurize(s) for s in S])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))

        v = self.approx.value(S)
        v = map_side_to_int(env.get_turn()) * v

        av = [(as_pairs[i][0], v[i, 0]) for i in xrange(N)]
        return av
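
A hedged follow-up: greedy action selection from the returned pairs is then a one-liner (agent stands for whatever object carries this method):

av = agent.get_av_pairs(env)
best_a, best_v = max(av, key=lambda t: t[1])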
Example #15
    def get_av_pairs(self, env):
        """
        get action value (AV) pairs corresponding with Environment 
        """
        as_pairs = env.get_as_pairs()
        as_pairs.append((None, env.current_state))  # also score the position itself
        # need to take into account that it's a zero sum game
        # invert value if black
        S = [t[1] for t in as_pairs]
        N = len(S)
        S = np.array([self.data_thread.put_and_get(s) for s in S])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))

        with self.ep_task_lock:
            self.ep_task_q.put((self.name, S))
        v = self.conn.recv()

        if v is None:
            # evaluation failed upstream; tear down the episode workers
            for p in mp.active_children():
                print p.name
                if p.name[:3] == 'Epi':
                    p.terminate()
        v = map_side_to_int(env.get_turn()) * v

        try:
            av = [(as_pairs[i][0], v[i, 0]) for i in xrange(N)]
        except Exception:
            # dump debug info, then re-raise so we never fall through and
            # return an unbound av
            env.draw()
            print as_pairs
            print S.shape
            print N
            print v.shape
            import time
            time.sleep(10)
            raise

        return av