def alphabeta_batch_hist(V, F, env, hist, depth, alpha, beta):
    """alphabeta_batch with added memory: a history of visited states used to
    avoid move repetitions
    params:
        hist: history of observed states
    """
    if depth < 1:
        S = F(env.current_state)
        return None, map_side_to_int(env.get_turn()) * V(S)
    as_pairs = env.get_as_pairs()
    if len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    # avoid repetition
    as_pairs = [(a, s) for (a, s) in as_pairs if s not in hist]
    if len(as_pairs) == 0:
        as_pairs = env.get_as_pairs()
    if depth == 1:
        S = np.array([F(s) for (a, s) in as_pairs if s])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))
        values = map_side_to_int(env.get_turn()) * V(S)
        index = np.argmax(values)
        return as_pairs[index][0], values[index, 0]
    else:
        act = None
        for (a, s) in as_pairs:
            env = Environment(s)
            score = -alphabeta_batch_hist(V, F, env, hist + [s], depth - 1,
                                          -beta, -alpha)[1]
            if score >= beta:
                return a, beta
            elif score > alpha:
                alpha = score
                act = a
        return act, alpha
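
# All searchers in this module rely on map_side_to_int to turn the side to move
# into a sign for the negamax value; the helper itself is defined elsewhere in
# the project.  A minimal sketch consistent with its usage here (python-chess
# style booleans, True/White -> +1, False/Black -> -1) would be:
def map_side_to_int(side):
    """Sketch of the project's helper: +1 for White (True), -1 for Black (False)."""
    return 1 if side else -1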
def recursive_eval(env, board, A, agents, start_turn, S):
    """recursively plays out the position and yields a score to evaluate an
    agent against the tablebase ground truth. Based on the metrics discussed in
    the thesis document."""
    assert board.epd() == env.current_state
    S.append(env.current_state)
    # 1 -> winning, 0 -> drawing, -1 -> losing (relative to start_turn)
    th_wdl = map_side_to_int(start_turn == env.get_turn()) * tablebases.TB.probe_wdl(board)
    th_dtm = np.abs(tablebases.TB.probe_dtm(board))
    if board.is_game_over(claim_draw=True):
        if board.result() == '1-0':
            wdl = 1 if start_turn else -1
        elif board.result() == '0-1':
            wdl = -1 if start_turn else 1
        else:
            wdl = 0
        return wdl, 0, wdl, 0, env.current_state
    else:
        a, _, _ = agents[env.get_turn()].play(env)
        board.push_uci(a)
        _, _, wdl, dtm, s = recursive_eval(env, board, A, agents, start_turn, S)
        A.append((th_wdl, th_dtm, wdl, dtm + 1, s))
        return th_wdl, th_dtm, wdl, dtm + 1, env.current_state
def alphabeta_native(V, F, env, depth, alpha, beta):
    """
    minimax with alpha beta pruning
    params:
        V: value function
        F: method to transform data into features
        env: environment (chess position)
        depth: depth of search
        alpha
        beta
    returns:
        max_a: best action
        max_score: score of this action
    """
    as_pairs = env.get_as_pairs()
    if depth == 0 or len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    else:
        act = None
        for (a, s) in as_pairs:
            env = Environment(s)
            score = -alphabeta_native(V, F, env, depth - 1, -beta, -alpha)[1]
            if score >= beta:
                return a, beta
            elif score > alpha:
                alpha = score
                act = a
        return act, alpha
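
# A minimal usage sketch for alphabeta_native.  The value function, featurizer
# and starting position below are placeholders for illustration only: dummy_V
# stands in for a trained value network mapping a feature matrix to a column of
# scores, dummy_F for the featurizer used elsewhere in the project.
def _example_alphabeta_native():
    def dummy_V(S):
        # one (arbitrary) score per row of features, shape (N, 1)
        return np.zeros((S.shape[0], 1))

    def dummy_F(state):
        # fixed-size feature row per position, shape (1, 8)
        return np.zeros((1, 8))

    # KQ vs K, white to move (illustrative position)
    env = Environment('4k3/8/8/8/8/8/8/Q3K3 w - -')
    move, score = alphabeta_native(dummy_V, dummy_F, env, 2, -np.inf, np.inf)
    return move, score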
def minimax(V, F, env, depth):
    """
    naive minimax without optimizations
    params:
        V: value function
        F: method to transform data into features
        env: environment (chess position)
        depth: depth of search
    returns:
        max_a: best action
        max_score: score of this action
    """
    as_pairs = env.get_as_pairs()
    if depth == 0 or len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    else:
        max_a = None
        max_score = -np.inf
        for (a, s) in as_pairs:
            env = Environment(state=s)
            score = -minimax(V, F, env, depth - 1)[1]
            if score > max_score:
                max_score = score
                max_a = a
        return max_a, max_score
def reward(self, a):
    """
    returns the reward compatible with the action (compatible with color, so
    e.g. black gets a positive reward when the reward would've been negative
    for white)
    """
    mr = 0
    dr = 0
    if a is None:
        self.draw()
    try:
        result = self.engine.get_move_result(a)
    except:
        self.draw()
        print a
        raise
    if result == ChessMovesEngine.WHITE_WIN or result == ChessMovesEngine.BLACK_WIN:
        mr = self.mate_r
    elif result == ChessMovesEngine.DRAW:
        dr = self.draw_r
    elif self.use_tb:
        s_n = self.engine.moves[a][0]
        # at most 5 pieces; the extra 1 allows for the side-to-move character in the EPD
        if sum([int(c.isalpha()) for c in s_n]) <= (5 + 1):
            mr = self.mate_r * tb.probe_result(s_n)
    # repetitions are penalised in perform_action, not here
    s = mr + dr + self.move_r
    # inverted, because the reward is for the side that played the last move
    return map_side_to_int(self.get_turn()) * s
def alphabeta_outcome(sp, a, s, depth, alpha, beta):
    """
    alpha beta pruning on a ground truth outcome
    params:
        sp: previous state
        a: action
        s: current state
        depth: depth of search
        alpha
        beta
    returns:
        max_a: best action
        max_score: score of this action
    """
    if depth < 1:
        env = Environment(sp)
        env.perform_action(a)
        o = map_side_to_int(env.get_turn()) * env.int_outcome()
        return None, o
    env = Environment(s)
    as_pairs = env.get_as_pairs()
    if len(as_pairs) == 0:
        env = Environment(sp)
        env.perform_action(a)
        o = map_side_to_int(env.get_turn()) * env.int_outcome()
        return None, o
    if depth == 1:
        # discount the outcome by 0.5 per ply so that shorter wins are preferred
        outcomes = [
            0.5 * map_side_to_int(env.get_turn()) * env.action_outcome(a)
            for (a, sn) in as_pairs
        ]
        best = np.argmax(np.array(outcomes))
        best_o = outcomes[best]
        return as_pairs[best][0], best_o
    act = None
    for (a, sn) in as_pairs:
        score = -0.5 * alphabeta_outcome(s, a, sn, depth - 1, -beta, -alpha)[1]
        if score >= beta:
            return a, beta
        elif score > alpha:
            alpha = score
            act = a
    return act, alpha
def probe_result(epd):
    '''
    return 1 if white wins, 0 if draw, -1 if black wins
    returns None if not in tablebases
    '''
    # at most 5 pieces; the extra 1 allows for the side-to-move character in the EPD
    if sum([int(c.isalpha()) for c in epd]) <= (5 + 1):
        board = chess.Board.from_epd(epd)[0]
        turn = board.turn
        return map_side_to_int(turn) * TB.probe_wdl(board)
    else:
        return None
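
# Small usage sketch for probe_result; the KQ vs K EPD is only an illustration
# and assumes the tablebases behind TB have been opened elsewhere in the project.
def _example_probe_result():
    # white Ke1 and Qa1 versus black Ke8: a tablebase win for White, so +1
    return probe_result('4k3/8/8/8/8/8/8/Q3K3 w - -')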
def alphabeta_dtm(sp, a, s, depth, alpha, beta):
    """
    alpha beta pruning on a ground truth dtm
    params:
        sp: previous state
        a: action
        s: current state
        depth: depth of search
        alpha
        beta
    returns:
        max_a: best action
        max_score: score of this action
    """
    if depth == 0:
        ep = Environment(sp)
        return None, -map_side_to_int(ep.get_turn()) * ep.action_outcome(a)
    elif depth == 1:
        e = Environment(s)
        as_pairs = e.get_as_pairs()
        # discount the outcome by 0.5 per ply so that shorter wins are preferred
        outcomes = [
            -0.5 * map_side_to_int(e.get_turn()) * e.action_outcome(an)
            for (an, sn) in as_pairs
        ]
        max_o = max(outcomes)
        best = outcomes.index(max_o)
        return as_pairs[best][0], max_o
    else:
        best_an = None
        e = Environment(s)
        as_pairs = e.get_as_pairs()
        for (an, sn) in as_pairs:
            score = -0.5 * alphabeta_dtm(s, an, sn, depth - 1, -beta, -alpha)[1]
            if score >= beta:
                return an, beta
            elif score > alpha:
                alpha = score
                best_an = an
        return best_an, alpha
def alphabeta_batch(V, F, env, depth, alpha, beta):
    """
    alpha beta pruning with batched evaluation of the leaf positions
    params:
        V: value function
        F: method to transform data into features
        env: environment (chess position)
        depth: depth of search
        alpha
        beta
    returns:
        max_a: best action
        max_score: score of this action
    """
    if depth < 1:
        S = F(env.current_state)
        return None, map_side_to_int(env.get_turn()) * V(S)
    as_pairs = env.get_as_pairs()
    if len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    if depth == 1:
        # featurize all successor positions and score them in one batched call to V
        S = np.array([F(s) for (a, s) in as_pairs])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))
        values = map_side_to_int(env.get_turn()) * V(S)
        index = np.argmax(values)
        return as_pairs[index][0], values[index, 0]
    else:
        act = None
        for (a, s) in as_pairs:
            env = Environment(s)
            score = -alphabeta_batch(V, F, env, depth - 1, -beta, -alpha)[1]
            if score >= beta:
                return a, beta
            elif score > alpha:
                alpha = score
                act = a
        return act, alpha
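
# Sketch of how alphabeta_batch differs from alphabeta_native in use: at depth 1
# every successor position is featurized and scored in a single batched call to
# V, which is the point of this variant.  The value function and featurizer
# below are placeholders, not the trained model.
def _example_alphabeta_batch():
    def batched_V(S):
        # placeholder: one score per row, shape (N, 1)
        return np.zeros((S.shape[0], 1))

    def featurize(state):
        # placeholder: fixed-size feature row, shape (1, 8)
        return np.zeros((1, 8))

    env = Environment('4k3/8/8/8/8/8/8/Q3K3 w - -')
    return alphabeta_batch(batched_V, featurize, env, 2, -np.inf, np.inf)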
def alphabeta_zobtrans(V, F, trans, env, z, depth, alpha, beta):
    """some doodling around with a self-written zobrist hash function; it did
    not perform as well as the python hash function for dictionaries"""
    as_pairs = env.get_as_pairs()
    st = env.current_state
    if len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    if z in trans:
        if trans[z]['depth'] >= depth:
            return trans[z]['move'], trans[z]['score']
        else:
            # change order of lookup in favour of the pv move
            ind = [a for (a, sn) in as_pairs].index(trans[z]['move'])
            as_pairs[0], as_pairs[ind] = as_pairs[ind], as_pairs[0]
    if depth == 1:
        S = np.array([F(s) for (a, s) in as_pairs])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))
        values = map_side_to_int(env.get_turn()) * V(S)
        index = np.argmax(values)
        trans_add_entry(trans, z, depth, values[index, 0], as_pairs[index][0])
        return as_pairs[index][0], values[index, 0]
    else:
        act = None
        for (a, s) in as_pairs:
            zn = new_zobrist(z, st, a)
            env = Environment(s)
            score = -alphabeta_zobtrans(V, F, trans, env, zn, depth - 1,
                                        -beta, -alpha)[1]
            if score >= beta:
                return a, beta
            elif score > alpha:
                alpha = score
                act = a
        trans_add_entry(trans, z, depth, alpha, act)
        return act, alpha
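
# new_zobrist is not defined in this file; the project's own version presumably
# updates the hash z incrementally from the move a, which is the point of
# zobrist hashing.  As a purely illustrative fallback, the intended return value
# (a zobrist key of the successor position) can be recomputed from scratch with
# python-chess:
def _new_zobrist_sketch(z, st, a):
    import chess.polyglot
    # z is unused here because nothing is updated incrementally in this sketch
    board = chess.Board.from_epd(st)[0]
    board.push_uci(a)
    return chess.polyglot.zobrist_hash(board)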
def alphabeta_batch_hist_leaf(V, F, env, hist, depth, alpha, beta):
    """like alphabeta_batch_hist, but also returns the leaf state that backs
    the returned score"""
    if depth < 1:
        S = F(env.current_state)
        return None, map_side_to_int(env.get_turn()) * V(S), env.current_state
    as_pairs = env.get_as_pairs()
    if len(as_pairs) == 0:
        return (None, map_side_to_int(env.get_turn()) * V(F(env.current_state)),
                env.current_state)
    # avoid repetition
    as_pairs = [(a, s) for (a, s) in as_pairs if s not in hist]
    if len(as_pairs) == 0:
        as_pairs = env.get_as_pairs()
    if depth == 1:
        S = np.array([F(s) for (a, s) in as_pairs if s])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))
        values = map_side_to_int(env.get_turn()) * V(S)
        index = np.argmax(values)
        return as_pairs[index][0], values[index, 0], as_pairs[index][1]
    else:
        act = None
        best_leaf = None
        for (a, s) in as_pairs:
            env = Environment(s)
            _, score, leaf = alphabeta_batch_hist_leaf(V, F, env, hist,
                                                       depth - 1, -beta, -alpha)
            score = -score
            if score >= beta:
                return a, beta, leaf
            elif score > alpha:
                alpha = score
                act = a
                best_leaf = leaf
        return act, alpha, best_leaf
def alphabeta_trans(V, F, trans, env, depth, alpha, beta):
    """alpha beta pruning with a transposition table keyed by the raw position
    string"""
    state = env.current_state
    as_pairs = env.get_as_pairs()
    if len(as_pairs) == 0:
        return None, map_side_to_int(env.get_turn()) * V(F(env.current_state))
    if state in trans:
        if trans[state]['depth'] >= depth:
            return trans[state]['move'], trans[state]['score']
        else:
            # change order of lookup in favour of the pv move
            ind = [a for (a, sn) in as_pairs].index(trans[state]['move'])
            as_pairs[0], as_pairs[ind] = as_pairs[ind], as_pairs[0]
    if depth == 1:
        S = np.array([F(s) for (a, s) in as_pairs])
        S = np.reshape(S, (S.shape[0], S.shape[-1]))
        values = map_side_to_int(env.get_turn()) * V(S)
        index = np.argmax(values)
        trans_add_entry(trans, state, depth, values[index, 0], as_pairs[index][0])
        return as_pairs[index][0], values[index, 0]
    else:
        act = None
        for (a, s) in as_pairs:
            env = Environment(s)
            score = -alphabeta_trans(V, F, trans, env, depth - 1, -beta, -alpha)[1]
            if score >= beta:
                trans_add_entry(trans, state, depth, beta, a)
                return a, beta
            elif score > alpha:
                alpha = score
                act = a
        trans_add_entry(trans, state, depth, alpha, act)
        return act, alpha
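
# trans_add_entry is used by alphabeta_trans and alphabeta_zobtrans but is not
# defined in this file.  A minimal sketch consistent with how the table is read
# back (trans[key]['depth'], trans[key]['score'], trans[key]['move']), keeping
# only the deepest result per key, could look like this:
def trans_add_entry(trans, key, depth, score, move):
    entry = trans.get(key)
    if entry is None or entry['depth'] <= depth:
        trans[key] = {'depth': depth, 'score': score, 'move': move}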
def perform_action(self, a):
    """
    action is a uci move
    go to the next state
    return: (reward, next state)
    """
    r = self.reward(a)
    self.engine.do_move(a)
    self.current_state = self.engine.board
    if self.current_state in self.hist:
        r += map_side_to_int(self.get_turn()) * self.repet_r
        self.hist[self.current_state] += 1
    else:
        self.hist[self.current_state] = 1
    self.actions = self.engine.get_moves()
    return r, self.current_state
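
# Sketch of stepping a game directly through perform_action; the value function,
# featurizer and search depth are placeholders.  Note that the reward returned
# by perform_action is signed for the side that just moved (see reward() above).
def _example_episode(env, V, F, depth=2, max_plies=100):
    rewards = []
    for _ in range(max_plies):
        if len(env.get_as_pairs()) == 0:
            # no legal moves left: the game is over
            break
        a, _ = alphabeta_batch(V, F, env, depth, -np.inf, np.inf)
        r, s = env.perform_action(a)
        rewards.append(r)
    return rewards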
def get_av_pairs(self, env):
    """
    get action value (AV) pairs corresponding with Environment
    """
    as_pairs = env.get_as_pairs()
    # need to take into account that it's a zero sum game
    # invert value if black
    S = [t[1] for t in as_pairs]
    N = len(S)
    S = np.array([faster_featurize(s) for s in S])
    S = np.reshape(S, (S.shape[0], S.shape[-1]))
    v = self.approx.value(S)
    v = map_side_to_int(env.get_turn()) * v
    av = [(as_pairs[i][0], v[i, 0]) for i in xrange(N)]
    return av
def get_av_pairs(self, env):
    """
    get action value (AV) pairs corresponding with Environment
    """
    as_pairs = env.get_as_pairs()
    as_pairs.append((None, env.current_state))
    # need to take into account that it's a zero sum game
    # invert value if black
    S = [t[1] for t in as_pairs]
    N = len(S)
    S = np.array([self.data_thread.put_and_get(s) for s in S])
    S = np.reshape(S, (S.shape[0], S.shape[-1]))
    with self.ep_task_lock:
        self.ep_task_q.put((self.name, S))
        v = self.conn.recv()
    if v is None:
        for p in mp.active_children():
            print p.name
            if p.name[:3] == 'Epi':
                p.terminate()
    v = map_side_to_int(env.get_turn()) * v
    try:
        av = [(as_pairs[i][0], v[i, 0]) for i in xrange(N)]
    except:
        env.draw()
        print as_pairs
        print S.shape
        print N
        print v.shape
        import time
        time.sleep(10)
    return av