def alphabeta_dtm(sp, a, s, depth, alpha, beta):
    """Negamax alpha-beta search over a ground-truth DTM signal.

    Args:
        sp: previous state (the state in which action ``a`` was played).
        a: action that led from ``sp`` to ``s``.
        s: current state.
        depth: remaining search depth in plies.
        alpha: lower bound of the pruning window.
        beta: upper bound of the pruning window.

    Returns:
        ``(best_action, best_score)`` — ``best_action`` is ``None`` at the
        horizon or when no move raised ``alpha`` (fail-low).
    """
    # Horizon: score the action that produced this node from the mover's view.
    # NOTE(review): sign/scale conventions follow Environment's DTM encoding —
    # confirm against map_side_to_int / action_outcome before changing.
    if depth == 0:
        ep = Environment(sp)
        return None, -map_side_to_int(ep.get_turn()) * ep.action_outcome(a)

    env = Environment(s)
    pairs = env.get_as_pairs()

    # One ply left: score every child directly instead of recursing.
    if depth == 1:
        scores = [
            -0.5 * map_side_to_int(env.get_turn()) * env.action_outcome(act)
            for (act, _) in pairs
        ]
        top = max(scores)
        # Deterministic tie-break: first action attaining the maximum.
        return pairs[scores.index(top)][0], top

    # Interior node: negamax recursion with a negated, swapped window.
    chosen = None
    for (act, nxt) in pairs:
        val = -0.5 * alphabeta_dtm(s, act, nxt, depth - 1, -beta, -alpha)[1]
        if val >= beta:
            # Fail-hard cutoff: return the bound, not the raw score.
            return act, beta
        if val > alpha:
            alpha = val
            chosen = act
    return chosen, alpha
def alphabeta_outcome(sp, a, s, depth, alpha, beta):
    """Negamax alpha-beta search over a ground-truth game outcome.

    Args:
        sp: previous state (the state in which action ``a`` was played).
        a: action that led from ``sp`` to ``s``.
        s: current state.
        depth: remaining search depth in plies.
        alpha: lower bound of the pruning window.
        beta: upper bound of the pruning window.

    Returns:
        ``(best_action, best_score)`` — ``best_action`` is ``None`` at the
        horizon, at a terminal position, or when no move raised ``alpha``.
    """
    # Horizon reached: replay the last action and score the resulting outcome.
    if depth < 1:
        term = Environment(sp)
        term.perform_action(a)
        return None, map_side_to_int(term.get_turn()) * term.int_outcome()

    env = Environment(s)
    pairs = env.get_as_pairs()

    # Terminal position (no legal moves): score it exactly as at the horizon.
    if not pairs:
        term = Environment(sp)
        term.perform_action(a)
        return None, map_side_to_int(term.get_turn()) * term.int_outcome()

    # One ply left: score every child directly instead of recursing.
    if depth == 1:
        scores = [
            0.5 * map_side_to_int(env.get_turn()) * env.action_outcome(act)
            for (act, _) in pairs
        ]
        top = max(scores)
        # First index attaining the maximum — same tie-break as np.argmax.
        pick = scores.index(top)
        return pairs[pick][0], top

    # Interior node: negamax recursion with a negated, swapped window.
    chosen = None
    for (act, nxt) in pairs:
        val = -0.5 * alphabeta_outcome(s, act, nxt, depth - 1, -beta, -alpha)[1]
        if val >= beta:
            # Fail-hard cutoff: return the bound, not the raw score.
            return act, beta
        if val > alpha:
            alpha = val
            chosen = act
    return chosen, alpha