Example #1
0
def alphabeta_dtm(sp, a, s, depth, alpha, beta):
    """
    Negamax alpha-beta pruning search over a ground-truth DTM signal.

    params:
        sp: previous state
        a: action that led from sp to s
        s: current state
        depth: remaining search depth
        alpha: lower bound of the search window
        beta: upper bound of the search window
    returns:
        (best action or None, score of that action)
    """
    if depth == 0:
        # Leaf: score the incoming action from the previous state's view.
        prev_env = Environment(sp)
        leaf_score = -map_side_to_int(prev_env.get_turn()) * prev_env.action_outcome(a)
        return None, leaf_score

    env = Environment(s)
    pairs = env.get_as_pairs()

    if depth == 1:
        # One ply left: evaluate every legal action directly.
        # get_turn() is queried per action on purpose — kept inside the loop
        # so the evaluation order matches the environment's behavior exactly.
        scores = []
        for (action, _next_state) in pairs:
            scores.append(
                -0.5 * map_side_to_int(env.get_turn()) * env.action_outcome(action)
            )
        top = max(scores)
        # First occurrence of the maximum wins ties.
        return pairs[scores.index(top)][0], top

    # Recurse with a negated, swapped window (negamax formulation).
    chosen = None
    for (action, next_state) in pairs:
        child = alphabeta_dtm(s, action, next_state, depth - 1, -beta, -alpha)
        score = -0.5 * child[1]
        if score >= beta:
            # Fail-hard beta cutoff.
            return action, beta
        if score > alpha:
            alpha = score
            chosen = action
    return chosen, alpha
Example #2
0
def alphabeta_outcome(sp, a, s, depth, alpha, beta):
    """
    Negamax alpha-beta pruning search over a ground-truth game outcome.

    params:
        sp: previous state
        a: action that led from sp to s
        s: current state
        depth: remaining search depth
        alpha: lower bound of the search window
        beta: upper bound of the search window
    returns:
        (best action or None, score of that action)
    """
    def _terminal_score():
        # Replay the incoming action from the previous state and score
        # the resulting position (used at depth 0 and at dead ends).
        prev = Environment(sp)
        prev.perform_action(a)
        return map_side_to_int(prev.get_turn()) * prev.int_outcome()

    if depth < 1:
        return None, _terminal_score()

    env = Environment(s)
    pairs = env.get_as_pairs()
    if not pairs:
        # No legal actions from s: fall back to the terminal evaluation.
        return None, _terminal_score()

    if depth == 1:
        # One ply left: score every legal action and take the first maximum.
        scores = [
            0.5 * map_side_to_int(env.get_turn()) * env.action_outcome(an)
            for (an, _sn) in pairs
        ]
        idx = int(np.argmax(np.array(scores)))
        return pairs[idx][0], scores[idx]

    # Recurse with a negated, swapped window (negamax formulation).
    chosen = None
    for (an, sn) in pairs:
        score = -0.5 * alphabeta_outcome(s, an, sn, depth - 1, -beta, -alpha)[1]
        if score >= beta:
            # Fail-hard beta cutoff.
            return an, beta
        if score > alpha:
            alpha = score
            chosen = an
    return chosen, alpha