# Imports used across the listings below
import copy
import time
from threading import Thread, Lock

import numpy as np


def run(self):
    """Client's body."""
    try:
        self.connect()
        self.send_string(self.player_name)
        state = State(self.read_string())
        self.state_hash_tables_tmp[0][state.get_hash()] = {"value": 0, "used": 1}
        while True:  # Playing
            if self.color == state.turn:  # Check turn
                # Retrieving best action and its value
                action, value = choose_action(state, self.game,
                                              self.state_hash_tables_tmp)
                self.send_string(action_to_server_format(action))
                print("Chosen action:", action_to_server_format(action))
                print("Chosen action value:", value)
            else:
                clear_hash_table(self.state_hash_tables_tmp, state)
            # The server sends the updated board after every move
            state = State(self.read_string())
            update_used(self.state_hash_tables_tmp, state, self.game.weights,
                        self.game.color)
    except Exception as e:
        print(e)
    finally:
        print("Game ended.")
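# NOTE: clear_hash_table and update_used are called above but not defined in
# this listing. Below is a minimal sketch of what they might look like,
# inferred from how run() and min_value() use the tables: a list of dicts
# indexed by the number of captured checkers, whose entries hold "value",
# "used" and optionally "all_actions". Treat both bodies as illustrative
# assumptions, not the project's actual implementation.

def clear_hash_table(state_hash_tables, state):
    """Drop tables for capture counts that can no longer occur.

    Pieces only ever leave the board, so any table indexed by fewer
    captures than the current state is unreachable from here on.
    """
    index = MAX_NUM_CHECKERS - cont_pieces(state)
    for i in range(index):
        state_hash_tables[i].clear()


def update_used(state_hash_tables, state, weights, color):
    """Mark the state just received from the server as part of the game line.

    min_value() scores entries with "used" == 1 as a draw, so setting the
    flag here is what implements the repetition penalty.
    """
    index = MAX_NUM_CHECKERS - cont_pieces(state)
    state_hash = state.get_hash()
    entry = state_hash_tables[index].get(state_hash)
    if entry is not None:
        entry["used"] = 1
    else:
        value = state.compute_heuristic(weights, color)
        state_hash_tables[index][state_hash] = {"value": value, "used": 1}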
def search_thread(state, action, game, best_score, beta, depth, max_depth,
                  time_start, state_hash_table, stop, active, id_m,
                  best_scores):
    """Worker loop: repeatedly search whatever action the main thread assigns
    to this slot, until the shared stop flag is raised."""
    lock_time.acquire()
    tmp_time = stop[0]
    lock_time.release()
    while not tmp_time:
        # Mark this worker as idle and wait for a new assignment
        lock_bool.acquire()
        active[id_m] = False
        tmp = active[id_m]
        lock_bool.release()
        while not tmp and not tmp_time:
            lock_bool.acquire()
            tmp = active[id_m]
            lock_bool.release()
            lock_time.acquire()
            tmp_time = stop[0]
            lock_time.release()
        if tmp_time:
            break
        # Snapshot the current bound and the assigned action
        lock_value.acquire()
        tmp_best = best_score[0]
        a = action[id_m]
        lock_value.release()
        v = min_value(
            State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
            game, tmp_best, beta, depth + 1, max_depth, time_start,
            state_hash_table)
        if v > best_scores[id_m]:
            best_scores[id_m] = v
        lock_time.acquire()
        tmp_time = stop[0]
        lock_time.release()
    return
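# NOTE: the threaded variants in this listing rely on module-level locks and
# a pool-size constant that are not shown. A minimal sketch of those globals,
# inferred from usage (the value of N_THREAD is an illustrative assumption):

lock_time = Lock()   # guards the shared stop/timeout flag
lock_bool = Lock()   # guards the per-worker "active" flags
lock_value = Lock()  # guards best_score and the shared action slots
lock_hash = Lock()   # guards the per-capture-count hash tables
lock_2 = Lock()      # guards hash-table access inside the threaded max_value

N_THREAD = 4         # size of the worker pool (illustrative value)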
def min_value(state, game, alpha, beta, depth, max_depth, time_start,
              state_hash_table):
    tmp_victory = state.check_victory()
    if tmp_victory == -1 and game.color == "BLACK":
        # King captured and black player -> Win
        return MAX_VAL_HEURISTIC
    elif tmp_victory == -1 and game.color == "WHITE":
        # King captured and white player -> Lose
        return -MAX_VAL_HEURISTIC
    elif tmp_victory == 1 and game.color == "BLACK":
        # King escaped and black player -> Lose
        return -MAX_VAL_HEURISTIC
    elif tmp_victory == 1 and game.color == "WHITE":
        # King escaped and white player -> Win
        return MAX_VAL_HEURISTIC
    state_hash = state.get_hash()
    index_checkers = MAX_NUM_CHECKERS - cont_pieces(state)
    lock_hash.acquire()
    hash_result = copy.deepcopy(
        state_hash_table[index_checkers].get(state_hash))
    lock_hash.release()
    all_actions = None
    if hash_result is not None:
        if hash_result['used'] == 1:
            return -DRAW_POINTS
        if hash_result.get('all_actions') is not None:
            all_actions = hash_result.get('all_actions')
    if cutoff_test(depth, max_depth, game.max_time, time_start):
        # If reached maximum depth or total time
        if hash_result is not None and hash_result.get("value") is not None:
            # If state previously evaluated don't recompute heuristic
            return hash_result["value"]
        # If state not previously evaluated
        value = state.compute_heuristic(game.weights, game.color)
        # Add state and value to hash table
        add_to_hash(state_hash_table, state_hash, value, None, index_checkers)
        return value
    # Body
    v = np.inf
    if all_actions is None:
        all_actions = game.produce_actions(state)
        if hash_result is not None:
            add_to_hash(state_hash_table, state_hash, hash_result['value'],
                        all_actions, index_checkers)
    if len(all_actions) == 0:
        return MAX_VAL_HEURISTIC
    for a in all_actions:
        v = min(v,
                max_value(
                    State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                    game, alpha, beta, depth + 1, max_depth, time_start,
                    state_hash_table))
        if v <= alpha:
            return v
        beta = min(beta, v)
    return v
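# NOTE: cutoff_test and add_to_hash are referenced but not defined in this
# listing. A minimal sketch consistent with how they are called (signatures
# inferred from the call sites; the bodies and the 0.1 s safety margin are
# assumptions). add_to_hash is called with five arguments here and with four
# in the threaded max_value below, hence the optional index_checkers:

def cutoff_test(depth, max_depth, max_time, time_start):
    """Stop descending when the depth limit is hit or time is almost up."""
    return depth >= max_depth or max_time - (time.time() - time_start) <= 0.1


def add_to_hash(state_hash_table, state_hash, value, all_actions,
                index_checkers=None):
    """Store (or refresh) a transposition entry for the given state."""
    entry = {"value": value, "used": 0, "all_actions": all_actions}
    if index_checkers is None:
        state_hash_table[state_hash] = entry           # flat-table variant
    else:
        state_hash_table[index_checkers][state_hash] = entry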
def min_value(state, game, alpha, beta, depth, max_depth, htable, time_start):
    if game.max_time - (time.time() - time_start) <= 0.1:
        return False  # Time almost up: abort this search
    hash_key = state.get_hash()
    update_hash = True
    hash_available = False
    if hash_key in htable:
        hash_available = True
        hash_state = htable.get(hash_key)
        if hash_state.get_forward_depth() >= max_depth - depth:
            # Stored entry was searched at least as deep: reuse its value
            return hash_state.get_value()
        else:
            update_hash = False
    if depth == max_depth:
        return eval_fn(state, game)
    v = np.inf
    if hash_available:
        all_actions = hash_state.get_actions()
    else:
        all_actions = game.produce_actions(state)
        np.random.shuffle(all_actions)
    for a in all_actions:
        if game.max_time - (time.time() - time_start) <= 0.1:
            return False
        new_state = State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4]))
        # (time_start was missing from this call in the original; max_value
        # needs it for its own time checks)
        v = min(v,
                max_value(new_state, game, alpha, beta, depth + 1, max_depth,
                          htable, time_start))
        if update_hash or not hash_available:
            key = new_state.get_hash()
            htable[key] = HashEntry(key, v, all_actions, max_depth, depth)
        if v <= alpha:
            return v
        beta = min(beta, v)
    return v
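# NOTE: HashEntry is referenced but not defined in this listing. A minimal
# sketch matching the accessors used above (the field layout is an
# assumption inferred from the constructor call and the depth comparison):

class HashEntry:
    """Transposition-table entry for the depth-aware variant of min_value."""

    def __init__(self, key, value, actions, max_depth, depth):
        self.key = key
        self.value = value
        self.actions = actions
        # How many plies below this node were actually searched
        self.forward_depth = max_depth - depth

    def get_value(self):
        return self.value

    def get_actions(self):
        return self.actions

    def get_forward_depth(self):
        return self.forward_depth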
def choose_action(state, game, state_hash_table):
    """
    Search for the best action using minimax with alpha-beta pruning,
    iteratively increasing the maximum depth. It stops only when the
    available time is almost up.
    """
    time_start = time.time()
    best_score_end = -np.inf
    beta = np.inf
    best_action = None
    best_action_end = None
    max_depth = 2
    num_state_visited = [0]
    flag = False
    # Getting all possible actions given state
    all_actions = game.produce_actions(state)
    while time.time() - time_start < game.max_time:
        cont = 0
        best_score = -np.inf
        for a in all_actions:
            # (this call targets a sequential min_value variant that also
            # tracks num_state_visited, not the threaded one above)
            v = min_value(
                State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                game, best_score, beta, 1, max_depth, time_start,
                state_hash_table, num_state_visited)
            cont += 1
            if v > best_score:
                best_score = v
                best_action = a
            if time.time() - time_start >= game.max_time:
                break
        # If search at current maximum depth is finished, update best action
        if cont == len(all_actions):
            best_score_end = best_score
            best_action_end = best_action
            flag = True
            print("Depth reached:", max_depth)
        elif flag:
            print("Depth reached:", max_depth - 1)
        else:
            print("Minimum depth not reached")
        max_depth += 1  # Iteratively increasing depth
    print(num_state_visited[0], "states visited in",
          time.time() - time_start, "seconds.")
    return best_action_end, best_score_end
def alpha_beta_cutoff_search(state, game, max_depth, htable, time_start):
    """Search game to determine best action; use alpha-beta pruning.
    This version cuts off search and uses an evaluation function."""
    best_score = -np.inf
    beta = np.inf
    best_action = None
    action_list = game.produce_actions(state)
    np.random.shuffle(action_list)
    for a in action_list:
        new_state = State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4]))
        v = min_value(new_state, game, best_score, beta, 1, max_depth, htable,
                      time_start)
        if v > best_score:
            best_score = v
            best_action = a
    return best_action, best_score, max_depth
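# NOTE: a minimal sketch of how alpha_beta_cutoff_search could be driven with
# iterative deepening, and of the eval_fn it bottoms out in. Both bodies are
# assumptions built from the call sites (the driver name iterative_deepening
# is hypothetical; min_value returns False when the time budget is nearly
# exhausted, so an aborted depth is discarded here):

def eval_fn(state, game):
    """Leaf evaluation: delegate to the state's weighted heuristic."""
    return state.compute_heuristic(game.weights, game.color)


def iterative_deepening(state, game, htable):
    time_start = time.time()
    best_action, best_score = None, -np.inf
    max_depth = 2
    while game.max_time - (time.time() - time_start) > 0.1:
        action, score, _ = alpha_beta_cutoff_search(state, game, max_depth,
                                                    htable, time_start)
        if game.max_time - (time.time() - time_start) > 0.1:
            # Depth completed within budget: keep its result
            best_action, best_score = action, score
        max_depth += 1
    return best_action, best_score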
def choose_action(state, game, state_hash_table):
    """
    Search for the best action using minimax with alpha-beta pruning,
    iteratively increasing the maximum depth. It stops only when the
    available time is almost up.
    """
    time_start = time.time()
    # Getting all possible actions given state
    all_actions = game.produce_actions(state)
    best_score = [np.inf]
    best_score_end = np.inf
    alpha = [-np.inf]
    best_action = None
    best_action_end = None
    max_depth = 2
    flag = False
    lock_m = Lock()
    return_values = [-np.inf for _ in range(len(all_actions))]
    # Note: this root-parallel variant inverts the usual sign convention.
    # The root minimizes over max_value results (wins are scored
    # -MAX_VAL_HEURISTIC inside max_value below).
    while time.time() - time_start < game.max_time:
        thread_list = []
        if len(all_actions) > 0:
            # Evaluate the first action synchronously to seed best_score
            a = all_actions[0]
            thread_list.append(
                Thread(target=max_value,
                       args=(State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                             game, alpha, best_score, 1, max_depth, time_start,
                             state_hash_table, 0, return_values, lock_m)))
            thread_list[0].start()
            thread_list[0].join()
            if return_values[0] < best_score[0]:
                best_score[0] = return_values[0]
                best_action = a
            # Evaluate the remaining actions in parallel, one thread each
            for i in range(len(all_actions[1:])):
                a = all_actions[i + 1]
                thread_list.append(
                    Thread(target=max_value,
                           args=(State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                                 game, alpha, best_score, 1, max_depth,
                                 time_start, state_hash_table, i + 1,
                                 return_values, lock_m)))
                thread_list[i + 1].start()
            # Poll finished threads, folding their results into best_score
            flag_t = True
            flag_time = False
            while flag_t and not flag_time:
                flag_t = False
                for i in range(len(all_actions)):
                    if not thread_list[i].is_alive():
                        lock_m.acquire()
                        if return_values[i] < best_score[0]:
                            best_score[0] = return_values[i]
                            best_action = all_actions[i]
                        lock_m.release()
                    else:
                        flag_t = True
                if time.time() - time_start >= game.max_time:
                    flag_time = True
            for i in range(len(thread_list)):
                thread_list[i].join()
            # If search at current maximum depth is finished, update best action
            if not flag_time:
                best_score_end = best_score[0]
                best_action_end = best_action
                flag = True
                print("Depth reached:", max_depth)
            elif flag:
                print("Depth reached:", max_depth - 1)
            else:
                print("Minimum depth not reached")
            max_depth += 1  # Iteratively increasing depth
    return best_action_end, best_score_end
def max_value(state, game, alpha, beta, depth, max_depth, time_start,
              state_hash_table, id_m, v, lock_p):
    lock_2.acquire()
    state_hash = state.get_hash()
    hash_result = state_hash_table.get(state_hash)
    lock_2.release()
    all_actions = None
    if hash_result is not None:
        if hash_result['used'] == 1:
            v[id_m] = 0
            return
        if hash_result.get('all_actions') is not None:
            all_actions = hash_result.get('all_actions')
    if cutoff_test(depth, max_depth, game.max_time, time_start):
        # If reached maximum depth or total time
        if hash_result is not None:
            # If state previously evaluated don't recompute heuristic
            v[id_m] = hash_result["value"]
            return
        # If state not previously evaluated
        value = state.compute_heuristic(game.weights, game.color)
        lock_2.acquire()
        # Add state and value to hash table
        add_to_hash(state_hash_table, state_hash, value, all_actions)
        lock_2.release()
        v[id_m] = value
        return
    tmp_victory = state.check_victory()
    if tmp_victory == -1 and game.color == "BLACK":
        # King captured and black player -> Win
        v[id_m] = -MAX_VAL_HEURISTIC
        return
    elif tmp_victory == -1 and game.color == "WHITE":
        # King captured and white player -> Lose
        v[id_m] = MAX_VAL_HEURISTIC
        return
    elif tmp_victory == 1 and game.color == "BLACK":
        # King escaped and black player -> Lose
        v[id_m] = MAX_VAL_HEURISTIC
        return
    elif tmp_victory == 1 and game.color == "WHITE":
        # King escaped and white player -> Win
        v[id_m] = -MAX_VAL_HEURISTIC
        return
    # Body
    if all_actions is None:
        all_actions = game.produce_actions(state)
        if hash_result is not None:
            add_to_hash(state_hash_table, state_hash, hash_result['value'],
                        all_actions)
    if len(all_actions) == 0:
        # (the original returned the value directly, but this function
        # communicates through the shared list v)
        v[id_m] = -MAX_VAL_HEURISTIC
        return
    return_values = [np.inf for _ in range(len(all_actions))]
    lock_p.acquire()
    best_score = [alpha[0]]
    lock_p.release()
    lock_m = Lock()
    # Evaluate the first action synchronously to seed best_score
    a = all_actions[0]
    thread = Thread(target=min_value,
                    args=(State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                          game, best_score, beta, depth + 1, max_depth,
                          time_start, state_hash_table, 0, return_values,
                          lock_m))
    thread.start()
    thread.join()
    if return_values[0] > best_score[0]:
        best_score[0] = return_values[0]

    def run_batch(indices):
        # Launch one thread per action index, then poll until all finish,
        # folding each result into best_score; returns the cutoff value if a
        # beta cutoff occurs, else None.
        thread_list = []
        for idx in indices:
            a = all_actions[idx]
            lock_p.acquire()
            if alpha[0] > best_score[0]:
                best_score[0] = alpha[0]
            lock_p.release()
            t = Thread(target=min_value,
                       args=(State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                             game, best_score, beta, depth + 1, max_depth,
                             time_start, state_hash_table, idx, return_values,
                             lock_m))
            thread_list.append(t)
            t.start()
        flag_t = True
        while flag_t:
            flag_t = False
            for i, idx in enumerate(indices):
                if not thread_list[i].is_alive():
                    lock_m.acquire()
                    if return_values[idx] >= beta[0]:
                        cutoff = return_values[idx]
                        lock_m.release()
                        return cutoff
                    best_score[0] = max(best_score[0], return_values[idx])
                    lock_m.release()
                else:
                    flag_t = True
        for t in thread_list:
            t.join()
        return None

    # Evaluate the remaining actions in two parallel batches. The original
    # indexing re-used index 0 and skipped the middle action; fixed here so
    # that each action is searched exactly once.
    half = max(int(len(all_actions) / 2), 1)
    cutoff = run_batch(list(range(1, half)))
    if cutoff is None:
        cutoff = run_batch(list(range(half, len(all_actions))))
    if cutoff is not None:
        v[id_m] = cutoff
        return
    # Report the max over all children (the original returned the last
    # thread's value, discarding the best result)
    v[id_m] = best_score[0]
    return
def choose_action(state, game, state_hash_table):
    """
    Search for the best action using minimax with alpha-beta pruning,
    iteratively increasing the maximum depth. It stops only when the
    available time is almost up.
    """
    time_start = time.time()
    max_depth = 2
    flag = False
    best_score_end = np.inf
    best_action_end = None
    best_action = None
    beta = np.inf
    flag_time = [False]
    best_scores = []
    # Getting all possible actions given state
    all_actions = game.produce_actions(state)
    if len(all_actions) > 0:
        thread_list = []
        active = []
        action = []
        while time.time() - time_start < game.max_time:
            best_score = [-np.inf]
            for i in range(len(best_scores)):
                best_scores[i] = -np.inf
            for j in range(len(all_actions)):
                a = all_actions[j]
                if j == 0:
                    # First action searched in the main thread to seed best_score
                    v = min_value(
                        State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                        game, best_score[0], beta, 1, max_depth, time_start,
                        state_hash_table)
                    if v > best_score[0]:
                        best_score[0] = v
                        best_action = a
                else:
                    if len(thread_list) < N_THREAD:
                        # Grow the worker pool up to N_THREAD threads
                        active.append(False)
                        action.append(a)
                        best_scores.append(-np.inf)  # (originally np.inf,
                                                     # which made every fresh
                                                     # slot win the comparison)
                        thread_list.append(
                            Thread(target=search_thread,
                                   args=(state, action, game, best_score, beta,
                                         1, max_depth, time_start,
                                         state_hash_table, flag_time, active,
                                         len(thread_list), best_scores)))
                        thread_list[len(thread_list) - 1].start()
                    else:
                        # Pool is full: wait for an idle worker, collect its
                        # last result, and hand it the next action
                        flag_assign = True
                        while flag_assign:
                            for i in range(len(thread_list)):
                                lock_bool.acquire()
                                tmp = active[i]
                                lock_bool.release()
                                if not tmp:
                                    lock_value.acquire()
                                    if best_scores[i] > best_score[0]:
                                        best_score[0] = best_scores[i]
                                        best_action = action[i]
                                    lock_value.release()
                                    action[i] = a
                                    lock_bool.acquire()
                                    active[i] = True
                                    lock_bool.release()
                                    flag_assign = False
                                    break
                            if time.time() - time_start >= game.max_time:
                                lock_time.acquire()
                                flag_time[0] = True
                                lock_time.release()
                                break
            # Wait for all workers to go idle (or for time to run out)
            flag_t = True
            while flag_t and not flag_time[0]:
                flag_t = False
                # (originally range(N_THREAD), which over-indexes when the
                # pool is smaller than N_THREAD)
                for i in range(len(active)):
                    lock_bool.acquire()
                    tmp = active[i]
                    lock_bool.release()
                    if tmp:
                        flag_t = True
                if time.time() - time_start >= game.max_time:
                    lock_time.acquire()
                    flag_time[0] = True
                    lock_time.release()
            # If search at current maximum depth is finished, update best action
            tmp_time = flag_time[0]
            if not tmp_time:
                for i in range(len(thread_list)):
                    if best_scores[i] > best_score[0]:
                        best_score[0] = best_scores[i]
                        best_action = action[i]
                best_score_end = best_score[0]
                best_action_end = best_action
                flag = True
                print("Depth reached:", max_depth)
            elif flag:
                print("Depth reached:", max_depth - 1)
            else:
                print("Minimum depth not reached")
            max_depth += 1  # Iteratively increasing depth
        # Signal workers to exit before joining (otherwise they spin forever
        # waiting for an assignment and join() deadlocks)
        lock_time.acquire()
        flag_time[0] = True
        lock_time.release()
        for i in range(len(thread_list)):
            thread_list[i].join()
    return best_action_end, best_score_end
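# NOTE: this listing collects three alternative implementations of
# choose_action/min_value/max_value: a purely sequential searcher, a
# root-parallel variant that spawns one thread per root action, and a
# worker-pool variant built on search_thread. They appear to come from
# separate modules; the duplicate names would shadow one another if the
# functions were placed in a single file.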