Example 1
def pazaak():
    b = PazaakBoard()
    # Draw four random side-deck cards (values 1-6) for each player
    player_side_deck = [Card(choice(range(1, 7))) for _ in range(4)]
    opponent_side_deck = [Card(choice(range(1, 7))) for _ in range(4)]

    players = [
        PazaakPlayer(player=1, side_deck=player_side_deck),
        PazaakPlayer(player=2, side_deck=opponent_side_deck)
    ]

    state = PazaakState(board=b, players=players, player=players[0])

    # Keep playing until the board reports a result (-1 means still in progress)
    while b.status(players=state.players) == -1:
        # Unless the current player stands, draw a random card
        if not state.player.stand:
            state = state.random_card()

        node = Node(state=state)
        tree = Mcts(root=node)

        print(">>>>> CURR PLAYER: <<<<<<<", state.player.player)
        # Run 100 MCTS iterations and take the resulting board as the next move
        b = tree.find_next_move(100)
        state = PazaakState(board=b,
                            player=state.player,
                            players=state.players,
                            player_index=state.player_index)
        print("TURN\n")
        b.print()
Example 2
    def simulate(self, game: Game, state: State):
        """
        Run all the tree phases (selection, expansion, rollout, backpropagation) in one step
        :param game: Game
        :param state: State
        """
        root_node = Node(None, state, game.current_player)
        for _ in range(self.m):
            child = self.tree_search(game, root_node)
            self.expand(game, child.state)
            fin = self.do_random_walk(game, child)
            self.backprop(child, fin.player, 1)
Example 3
    def simulate(self, node: Node):
        curr = node

        board_status = curr.state.board.status(players=self.root.state.players)

        # Play random moves, alternating the player, until the game ends
        while board_status == constants.IN_PROGRESS:
            curr.state.player = curr.state.toggle_player_new()
            new_state = curr.state.random_play()
            curr = Node(state=new_state)
            board_status = curr.state.board.status(players=curr.state.players)

        return board_status
Example 4
def get_next_board():
    global app_state
    # -1 means the game is still in progress
    if app_state.board.status(players=app_state.players) == -1:
        if not app_state.player.stand:
            app_state = app_state.random_card()

        node = Node(state=app_state)
        tree = Mcts(root=node)

        app_state.board = tree.find_next_move(100)
        app_state = PazaakState(board=app_state.board,
                                player=app_state.player,
                                players=app_state.players,
                                player_index=app_state.player_index)

    return app_state.board
Example 5
def tic_tac_toe():
    b = TicTacBoard()

    players = [TicTacPlayer(1), TicTacPlayer(2)]
    state = TicTacState(board=b, player=players[0], players=players)

    # Keep playing until the board reports a result (-1 means still in progress)
    while b.status() == -1:
        node = Node(state=state)
        tree = Mcts(root=node)

        print(">>>>> CURR PLAYER: <<<<<<<", state.player.player)
        b = tree.find_next_move(100)
        state = TicTacState(board=b,
                            player=state.player,
                            players=state.players,
                            player_index=state.player_index)
        print("TURN\n")
        b.print()
Example 6
    def expand(self, node):
        not_visited_actions = deepcopy(node.allowed_actions)
        for child in node.children:
            not_visited_actions.remove(child.previous_action)

        # TODO: check if this should be random or chosen by player policy
        chosen_action = random.choice(tuple(not_visited_actions))

        schafkopf_env = gym.make("Schafkopf-v1")
        schafkopf_env.setGameState(deepcopy(node.game_state),
                                   deepcopy(node.player_hands))
        schafkopf_env.stepTest(chosen_action)  # state, rewards, terminal, info

        new_node = Node(
            parent=node,
            game_state=deepcopy(schafkopf_env.getGameState()),
            previous_action=chosen_action,
            player_hands=deepcopy(schafkopf_env.getCards()),
            allowed_actions=schafkopf_env.test_game.getOptionsList())
        node.add_child(child_node=new_node)
        return new_node
Example 7
    def gen_child_nodes(self, node: Node):
        # One child node per successor state, owned by the next player
        p = self.get_next_player(node.player)
        nodes = [Node(node, s, p) for s in self.gen_child_states(node.state)]
        return nodes
Example 8
class MonteCarloTree:
    '''
    Inspired by https://github.com/Taschee/schafkopf/blob/master/schafkopf/players/uct_player.py
    '''
    def __init__(self, game_state, player_hands, allowed_actions, ucb_const=1):
        self.root = Node(None, None, game_state, player_hands, allowed_actions)
        self.ucb_const = ucb_const

    def uct_search(self, num_playouts):
        for _ in range(num_playouts):
            selected_node = self.selection()
            rewards = self.simulation(selected_node)
            self.backup_rewards(leaf_node=selected_node, rewards=rewards)

        results = []
        for child in self.root.children:
            results.append(
                (child.previous_action, child.visits,
                 child.get_average_reward(self.root.game_state["cp"])))

        return results

    def selection(self):
        current_node = self.root
        while not current_node.is_terminal():
            if not current_node.fully_expanded():
                return self.expand(current_node)
            else:
                current_node = current_node.best_child(
                    ucb_const=self.ucb_const)
        return current_node

    def expand(self, node):
        not_visited_actions = deepcopy(node.allowed_actions)
        for child in node.children:
            not_visited_actions.remove(child.previous_action)

        # TODO: check if this should be random or chosen by player policy
        chosen_action = random.choice(tuple(not_visited_actions))

        schafkopf_env = gym.make("Schafkopf-v1")
        schafkopf_env.setGameState(deepcopy(node.game_state),
                                   deepcopy(node.player_hands))
        schafkopf_env.stepTest(chosen_action)  # state, rewards, terminal, info

        new_node = Node(
            parent=node,
            game_state=deepcopy(schafkopf_env.getGameState()),
            previous_action=chosen_action,
            player_hands=deepcopy(schafkopf_env.getCards()),
            allowed_actions=schafkopf_env.test_game.getOptionsList())
        node.add_child(child_node=new_node)
        return new_node

    def simulation(self, selected_node):
        schafkopf_env = gym.make("Schafkopf-v1")
        gameOver = deepcopy(selected_node.game_state)["gameOver"]
        schafkopf_env.setGameState(deepcopy(selected_node.game_state),
                                   deepcopy(selected_node.player_hands))

        while not gameOver:
            rewards, round_finished, gameOver = schafkopf_env.test_game.playUntilEnd()
        return rewards["final_rewards"]

    def backup_rewards(self, leaf_node, rewards):
        current_node = leaf_node
        while current_node != self.root:
            current_node.update_rewards(rewards)
            current_node.update_visits()
            current_node = current_node.parent
        self.root.update_visits()

    def get_action_count_rewards(self):
        result = {}
        for child in self.root.children:
            if isinstance(child.previous_action, list):
                result[tuple(
                    child.previous_action)] = (child.visits,
                                               child.cumulative_rewards)
            else:
                result[child.previous_action] = (child.visits,
                                                 child.cumulative_rewards)
        return result
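
Example 8 touches only a small part of the Node API: parent, previous_action, children, visits, cumulative_rewards, add_child, is_terminal, fully_expanded, best_child, update_rewards, update_visits and get_average_reward. Below is a minimal sketch of a Node class that would satisfy those calls; the per-player reward bookkeeping and the UCB1 formula in best_child are assumptions, while the "gameOver" and "cp" keys are the ones Example 8 itself reads from game_state.

import math


class Node:
    """Minimal sketch of the Node interface used by MonteCarloTree in Example 8."""

    def __init__(self, parent, previous_action, game_state, player_hands,
                 allowed_actions):
        self.parent = parent
        self.previous_action = previous_action
        self.game_state = game_state
        self.player_hands = player_hands
        self.allowed_actions = allowed_actions
        self.children = []
        self.visits = 0
        # Assumption: one cumulative reward entry per player
        self.cumulative_rewards = [0.0] * len(player_hands)

    def add_child(self, child_node):
        self.children.append(child_node)

    def is_terminal(self):
        # "gameOver" is the flag Example 8 reads from game_state
        return self.game_state["gameOver"]

    def fully_expanded(self):
        return len(self.children) == len(self.allowed_actions)

    def update_visits(self):
        self.visits += 1

    def update_rewards(self, rewards):
        for player, reward in enumerate(rewards):
            self.cumulative_rewards[player] += reward

    def get_average_reward(self, player):
        return self.cumulative_rewards[player] / self.visits if self.visits else 0.0

    def best_child(self, ucb_const):
        # Assumption: standard UCB1 from the point of view of the player to move ("cp")
        player = self.game_state["cp"]

        def ucb(child):
            exploit = child.get_average_reward(player)
            explore = math.sqrt(math.log(self.visits) / child.visits)
            return exploit + ucb_const * explore

        return max(self.children, key=ucb)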
Example 9
    def __init__(self, game_state, player_hands, allowed_actions, ucb_const=1):
        self.root = Node(None, None, game_state, player_hands, allowed_actions)
        self.ucb_const = ucb_const
Example 10
def simulate(sureWinReg, sureWinPolicy, targetSet, initState, traj, inference):

    max_time_steps = ARG.levels
    simulation_times = ARG.simulation_times

    levels = ARG.levels
    num_sims = ARG.num_sims

    sure_winning_regions = load_sure_winning_regions(sureWinReg)
    sure_winning_policy = load_pre_computed_policy(sureWinPolicy)

    ctrl_env = ControllableEnvironment()
    ad_env = AdversarialEnvironment()

    eva = Evaluation(ctrl_env, ad_env)
    root_state = initState

    "create a tree"
    tree = MonteCarloTreeSearch(sure_winning_regions, ctrl_env, ad_env)
    inference_online = inference

    s = root_state
    print("| The initial state is: ", s)
    contr_s = s[0]
    ad_s = s[1]
    joint_traj = traj
    actions = []
    touch = False
    if np.linalg.norm(np.array(contr_s) - np.array(ad_s), ord=1) <= 1:
        print(' ')
        print('| Sorry! The Blue agent is caught!')
        # print("redo the simulate")

    elif contr_s in targetSet:
        print(' ')
        print('| Congrats!!! The blue agent reaches the true goal!!!')
        print('------------------------------------')

    elif contr_s + ad_s in sure_winning_regions:
        current_joint_state = joint_traj[-1]
        if len(current_joint_state) > 1:
            inference_online.update_traj(current_joint_state, False, disp_flag=False)
        # inference_online.update_traj(joint_traj, False, disp_flag=False)
        if touch is False:
            print(' ')
            print('| Congrats!!! The blue agent reaches the sure winning regions!!!')
            print('------------------------------------')
        # contr_a = inference_online.inference_learning.A[sure_winning_policy[(contr_s, ad_s)]]
        contr_a = ctrl_env.A_full[sure_winning_policy[(contr_s, ad_s)]]
    else:
        # inference_online.update_traj(joint_traj, False, disp_flag=False)
        current_joint_state = joint_traj[-1]
        if len(current_joint_state) > 1:
            inference_online.update_traj(current_joint_state, False, disp_flag=False)
        current_node = Node(History([joint_traj], levels, sure_winning_regions, ctrl_env, ad_env))
        best_child = tree.uct_search(num_sims / float(levels), current_node, targetSet)  # create the current root
        contr_a = best_child.history.value[-1]
    # NOTE: contr_a is only assigned in the last two branches above, so this
    # transition assumes the game has not already ended (caught or at goal).
    contr_n_s = ctrl_env.sto_trans(contr_s, tuple(contr_a))

    ad_a = inference_online.get_ad_action(s)
    ad_n_s = ad_env.sto_trans(ad_s, ad_a)

    contr_s = contr_n_s
    ad_s = ad_n_s
    s = (contr_n_s, ad_n_s)

    print(" ")
    print("| The current state is :", s)
    print("before append:", joint_traj)
    joint_traj.append(s)
    print("after append:", joint_traj)
    actions.append(contr_a)
    print(inference_online.inference_learning.P_h_g)
    return s, joint_traj, inference_online
Example 11
def load_parameters_mcts(config):
    """
    Load previous tree, data and info if the already exist
    otherwise create new ones
    Initialise Scorer and locks for multithreading

    :return: None
    """
    with open('mcts/configurations/' + config + ".json") as c:
        config = json.load(c)
    p.config = config
    p.directory = 'data_out/' + config['configuration_name'] + '/'
    p.f_tree = p.directory + "tree.pckl"
    p.f_info = p.directory + "info.json"
    p.f_data = p.directory + "data.json"
    p.f_stat = p.directory + "stat.csv"
    p.r_dft = p.directory + "dft/"
    p.f_stop = p.directory + "stop.txt"
    p.lock_update_data = threading.Lock()
    p.lock_update_node = threading.Lock()
    p.lock_sa_score = threading.Lock()
    if not os.path.isdir(p.directory) or not os.path.isfile(p.f_tree) or not os.path.isfile(p.f_info)\
            or not os.path.isfile(p.f_stat) or not os.path.isfile(p.f_data):
        if not os.path.isdir(p.directory):
            os.mkdir(p.directory)
        if not os.path.isdir(p.r_dft):
            os.mkdir(p.r_dft)
        with open(p.f_stop, 'w') as stop:
            stop.write("")
        with open(p.f_stat, 'w') as stat:
            stat.write("")
        print("New tree")
        p.tree_info = dict()
        p.tree_info[p.info_created] = 0
        p.tree_info[p.info_good] = 0
        p.tree_info[p.info_bad] = 0
        p.tree_info[p.info_alrd_tested] = 0
        p.data = dict()
        if config['from'] == 'root':
            p.tree = Node(SMILES(config['prefix']))
        else:
            p.tree = Node()
        p.turn = 0

    else:
        print("Loading previous tree, data and info")
        with open(p.f_tree, 'rb') as f:
            pickler = pickle.Unpickler(f)
            p.tree = pickler.load()
            while p.tree.parent:
                p.tree = p.tree.parent
        with open(p.f_data, 'r') as f:
            p.data = json.load(f)
        with open(p.f_info, 'r') as f:
            p.tree_info = json.load(f)
        with open(p.f_stat) as stat_file:
            p.turn = sum(1 for _ in stat_file)

        if config['from'] == 'root':
            if p.tree.smiles.element != config['prefix']:
                raise Exception("Root node different from previous execution")

    with open('rnn_models/' + p.config['rnn_repertory'] +
              "/config.json") as info_rnn:
        p.tokens = json.load(info_rnn)['tokens']

    data_base.load_data_base()

    reset_score_visit(p.tree)
    p.scorer = getattr(
        __import__(p.config['scorer'][0], fromlist=[p.config['scorer'][1]]),
        p.config['scorer'][1])(p.config['alpha_scorer'])

    load_scores()

    p.models = load_model(p.config['rnn_repertory'])
Example 12
    def expand_node(self, node: Node, player: Player):
        # The passed-in player is ignored; the player to move is derived from the node's state
        player = node.state.toggle_player_new()
        possible_states = node.state.get_all_states(player)

        for state in possible_states:
            node.children.append(Node(state=state, parent=node))
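
Examples 1, 3, 4, 5 and 12 drive one shared interface: a Node wraps a game state, Mcts(root=node) builds a tree, and tree.find_next_move(n) runs n search iterations and returns the board chosen for the current player. The sketch below shows one way such a find_next_move could tie those pieces together, assuming a module-level import math and the constants.IN_PROGRESS flag used in Example 3; simulate and expand_node are the methods from Examples 3 and 12, while select_promising_node, back_propagate, uct_value and the visit_count / win_score node attributes are assumptions introduced here for illustration only.

    def find_next_move(self, iterations):
        # Sketch: repeat select / expand / simulate / backpropagate, then
        # return the board of the most visited child of the root.
        for _ in range(iterations):
            promising = self.select_promising_node(self.root)
            status = promising.state.board.status(players=self.root.state.players)
            if status == constants.IN_PROGRESS:
                self.expand_node(promising, promising.state.player)
            node_to_explore = promising.children[-1] if promising.children else promising
            result = self.simulate(node_to_explore)
            self.back_propagate(node_to_explore, result)

        best_child = max(self.root.children, key=lambda c: c.visit_count)
        return best_child.state.board

    def select_promising_node(self, node):
        # Descend while the node has children, always following the best UCT value
        while node.children:
            parent = node
            node = max(parent.children, key=lambda c: self.uct_value(parent, c))
        return node

    def uct_value(self, parent, child):
        # Standard UCT: average score plus an exploration bonus
        if child.visit_count == 0:
            return float("inf")
        exploit = child.win_score / child.visit_count
        explore = math.sqrt(2 * math.log(parent.visit_count) / child.visit_count)
        return exploit + explore

    def back_propagate(self, node, result):
        # Walk back to the root, counting visits and crediting wins to the
        # player who owns each node's state
        while node is not None:
            node.visit_count += 1
            if result == node.state.player.player:
                node.win_score += 1
            node = node.parent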
Example 13
def simulate(sureWinReg, sureWinPolicy, targetSet, initState, traj, inference):

    max_time_steps = ARG.levels
    simulation_times = ARG.simulation_times

    levels = ARG.levels  # this needs to be smaller than the number defined in the state
    num_sims = ARG.num_sims

    # Global variables
    sure_winning_regions = load_sure_winning_regions(sureWinReg)
    sure_winning_policy = load_pre_computed_policy(sureWinPolicy)

    "Create two environments"
    ctrl_env = ControllableEnvironment()
    ad_env = AdversarialEnvironment()

    root_state = initState
    s = root_state
    "create a tree"
    tree = MonteCarloTreeSearch(sure_winning_regions, ctrl_env, ad_env)
    inference_online =inference

    print(" ")
    print("| This state is: ", s)
    contr_s = s[0]
    ad_s = s[1]
    joint_traj = traj
    actions = []
    inference_online.reset_inference(joint_traj)
    print(' ')
    print("belief:", inference_online.inference_learning.P_h_g)

    touch = False
    if np.linalg.norm(np.array(contr_s) - np.array(ad_s), ord=1) <= 1:
        print(' ')
        print('| Sorry! The Blue agent is caught!')

    elif contr_s in targetSet:
        print(' ')
        print('| Congrats!!! The blue agent reaches the temp goal!!!')
        print('------------------------------------')

    elif contr_s + ad_s in sure_winning_regions:
        if touch is False:
            print(' ')
            print('| Congrats!!! The blue agent reaches the sure winning regions!!!')
            print('------------------------------------')
            touch = True

        contr_a = ctrl_env.A_full[sure_winning_policy[(contr_s, ad_s)]]

    else:
        current_node = Node(History([joint_traj], levels, sure_winning_regions, ctrl_env, ad_env))
        tree.reset_tree()
        best_child = tree.uct_search(num_sims / float(levels), current_node)  # create the current root

        contr_a = best_child.history.value[-1]

    contr_n_s = ctrl_env.sto_trans(contr_s, tuple(contr_a))

    ad_a = inference_online.get_ad_action(s)
    # ad_a = inference_online.get_ad_max_action(s)
    # ad_a = inference_online.get_ad_pursuit_action(s)

    ad_n_s = ad_env.sto_trans(ad_s, ad_a)
    contr_s = contr_n_s
    ad_s = ad_n_s
    s = (contr_n_s, ad_n_s)

    print(" ")
    print("| The current state is :", s)
    joint_traj.append(s)
    inference_online.update_traj(joint_traj, disp_flag=False)
    print(' ')
    print(inference_online.inference_learning.P_h_g)
    # print(inference_online.inference_learning.total_KL)
    actions.append(contr_a)
    return s, joint_traj, inference_online