# Ejemplo n.º 1
def get_possible_next_states(node):
    """Return a list of all states reachable from *node* in one move.

    Expands the current player's letter into every empty cell of the
    tic-tac-toe board (cells 1-9; index 0 is unused) and wraps each
    resulting board in a State whose status flag is False when the move
    wins the game (terminal) and True otherwise.
    """
    # Defensive guard: an already-decided board should never be expanded.
    if isWinner(node.state.board, 'X') or isWinner(node.state.board, 'O'):
        print("Error, you should never get here!")
        return []

    letter = node.player.id

    # Board cell 0 is a placeholder, so only positions 1..9 count.
    open_cells = [pos for pos, cell in enumerate(node.state.board)
                  if pos > 0 and cell == ' ']

    successors = []
    for pos in open_cells:
        candidate = list(node.state.board)
        makeMove(candidate, letter, pos)
        # A winning move yields a terminal state (status False).
        successors.append(State(not isWinner(candidate, letter), candidate))

    return successors
# Ejemplo n.º 2
def drltc(simulation):
    """Deep-RL topology construction (AlphaZero-style) main loop for a WSN.

    Per iteration: (1) run MCTS-guided self-play episodes producing
    (adjacency, normalized-visit-policy, reward) training triples,
    (2) train the policy/value DNN on the accumulated dataset,
    (3) sample topologies from the trained policy to collect lifetime
    statistics, and (4) write topology visualizations plus periodic
    checkpoints of the statistics curve and the model weights.

    Relies on module-level configuration (n_nodes, n_iterations,
    n_episodes, n_searches, n_simulations, n_trainings,
    exploration_level, color_map, experiment) and on the project classes
    Dataset, DNN, MCTS, State.
    """
    training_dataset = Dataset()
    dnn = DNN(n_nodes, minibatch=16, learning_rate=1e-6)
    statistics = []
    max_trajectory = []

    for iteration in range(n_iterations):
        print("Begin  WSN topology construction......")
        # Draw the initial (edgeless) topology.
        G = nx.Graph()
        G.add_nodes_from(range(n_nodes))
        # BUGFIX: nx.draw has no 'color_map' keyword; node colors are
        # passed via 'node_color' (as done for the completed drawing below).
        nx.draw(G,
                pos=simulation.node_positions,
                node_color=color_map,
                with_labels=True)
        plt.savefig('./Topology_visualization/iteration_' +
                    str(iteration + 1) + '_initial.png')
        plt.close()
        G.clear()

        # --- self-play: generate training data ---------------------------
        for episode in range(n_episodes):

            root_state = State(np.zeros((n_nodes, n_nodes)))

            for n in range(n_nodes):

                if root_state.is_terminal():
                    reward = simulation.eval(root_state.adjacency)
                    # Back-fill the reward into every triple produced
                    # during this episode (the last n entries).
                    for dataset in training_dataset.data[-n:]:
                        dataset[-1] = np.array(reward)
                else:
                    mcts = MCTS(root_state.shape, dnn, simulation,
                                exploration_level)
                    #TODO keep subtrees?

                    for search in range(n_searches):
                        print(
                            f'\riteration {iteration:02}, episode {episode:02}, level {n:02}, search {search:02}',
                            end='')
                        mcts.search(root_state)

                    # L0 "norm" = number of distinct actions visited.
                    print(
                        '\nexploration:',
                        np.linalg.norm(
                            mcts.action_visits[root_state].flatten(), 0))
                    # Normalize visit counts into a policy; guard against
                    # an all-zero visit vector (division by zero).
                    if mcts.action_visits[root_state].sum() != 0:
                        normalized_visits = mcts.action_visits[
                            root_state] / mcts.action_visits[root_state].sum()
                    else:
                        normalized_visits = mcts.action_visits[root_state]

                    # Reward slot is None until the episode terminates.
                    training_dataset.add([
                        root_state.adjacency,
                        normalized_visits.flatten(), None
                    ])
                    # Sample the next edge according to the visit policy.
                    next_action = np.unravel_index(
                        np.random.choice(n_nodes**2,
                                         p=normalized_visits.flatten()),
                        shape=normalized_visits.shape)
                    root_state = root_state.transition(next_action)

        print('\n')

        # --- training ----------------------------------------------------
        for i in range(n_trainings):
            dnn.train(training_dataset, epoch=i)

        # --- evaluation: sample topologies from the raw policy -----------
        lifetimes = []
        max_value = 0
        for i in range(n_simulations):
            state = State(np.zeros((n_nodes, n_nodes)))
            trajectory = [state]
            while not state.is_terminal():
                state_policy, _ = dnn.eval(state.adjacency)
                # Mask invalid actions, then re-normalize over valid ones.
                state_policy[~state.get_valid_actions()] = 0
                state_policy /= state_policy.sum()
                next_action = np.unravel_index(
                    np.random.choice(n_nodes**2, p=state_policy.flatten()),
                    shape=state_policy.shape)
                state = state.transition(next_action)
                trajectory.append(state)
            final_topology = state.adjacency
            value = simulation.eval(final_topology)
            lifetimes.append(value)
            if value > max_value:
                max_value = value
                max_trajectory = trajectory
                max_topology = final_topology

        # [mean, max, min, range] of the sampled lifetimes.
        statistics.append([
            sum(lifetimes) / n_simulations,
            max(lifetimes),
            min(lifetimes),
            max(lifetimes) - min(lifetimes)
        ])
        print(f'statistics: {statistics[-1]}')

        # NOTE(review): this draws the topology of the *last* simulation,
        # not the best one (max_topology is computed but never used) —
        # confirm intent.
        edge_x, edge_y = np.where(final_topology)
        G = nx.DiGraph()
        # Edges are reversed (child -> parent) so arrows point toward the sink.
        edge_list = [(y, x) for x, y in zip(edge_x, edge_y)]
        G.add_edges_from(edge_list)
        nx.draw(G,
                pos=simulation.node_positions,
                node_color=color_map,
                with_labels=True)
        plt.savefig('./Topology_visualization/iteration_' +
                    str(iteration + 1) + '_completed.png')
        plt.close()

        # Shift node 0 and highlight every node whose position changed.
        movements = np.array(simulation.node_positions)
        movements[0] = movements[0] * 1.78 - 45
        # BUGFIX: the original iterated the boolean comparison matrix
        # directly ('for idx, coor in label'), which unpacks each row's
        # two booleans as (idx, coor) — idx was a bool, not a node index.
        # Enumerate rows and mark a node red when any coordinate moved.
        label = movements == np.array(simulation.node_positions)
        for idx, row in enumerate(label):
            if not row.all():
                color_map[idx] = 'red'

        nx.draw(G, pos=movements, node_color=color_map, with_labels=True)
        plt.savefig('./Topology_visualization/iteration_' +
                    str(iteration + 1) + '_movements.png')
        plt.close()

        # --- periodic checkpointing --------------------------------------
        if iteration % 10 == 0 and iteration != 0:
            # print(star_baseline(simulation))
            # print(mst_baseline(simulation))
            # print(random_baseline(simulation, n_simulations))
            statistics_np = np.array(statistics)
            plt.plot(statistics_np[:, 0])
            plt.plot(statistics_np[:, 1])
            plt.plot(statistics_np[:, 2])
            plt.ylabel('lifetime')
            plt.xlabel('iteration')
            plt.savefig(
                f'{experiment}/ckp_{experiment}_n{n_nodes}_e{n_episodes}_s{n_searches}_sim{n_simulations}_t{n_trainings}_i{iteration}.png'
            )
            # BUGFIX: state_dict must be *called*; saving the bound method
            # object does not serialize the model weights.
            torch.save(
                dnn.model.state_dict(),
                f'{experiment}/ckp_{experiment}_n{n_nodes}_e{n_episodes}_s{n_searches}_sim{n_simulations}_t{n_trainings}_i{iteration}.pt'
            )
# Ejemplo n.º 3
def _mcts_take_turn(mcts, theBoard, letter):
    """Show the board, let the MCTS pick a move for *letter*, and apply it."""
    print('\n')
    drawBoard(theBoard)
    print('\n')
    mcts.root = mcts.find_next_move()
    # The chosen move is the first square that differs between the
    # current board and the board of the selected child state.
    for i, entry in enumerate(theBoard):
        if entry != mcts.root.state.board[i]:
            makeMove(theBoard, letter, i)
            break


def AI_vs_AI():
    """Play Tic-Tac-Toe games where MCTS controls both sides.

    Runs games in a loop until the user declines another one.  The two
    'player'/'computer' turns were duplicated code in the original; they
    are now unified since both sides move via the same MCTS instance.
    """
    flag = True
    while flag:
        playerLetter = random.choice(('X', 'O'))
        computerLetter = 'O' if playerLetter == 'X' else 'X'
        turn = whoGoesFirst()
        theBoard = [' '] * 10

        mcts = MCTS(2, playrandom, get_possible_next_states)

        first_letter = playerLetter if turn == 'player' else computerLetter

        # MCTS player 0 always moves first: give it the letter of
        # whoever goes first, and the opposite letter to player 1.
        for player in mcts.player_list:
            if player.nr == 0:
                player.id = first_letter
            else:
                player.id = playerLetter if first_letter == computerLetter else computerLetter

        mcts.root = Node(State(True, theBoard), mcts.player_list[0])

        gameIsPlaying = True
        while gameIsPlaying:
            letter = playerLetter if turn == 'player' else computerLetter
            _mcts_take_turn(mcts, theBoard, letter)

            if isWinner(theBoard, letter):
                drawBoard(theBoard)
                print(letter, ' won the game!')
                gameIsPlaying = False
            elif isBoardFull(theBoard):
                drawBoard(theBoard)
                print('The game is a tie!')
                break
            else:
                # Alternate sides; pause so a human can watch the game.
                turn = 'computer' if turn == 'player' else 'player'
                input()

        cont = input('another game?\n')
        if cont not in ['y', 'yes', 'ye']:
            flag = False
# Ejemplo n.º 4
def normal_game():
    """Play an interactive Tic-Tac-Toe game: human player vs. MCTS computer."""
    print("\nWelcome to MonteCarlo-TicTacToe")
    playerLetter, computerLetter = inputPlayerLetter()
    turn = whoGoesFirst()
    theBoard = [' '] * 10

    mcts = MCTS(2, playrandom, get_possible_next_states)

    first_letter = playerLetter if turn == 'player' else computerLetter

    # MCTS player 0 always moves first; hand out the letters accordingly.
    for participant in mcts.player_list:
        if participant.nr == 0:
            participant.id = first_letter
        else:
            participant.id = playerLetter if first_letter == computerLetter else computerLetter

    mcts.root = Node(State(True, theBoard), mcts.player_list[0])

    playing = True
    while playing:
        if turn == 'player':
            print('\n')
            drawBoard(theBoard)
            move = getPlayerMove(theBoard)
            makeMove(theBoard, playerLetter, move)

            # Keep the search tree in sync with the human's move.  The new
            # state is marked non-terminal; if the game actually ended, the
            # checks right below resolve it anyway.
            mcts.update_root(State(True, theBoard))

            if isWinner(theBoard, playerLetter):
                drawBoard(theBoard)
                print('You have won the game!')
                playing = False
            elif isBoardFull(theBoard):
                drawBoard(theBoard)
                print('The game is a tie!')
                break
            else:
                turn = 'computer'

        else:
            print('\n')
            print('\n')
            drawBoard(theBoard)

            mcts.root = mcts.find_next_move()

            # The chosen move is the first square that differs between the
            # current board and the board of the selected child state.
            for idx, cell in enumerate(theBoard):
                if cell != mcts.root.state.board[idx]:
                    makeMove(theBoard, computerLetter, idx)
                    break

            if isWinner(theBoard, computerLetter):
                drawBoard(theBoard)
                print('The computer has beaten you!')
                playing = False
            elif isBoardFull(theBoard):
                drawBoard(theBoard)
                print('The game is a tie!')
                break
            else:
                turn = 'player'