def get_possible_next_states(node):
    """Return a list of all possible next states reachable from this node."""
    # This check should be superfluous here: terminal states are never expanded.
    if isWinner(node.state.board, 'X') or isWinner(node.state.board, 'O'):
        print("Error, you should never get here!")
        return []
    else:
        let = node.player.id
        empty = []
        next_states = []
        # collect the indices of all empty fields (board index 0 is unused)
        for i, x in enumerate(node.state.board[1:]):
            if x == ' ':
                empty.append(i + 1)
        for i in empty:
            board = node.state.board[:]
            makeMove(board, let, i)
            # if the new state is an end state, mark it as terminal (status False)
            if isWinner(board, let):
                status = False
            else:
                status = True
            next_states.append(State(status, board))
        return next_states
def drltc(simulation):
    training_dataset = Dataset()
    dnn = DNN(n_nodes, minibatch=16, learning_rate=1e-6)
    statistics = []
    max_trajectory = []
    for iteration in range(n_iterations):
        print("Begin WSN topology construction...")

        # draw the initial, edgeless topology
        G = nx.Graph()
        G.add_nodes_from(range(n_nodes))
        nx.draw(G, pos=simulation.node_positions, node_color=color_map, with_labels=True)
        plt.savefig('./Topology_visualization/iteration_' + str(iteration + 1) + '_initial.png')
        plt.close()
        G.clear()

        # self-play: generate training data from MCTS-guided episodes
        for episode in range(n_episodes):
            root_state = State(np.zeros((n_nodes, n_nodes)))
            for n in range(n_nodes):
                if root_state.is_terminal():
                    reward = simulation.eval(root_state.adjacency)
                    # update the value in all training examples produced in this episode
                    for dataset in training_dataset.data[-n:]:
                        dataset[-1] = np.array(reward)
                else:
                    mcts = MCTS(root_state.shape, dnn, simulation, exploration_level)  # TODO keep subtrees?
                    for search in range(n_searches):
                        print(f'\riteration {iteration:02}, episode {episode:02}, '
                              f'level {n:02}, search {search:02}', end='')
                        mcts.search(root_state)
                    print('\nexploration:',
                          np.linalg.norm(mcts.action_visits[root_state].flatten(), 0))
                    if mcts.action_visits[root_state].sum() != 0:
                        normalized_visits = (mcts.action_visits[root_state]
                                             / mcts.action_visits[root_state].sum())
                    else:
                        normalized_visits = mcts.action_visits[root_state]
                    training_dataset.add([root_state.adjacency, normalized_visits.flatten(), None])
                    # sample the next action proportionally to the normalized visit counts
                    next_action = np.unravel_index(
                        np.random.choice(n_nodes**2, p=normalized_visits.flatten()),
                        shape=normalized_visits.shape)
                    root_state = root_state.transition(next_action)
        print('\n')

        # train the network on the collected data
        for i in range(n_trainings):
            dnn.train(training_dataset, epoch=i)

        # evaluate the current policy network with plain rollouts
        lifetimes = []
        max_value = 0
        for i in range(n_simulations):
            state = State(np.zeros((n_nodes, n_nodes)))
            trajectory = [state]
            while not state.is_terminal():
                state_policy, _ = dnn.eval(state.adjacency)
                state_policy[~state.get_valid_actions()] = 0  # set invalid actions to 0
                state_policy /= state_policy.sum()  # re-normalize over valid actions
                next_action = np.unravel_index(
                    np.random.choice(n_nodes**2, p=state_policy.flatten()),
                    shape=state_policy.shape)
                state = state.transition(next_action)
                trajectory.append(state)
            final_topology = state.adjacency
            value = simulation.eval(final_topology)
            lifetimes.append(value)
            if value > max_value:
                max_value = value
                max_trajectory = trajectory
                max_topology = final_topology

        statistics.append([
            sum(lifetimes) / n_simulations,
            max(lifetimes),
            min(lifetimes),
            max(lifetimes) - min(lifetimes)
        ])
        print(f'statistics: {statistics[-1]}')

        # draw the last completed topology
        edge_x, edge_y = np.where(final_topology)
        G = nx.DiGraph()
        edge_list = [(y, x) for x, y in zip(edge_x, edge_y)]
        G.add_edges_from(edge_list)
        nx.draw(G, pos=simulation.node_positions, node_color=color_map, with_labels=True)
        plt.savefig('./Topology_visualization/iteration_' + str(iteration + 1) + '_completed.png')
        plt.close()

        # visualize node movements; nodes whose position changed are drawn in red
        movements = np.array(simulation.node_positions)
        movements[0] = movements[0] * 1.78 - 45
        unchanged = movements == simulation.node_positions
        for idx, coor in enumerate(unchanged):
            if not coor.all():
                color_map[idx] = 'red'
        nx.draw(G, pos=movements, node_color=color_map, with_labels=True)
        plt.savefig('./Topology_visualization/iteration_' + str(iteration + 1) + '_movements.png')
        plt.close()

        # checkpoint: plot lifetime statistics and save the model every 10 iterations
        if iteration % 10 == 0 and iteration != 0:
            # print(star_baseline(simulation))
            # print(mst_baseline(simulation))
            # print(random_baseline(simulation, n_simulations))
            statistics_np = np.array(statistics)
            plt.plot(statistics_np[:, 0])
            plt.plot(statistics_np[:, 1])
            plt.plot(statistics_np[:, 2])
            plt.ylabel('lifetime')
            plt.xlabel('iteration')
            plt.savefig(f'{experiment}/ckp_{experiment}_n{n_nodes}_e{n_episodes}_s{n_searches}'
                        f'_sim{n_simulations}_t{n_trainings}_i{iteration}.png')
            plt.close()
            torch.save(dnn.model.state_dict(),
                       f'{experiment}/ckp_{experiment}_n{n_nodes}_e{n_episodes}_s{n_searches}'
                       f'_sim{n_simulations}_t{n_trainings}_i{iteration}.pt')
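# drltc() references several module-level settings that are not defined in the function itself.
# The names below are taken from its body; the example values are placeholder assumptions only,
# not the original configuration.
n_nodes = 10                     # number of sensor nodes in the WSN
n_iterations = 100               # outer construction/training iterations
n_episodes = 5                   # self-play episodes per iteration
n_searches = 50                  # MCTS searches per constructed edge
n_trainings = 10                 # DNN training passes per iteration
n_simulations = 20               # evaluation rollouts per iteration
exploration_level = 1.0          # MCTS exploration constant
experiment = 'experiment'        # run name / output directory for checkpoints
color_map = ['blue'] * n_nodes   # node colors used in the topology plots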
def AI_vs_AI():
    flag = True
    while flag:
        playerLetter = random.choice(('X', 'O'))
        computerLetter = 'O' if playerLetter == 'X' else 'X'
        turn = whoGoesFirst()
        theBoard = [' '] * 10
        mcts = MCTS(2, playrandom, get_possible_next_states)
        first_letter = playerLetter if turn == 'player' else computerLetter
        # assign letters so that player 0 of the MCTS always moves first
        for player in mcts.player_list:
            if player.nr == 0:
                player.id = first_letter
            else:
                player.id = playerLetter if first_letter == computerLetter else computerLetter
        mcts.root = Node(State(True, theBoard), mcts.player_list[0])
        gameIsPlaying = True
        while gameIsPlaying:
            if turn == 'player':
                print('\n')
                drawBoard(theBoard)
                print('\n')
                mcts.root = mcts.find_next_move()
                # make the move that was chosen by the MCTS algorithm
                for i, entry in enumerate(theBoard):
                    if entry != mcts.root.state.board[i]:
                        makeMove(theBoard, playerLetter, i)
                        break
                if isWinner(theBoard, playerLetter):
                    drawBoard(theBoard)
                    print(playerLetter, 'won the game!')
                    gameIsPlaying = False
                else:
                    if isBoardFull(theBoard):
                        drawBoard(theBoard)
                        print('The game is a tie!')
                        break
                    else:
                        turn = 'computer'
                        input()
            else:
                print('\n')
                drawBoard(theBoard)
                print('\n')
                mcts.root = mcts.find_next_move()
                # make the move that was chosen by the MCTS algorithm
                for i, entry in enumerate(theBoard):
                    if entry != mcts.root.state.board[i]:
                        makeMove(theBoard, computerLetter, i)
                        break
                if isWinner(theBoard, computerLetter):
                    drawBoard(theBoard)
                    print(computerLetter, 'won the game!')
                    gameIsPlaying = False
                else:
                    if isBoardFull(theBoard):
                        drawBoard(theBoard)
                        print('The game is a tie!')
                        break
                    else:
                        turn = 'player'
                        input()
        cont = input('another game?\n')
        if cont not in ['y', 'yes', 'ye']:
            flag = False
def normal_game():
    print("\nWelcome to MonteCarlo-TicTacToe")
    playerLetter, computerLetter = inputPlayerLetter()
    turn = whoGoesFirst()
    theBoard = [' '] * 10
    mcts = MCTS(2, playrandom, get_possible_next_states)
    first_letter = playerLetter if turn == 'player' else computerLetter
    # assign letters so that player 0 of the MCTS always moves first
    for player in mcts.player_list:
        if player.nr == 0:
            player.id = first_letter
        else:
            player.id = playerLetter if first_letter == computerLetter else computerLetter
    mcts.root = Node(State(True, theBoard), mcts.player_list[0])
    gameIsPlaying = True
    while gameIsPlaying:
        if turn == 'player':
            print('\n')
            drawBoard(theBoard)
            move = getPlayerMove(theBoard)
            makeMove(theBoard, playerLetter, move)
            # status can stay True: if the game actually ended, the evaluation below ends it anyway
            status = True
            next_state = State(status, theBoard)
            mcts.update_root(next_state)
            if isWinner(theBoard, playerLetter):
                drawBoard(theBoard)
                print('You have won the game!')
                gameIsPlaying = False
            else:
                if isBoardFull(theBoard):
                    drawBoard(theBoard)
                    print('The game is a tie!')
                    break
                else:
                    turn = 'computer'
        else:
            print('\n')
            drawBoard(theBoard)
            mcts.root = mcts.find_next_move()
            # make the move that was chosen by the MCTS algorithm
            for i, entry in enumerate(theBoard):
                if entry != mcts.root.state.board[i]:
                    makeMove(theBoard, computerLetter, i)
                    break
            if isWinner(theBoard, computerLetter):
                drawBoard(theBoard)
                print('The computer has beaten you!')
                gameIsPlaying = False
            else:
                if isBoardFull(theBoard):
                    drawBoard(theBoard)
                    print('The game is a tie!')
                    break
                else:
                    turn = 'player'
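# Hypothetical entry-point sketch (assumed, not shown in the code above): choose between
# playing against the MCTS player and watching an AI-vs-AI match.
if __name__ == '__main__':
    mode = input("Type 'a' to watch AI vs. AI, anything else to play yourself:\n")
    if mode.strip().lower() == 'a':
        AI_vs_AI()
    else:
        normal_game()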