Code Example #1
File: run.py Project: Pontius1007/IT3105
    def find_move(self, node, simulations, batch_player):
        # Node is the root node
        move_node = node
        # print(move_node.state.get_player())

        for i in range(0, simulations):

            # move_node.state.player = 3 - move_node.state.player

            # this searches through tree based on UCT value
            best_node = mcts.MCTS().search(move_node, batch_player)

            # expands the node with children if there are possible states
            mcts.MCTS().expand(best_node)

            # if node was expanded, choose a random child to evaluate
            if len(best_node.get_child_nodes()) > 0:
                best_node = random.choice(best_node.get_child_nodes())

            # simulates winner. Rollout
            winner = mcts.MCTS().ANET_evaluate(ANET=self.ANET, node=best_node)

            # traverses up tree with winner
            mcts.MCTS().backpropogate(best_node, winner, batch_player)

        return move_node
Code Example #2
File: player.py Project: ishivvers/w.a.a.i.t.
 def __init__(self,
              player_id,
              game_id,
              server_address=('localhost', 4242),
              time_allowed=5,
              n_threads=3,
              tree_file='trees/tree_9h.p'):
     """
     Args:
         time_allowed (int): time allowed for thinking before a response is required
         n_threads (int): number of worker MCTS threads to spin up
     """
     self.event = multiprocessing.Event()
     self.event.clear()
     self.time_allowed = time_allowed
     self.n_threads = n_threads
     self.tree_keeper = mcts.MCTS(checkers.Board())  # holds the merged tree
     if tree_file is not None:
         self.tree_keeper.load_tree(tree_file)
     self.mcts_threads = [None] * self.n_threads  # holds the pointers to worker threads
     self.thread_pipes = [None for _ in range(self.n_threads)]  # holds the pipes to workers
     self._start_threads()
     super().__init__(player_id, game_id, server_address=server_address)
Code Example #3
def generate_small_maze25(num_games=25, num_rollout=50):

    # board_state_node = mcts.pacmanNode(game_board, 0)
    data = []

    for game in range(num_games):
        # initialization for the next round of AI search
        L, ghosts = pp.smallMaze(2, 5)
        pos_i, pos_j = 5, 10
        init_board = pp.MazeGameBoard(L, ghosts, pos_i, pos_j, 0)
        tree = mcts.MCTS()
        board_state_node = mcts.pacmanNode(init_board, 0)

        while True:
            L0, pos_i0, pos_j0, score0 = pp.retriveInfoFromGameBoard(
                board_state_node.board)

            for num in range(num_rollout):
                tree.do_rollout(board_state_node)
            board_state_node.board.one_step_more()

            # print(board_state_node.board.current_steps)
            board_state_node, max_score = tree.choose(board_state_node)

            if board_state_node.is_terminal():
                data = get_data(tree, data)
                break

            maze, pos_i, pos_j, score = pp.retriveInfoFromGameBoard(
                board_state_node.board)

            if maze[pos_i][pos_j] != 3:
                maze[pos_i][pos_j] = " "

            if board_state_node.is_terminal():
                data = get_data(tree, data)
                break

            ghosts = board_state_node.board.ghosts
            for ghost in ghosts:
                if (ghosts.index(ghost) % 3 == 0):
                    bestAction = pp.eclideanGhostAction(
                        maze, ghost, pos_i, pos_j)
                    ghost.move(bestAction, maze)
                elif (ghosts.index(ghost) % 3 == 1):
                    bestAction = pp.manhanttanGhostAction(
                        maze, ghost, pos_i, pos_j)
                    ghost.move(bestAction, maze)
                elif (ghosts.index(ghost) % 3 == 2):
                    bestAction = pp.randomGhostAction(maze, ghost)
                    ghost.move(bestAction, maze)

            if board_state_node.is_terminal():
                data = get_data(tree, data)
                break

        # for child in max_node.children():
        #     data.append((child, mcts.get_score_estimates(child)))

    return data
Code Example #4
    def test_mcts(self):
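        # Exercise MCTS-based index selection: four candidate indexes, their atomic
        # combinations with storage costs, and a single-query workload evaluated
        # against a storage threshold.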
        storage_threshold = 12
        index1 = iaw.IndexItem('public.a', 'col1', index_type='global')
        index2 = iaw.IndexItem('public.b', 'col1', index_type='global')
        index3 = iaw.IndexItem('public.c', 'col1', index_type='global')
        index4 = iaw.IndexItem('public.d', 'col1', index_type='global')

        atomic_index1 = iaw.IndexItem('public.a', 'col1', index_type='global')
        atomic_index2 = iaw.IndexItem('public.b', 'col1', index_type='global')
        atomic_index3 = iaw.IndexItem('public.c', 'col1', index_type='global')
        atomic_index4 = iaw.IndexItem('public.d', 'col1', index_type='global')

        atomic_index1.storage = 10
        atomic_index2.storage = 4
        atomic_index3.storage = 7
        available_choices = [index1, index2, index3, index4]
        atomic_choices = [[], [atomic_index2], [atomic_index1],
                          [atomic_index3], [atomic_index2, atomic_index3],
                          [atomic_index4]]
        query = iaw.QueryItem('select * from gia_01', 1)
        query.cost_list = [10, 7, 5, 9, 4, 11]
        workload_info = [query]

        results = mcts.MCTS(workload_info, atomic_choices, available_choices,
                            storage_threshold, 2)
        self.assertLessEqual(
            [index1.atomic_pos, index2.atomic_pos, index3.atomic_pos],
            [2, 1, 3])
        self.assertSetEqual({results[0].table, results[1].table},
                            {'public.b', 'public.c'})
Code Example #5
def initializeVersusAIVariables():
	global playerOne
	global playerTwo
	global current
	global nextMove
	global board
	global agent
	P1 = 0
	P2 = 1
	# multiprocess computation
	print "board", agent	
	board = mancala_board.Board()
	playerOne = Human(P1)
	# ai Player

	print agent, 'sdddddddddddddddddddddddddddd'
	if agent == "Minimax":
		playerTwo = minimax.AI(P2, 8)
	elif agent == "Genesis":
		playerTwo = genesis.genesis(P2, 3)
	elif agent == "MonteCarlo":
		playerTwo = mcts.MCTS(P2, mancala_board.Board())
	
	print "Versus AI Agent:", agent
	# starting player is random
	# current = random.randint(0,1)  # todo this line is a f*****g problem......
	current = None
	nextMove = random.randint(0,1)
	return jsonify({'initialized' : True})
Code Example #6
def play_minimax_games(net, game_count, mcts_sim_count, network_color):
    """
    returns the error percentage of the optimal move prediction by the network
    the network and the mcts are used to predict the move to play
    :param net:                 the network
    :param game_count:          the number of games to play
    :param mcts_sim_count:      the number of monte carlo simulations
    :param network_color:       the color of the network
    :return:                    the score of the network vs the minimax player
    """
    mcts_list = [mcts.MCTS(tic_tac_toe.TicTacToeBoard()) for _ in range(game_count)]
    player = CONST.WHITE

    all_terminated = False
    while not all_terminated:
        # make a move with the az agent
        if player == network_color:
            # run all mcts simulations
            mcts.run_simulations(mcts_list, mcts_sim_count, net, 0)

            # play the best move suggested by the mcts policy
            for i_mcts_ctx, mcts_ctx in enumerate(mcts_list):
                # skip terminated games
                if mcts_ctx.board.is_terminal():
                    continue

                policy = mcts_list[i_mcts_ctx].policy_from_state(mcts_ctx.board.state_id(), 0)
                move = np.where(policy == 1)[0][0]
                mcts_ctx.board.execute_action(move)

        # make an optimal minimax move
        else:
            for mcts_ctx in mcts_list:
                # skip terminated games
                if mcts_ctx.board.is_terminal():
                    continue

                move = mcts_ctx.board.minimax_move()
                mcts_ctx.board.execute_action(move)

        # swap the player
        player = CONST.WHITE if player == CONST.BLACK else CONST.BLACK

        # check if all games are terminated
        all_terminated = True
        for mcts_ctx in mcts_list:
            if not mcts_ctx.board.is_terminal():
                all_terminated = False
                break


    # extract the score from all boards
    tot_score = 0
    for mcts_ctx in mcts_list:
        score = mcts_ctx.board.white_score() if network_color == CONST.WHITE else mcts_ctx.board.black_score()
        tot_score += score

    tot_score /= game_count
    return tot_score
Code Example #7
File: main.py Project: Nyashiki/erweitern_55
def main():
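    # Build the network and an MCTS search configured for 800 simulations,
    # then hand both to the self-play loop.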
    neural_network = network.Network()

    config = mcts.Config()
    config.simulation_num = 800

    search = mcts.MCTS(config)
    selfplay.run(neural_network, search, True)
Code Example #8
 def __init__(self, role, max_simulate_count=500):
     BasePlayer.__init__(self, role)
     self.max_simulate_count = max_simulate_count
     self.mcts = mcts.MCTS(
         c=5,
         max_simulate_count=max_simulate_count,
         policy_function=mcts.policy_function,
         rollout_policy_function=mcts.rollout_policy_function)
Code Example #9
def run_hourly():
    board = checkers.Board()
    MC = mcts.MCTS(board)
    MC.log.setLevel(mcts.logging.INFO)
    i = 0
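    # Run the search in one-hour slices and checkpoint the tree to disk after each hour.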
    while True:
        print('starting hour', i)
        MC.run(t=60 * 60)
        MC.save_tree('trees/tree_{}h.p'.format(i))
        i += 1
Code Example #10
File: player.py Project: Leonie-/mancala-python
    def play(self):
        pot = self.pick_random()

        if self.player_type == "minimax":
            pot = self.pick_minimax()

        if self.player_type == "alphabeta":
            pot = self.pick_minimax_alpha_beta()

        if self.player_type == "rightpot":
            pot = self.pick_right_pot()

        if self.player_type == "leftpot":
            pot = self.pick_left_pot()

        if self.player_type == "potwithfewest":
            pot = self.pick_pot_with_fewest_stones()

        if self.player_type == "potwithmost":
            pot = self.pick_pot_with_most_stones()

        if self.player_type == "takeanotherturn":
            pot = self.pick_pot_with_extra_turn(True)

        if self.player_type == "avoidanotherturn":
            pot = self.pick_pot_with_extra_turn(False)

        if self.player_type == "mcts":
            monte_carlo = mcts.MCTS(self.mancala, self.player_number,
                                    self.maximum_time_secs, self.maximum_depth)
            pot = monte_carlo.pick_pot()

        if self.player_type == "mcts-expansion-apriori":
            monte_carlo = MCTSModifiedFromApriori(self.mancala,
                                                  self.player_number,
                                                  self.maximum_time_secs,
                                                  self.maximum_depth)
            pot = monte_carlo.pick_pot()

        if self.player_type == "mcts-expansion-gsp":
            monte_carlo = MCTSModifiedFromGsp(self.mancala, self.player_number,
                                              self.maximum_time_secs,
                                              self.maximum_depth)
            pot = monte_carlo.pick_pot()

        if self.player_type == "mcts_simulation_minimax":
            monte_carlo = MCTSSimulationMiniMax(self.mancala,
                                                self.player_number,
                                                self.maximum_time_secs,
                                                self.maximum_depth)
            pot = monte_carlo.pick_pot()

        print(f"Pot chosen for play: {pot}")

        return self.mancala.play(self.player_number, pot)
Code Example #11
def evaluate(net1, net2, rounds, device="cpu"):
    n1_win, n2_win = 0, 0
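    # One MCTS store per network, reused across all evaluation rounds.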
    mcts_stores = [mcts.MCTS(), mcts.MCTS()]

    for r_idx in range(rounds):
        r, _ = model.play_game(mcts_stores=mcts_stores,
                               replay_buffer=None,
                               net1=net1,
                               net2=net2,
                               steps_before_tau_0=0,
                               mcts_searches=20,
                               mcts_batch_size=16,
                               device=device)
        if r < -0.5:
            n2_win += 1
        elif r > 0.5:
            n1_win += 1

    if (n1_win + n2_win) == 0: return 0
    return n1_win / (n1_win + n2_win)
Code Example #12
def test_play():
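    # Pit an MCTS strategy against a random strategy on an 8x8 checkers game,
    # then print the winner, the turn count, the final state, and the game log.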
    ttt = CheckersGame(8, 8)
    s1 = mcts.MCTS(ttt, n_plays=20, max_depth=500, player=1)
    s2 = gaming.RandomStrategy(ttt, player=2)

    state, rewards, turn, log = gaming.play_game(ttt, [s1, s2], max_turns=100)
    print()
    print(
        f'the winner is the player {[p for p, r in rewards.items() if r == 1]}, turn: {turn}'
    )
    print(state)
    print(log)
Code Example #13
 def test1(self):
     """ TODO Comment
     """
     params = [(.98, .2), (.98, .1), (.99, .2), (.99, .1)]
     for param in params:
         path_lengths = []
         timings = []
         for _ in range(self.runs):
             mcts_obj = mcts.MCTS(self.env, *param)
             path, timing = mcts_obj.run(self.n_iter)
             path_lengths.append(len(path))
             timings.append(timing)
         self.report.append((path_lengths, timings, param))
Code Example #14
File: generate_games.py Project: petersn/gaits
def generate_game(args):
    game_engine = game.build_demo_game_engine()

    entry = {"samples": [], "outcome": None}
    m = mcts.MCTS(game_engine.initial_state)
    all_steps = 0
    collapse = 0
    while True:
        if m.root_node.state.is_game_over():
            break
        most_visits = 0
        while most_visits < args.visits and m.root_node.all_edge_visits < args.visits * MAX_STEP_RATIO:
            all_steps += 1
            edge = m.step()
            most_visits = max(most_visits, edge.edge_visits)

        # Compute the proportion of visits each move received.
        total_visits = float(m.root_node.all_edge_visits)
        weighted_moves = {
            move: (m.root_node.outgoing_edges[move].edge_visits /
                   total_visits if move in m.root_node.outgoing_edges else 0)
            for move in m.root_node.state.moves
        }
        if weighted_moves["m0"] == 1.0:
            collapse += 1

        # Mix the policies by their visit counts to get a training policy.
        training_policy = np.sum([
            weight * m.root_node.state.moves[move].policy
            for move, weight in weighted_moves.iteritems()
        ],
                                 axis=0)

        # Store training sample.
        entry["samples"].append((
            map(float, m.root_node.state.sensor_data),
            map(float, training_policy),
            float(m.root_node.visit_weighted_edge_score()),
        ))

        # Step using the most visited move (with no noise).
        selected_move = mcts.sample_by_weight(weighted_moves)
        m.play(selected_move)

    entry["outcome"] = m.root_node.state.compute_utility()

    if collapse:
        print "!" * 100, "Collapses:", collapse

    return entry
Code Example #15
    def __init__(self, whiteNN, blackNN):
        self.pgn = chess.pgn.Game()

        self.board = chess.Board()
        self.currPlayer = chess.WHITE

        self.whiteNN = whiteNN
        self.blackNN = blackNN

        self.gameTree = mcts.MCTS(self.board)

        # Training examples
        self.moves = {chess.WHITE : [],
                      chess.BLACK : []}
Code Example #16
 def test1(self):
     """ TODO Comment
     """
     params = [(.95, .4), (.95, .3), (.95, .2), (.97, .4), (.97, .3),
               (.8, .5), (.8, .4), (.8, .3), (.83, .5), (.83, .4),
               (.83, .3), (.9, .5)]
     for param in params:
         print('running', param)
         path_lengths = []
         timings = []
         for _ in range(self.runs):
             mcts_obj = mcts.MCTS(self.env, *param)
             path, timing = mcts_obj.run(self.n_iter)
             path_lengths.append(len(path))
             timings.append(timing)
         self.report.append((path_lengths, timings, param))
Code Example #17
File: eval.py Project: atpcurr/atpcurr
def find_one_proof(args, model, env, file):
    env.set_source(file)
    env.args.curriculum_allowed = False
    env.args.max_exploration = None
    env.args.can_replace_proof = False
    env.args.use_replay = False
    env.args.use_action_shuffle = False

    success = 0
    prooflen = 0
    if args.evaltype == "mcts":
        evaltype = args.evaltype
        my_mcts = mcts.MCTS(model, args.n_action_slots)
        t0 = time.time()
        success, prooflen, attempts = my_mcts.build_tree(
            env, args.evaltime, args.evalcount)
        if success == 1:
            print("Proof found: {}, len {}, time: {} sec,\n{}".format(
                file, prooflen,
                time.time() - t0, env.current_steps))
    else:
        for attempts in range(1, 1 + args.evalcount):
            obs = env.reset()
            t0 = time.time()
            if args.evaltype == "backtrack":
                status, prooflen = prove_nonrecursive(args, model, env, t0)
                evaltype = args.evaltype
            else:
                if attempts == 1:
                    evaltype = "det"
                    evaltime = 10000
                else:
                    evaltype = args.evaltype
                    evaltime = args.evaltime
                status, prooflen = prove_nobacktrack(args, model, env, obs, t0,
                                                     evaltype, evaltime)
            if status == "success":
                print("Proof found: {}, len {}, time: {} sec,\n{}".format(
                    file, prooflen,
                    time.time() - t0, env.current_steps))
                success = 1
                break
    if success == 0:
        print("Failure: {}".format(file))
    return success, prooflen, attempts, evaltype
Code Example #18
def play_game(inference):
    # Initialize memory
    actions = []
    policies = []
    indices = []
    moves = []

    # Set up search tree
    state = game_state.GameState()
    tree = mcts.MCTS(inference, state, num_threads=8)

    # Play game
    while not tree.state.done():
        print(tree.state.state.unicode())

        # Perform search
        node = tree.search(128)

        # Calculate move probabilities and get action index
        probs = mcts.policy(node, T=1.0)
        index = np.random.choice(len(node.actions), p=probs)

        # Get action and update tree
        action = node.actions[index]
        value = node.Q[index]
        move = tree.state.parse_action(action)

        print(tree.state.state.san(move), value)

        tree.act(index)

        # Store stats
        actions.append(action)
        policies.append(probs)
        indices.append(node.actions)
        moves.append(move)

    # Get game outcome and last player to move
    outcome = -tree.state.reward()
    winner = not tree.state.turn()

    print(tree.state.state.unicode())
    print(' '.join([chess.Board().variation_san(moves), state.state.result()]))

    return actions, policies, indices, outcome, winner
Code Example #19
File: client.py Project: Nyashiki/erweitern_55
    def run(self):
        mcts_config = mcts.Config()
        mcts_config.batch_size = 16
        mcts_config.simulation_num = 800
        mcts_config.forced_playouts = False
        mcts_config.use_dirichlet = True
        mcts_config.reuse_tree = True
        mcts_config.target_pruning = False
        mcts_config.immediate = False

        search = mcts.MCTS(mcts_config)

        device = 'cpu' if self.cpu_only else 'gpu'
        self.nn = network.Network(device)

        iter = 0

        while True:
            # Ask the server the current neural network parameters.
            if self.update and iter % self.update_iter == 0:
                url = 'http://{}:{}/weight'.format(self.host, self.port)
                req = urllib.request.Request(url)
                with urllib.request.urlopen(req) as res:
                    weights = _pickle.loads(res.read())
                    self.nn.model.set_weights(weights)

            # Conduct selfplay.
            if self.random_play:
                game_record = selfplay.random_play(
                    stop_with_checkmate=False, trim_checkmate=False)

            else:
                search.clear()
                game_record = selfplay.run(
                    self.nn, search, search_checkmate=self.search_checkmate, stop_with_checkmate=False, trim_checkmate=False)

            # Send result.
            url = 'http://{}:{}/record'.format(self.host, self.port)
            data = _pickle.dumps(game_record, protocol=4)
            req = urllib.request.Request(url, data)
            with urllib.request.urlopen(req) as res:
                pass

            iter += 1
Code Example #20
File: usi.py Project: Nyashiki/erweitern_55
    def isready(self):
        if self.nn is None:
            self.nn = network.Network()

            if self.weight_file is not None:
                self.nn.load(self.weight_file)

        self.config = mcts.Config()
        self.config.simulation_num = int(1e9)
        self.config.reuse_tree = True

        if self.search is None:
            self.search = mcts.MCTS(self.config)
        self.search.clear()

        self.position = minishogilib.Position()

        # ponder
        self.ponder_thread = None
Code Example #21
def train(_, network, lock, loss_record):
    game_index = network.count()
    print('{} game start'.format(game_index))

    tree = mcts.MCTS(network, game_index)
    game_begin_time = int(time.time())
    tree.game()

    learn_begin_time = int(time.time())

    lock.acquire()
    print('{} learn start'.format(game_index))

    mse_total, cross_entropy_total = 0, 0
    np.random.seed(int.from_bytes(os.urandom(4), byteorder='little'))
    for _ in range(TRAINING_STEP):
        mse, cross_entropy = network.learn()
        mse_total += mse
        cross_entropy_total += cross_entropy
    loss_record.add(
        [mse_total / TRAINING_STEP, cross_entropy_total / TRAINING_STEP])
    print([mse_total / TRAINING_STEP, cross_entropy_total / TRAINING_STEP])

    if loss_record.size() % SAVE_INTERVAL == 0:
        print('save')
        network.save_state("{}state_{}.pkl".format(STATE_SAVE_FOLDER,
                                                   loss_record.size()))
        network.save_memory("memory.npy")
        loss_record.save("loss_record.pkl")

    print('{} learn end'.format(game_index))
    lock.release()

    print('{} game end'.format(game_index))

    learn_end_time = int(time.time())
    learn_min, learn_sec = utils.compute_time(learn_begin_time, learn_end_time)
    print('learning cost {} mins {} seconds'.format(learn_min, learn_sec))

    game_end_time = int(time.time())
    game_min, game_sec = utils.compute_time(game_begin_time, game_end_time)
    print('{} game cost {} mins {} seconds'.format(game_index, game_min,
                                                   game_sec))
Code Example #22
File: tictactoe.py Project: dllllb/imaginarium
def test_play():
    ttt = TicTacToeGame(size_x=4, size_y=4, len_to_win=3, n_players=2)
    s1 = mcts.MCTS(ttt, n_plays=50, max_depth=500, player=1)
    s2 = gaming.RandomStrategy(ttt, player=2)

    state, rewards, turn, log = gaming.play_game(ttt, [s1, s2], max_turns=50)
    print()
    print(
        f'the winner is the player {[p for p, r in rewards.items() if r == 1]}, turn: {turn}'
    )
    print(state)
    print(log)

    state, rewards, turn, log = gaming.play_game(ttt, [s1, s2], max_turns=50)
    print()
    print(
        f'the winner is the player {[p for p, r in rewards.items() if r == 1]}, turn: {turn}'
    )
    print(state)
    print(log)
Code Example #23
File: tst.py Project: arif1903/Floorplan
def tst_sequence_children():
    Cons = np.array([[1, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
                     [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]])
    roomids = [1, 2, 3, 4, 5]
    design = mcts.MCTS(Cons, 2000)
    design.play()
    states = []
    rootnode = design.real_path[0]
    queue = [rootnode]
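    # Breadth-first walk over the expanded part of the search tree, collecting the
    # states of type-'R' children of every expanded, non-terminal node.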
    while len(queue) > 0:
        node = queue.pop(0)
        if node.expanded:
            if node.terminal is False:
                for child in node.children:
                    queue.append(child)
                    if child.type == 'R':
                        states.append(child.state)

    states_path = states[0:63]
    vis = mcts.Visualisation(roomids, states_path, Cons, 'unknown')
    vis.vis_static()
Code Example #24
    def act(self, obs, action_space):
        state = self._create_simulation_state(obs)

        if self._process_count:
            # Multiprocessing

            def _mcts_search(_state, _agent_id, _simulation_env, _iteration_limit, _shared_list):
                _env_state = _EnvState(_state, _agent_id, _simulation_env)
                _searcher = mcts.MCTS(_env_state, iteration_limit=_iteration_limit)
                _shared_list.append(_searcher.search())

            def get_most_frequent(l):
                count = Counter(l)
                return count.most_common(1)[0][0]

            with Manager() as manager:
                shared_list = manager.list()
                processes = []

                for _ in range(self._process_count):
                    env_copy = self._make_env_copy()
                    processes.append(Process(
                        target=_mcts_search,
                        args=(state, self._agent_id, env_copy, self._iteration_limit, shared_list)
                    ))

                for p in processes:
                    p.start()

                for p in processes:
                    p.join()

                action = get_most_frequent(shared_list)
        else:
            env_state = _EnvState(state, self._agent_id, self._simulation_env)
            searcher = mcts.MCTS(env_state, iteration_limit=self._iteration_limit)
            action = searcher.search()

        return action
Code Example #25
File: game.py Project: cherie11/Fast-Reversi-AI
	def __init__(self,human):
		self.MCTS=mcts.MCTS()
		#White goes first (0 is white and player,1 is black and computer)
		self.human=human
		self.player = 1
		self.passed = False
		self.won = False
		#Initializing an empty board
		self.array = np.zeros([8,8],dtype=np.int)
		#Initializing center values
		self.array[3][3] = 1
		self.array[3][4]=-1	
		self.array[4][3]=-1
		self.array[4][4]=1
		self.oldarray = self.array
		global BIT
		global LSB_TABLE
		global bitmap

		for i in range(64):
			LSB_TABLE[(((bitmap & (~bitmap + 1)) * LSB_HASH) & FULL_MASK) >> 58] = i
			bitmap <<= 1
Code Example #26
def main():
    # Configure argparser
    argparser = argparse.ArgumentParser(prog="do_mcmc_draft_search")
    configure_argparser(argparser)

    # Parse the arguments
    args = argparser.parse_args()

    # Configure logging
    utils.configure_logging(args.verbosity_level)

    # Get names of input/output files
    draftboard_file = args.db_file
    league_config_file = args.league_config
    time_to_run = args.time
    exploration_const = args.exp_constant
    bench_weight = args.bench_weight
    n_rollouts = args.n_rollouts
    sim_injury = args.sim_injury

    # Read config file
    with open(league_config_file, "r") as stream:
        league_config = yaml.safe_load(stream)

    # Read draft sheet
    draft_df = pd.read_excel(draftboard_file)

    # Initialize and validate draft board
    db = draftboard.DraftBoard(draft_df, league_config)

    # Get my potential picks
    my_players = db.potential_picks[cols.NAME_FIELD].tolist()
    if not my_players:
        my_players = db.get_auto_draft_selections()
    logging.info("Players to compare: {0}".format(", ".join(my_players)))

    injury_risk_model = mcts_draft.EmpiricalInjuryModel(
        league_config) if sim_injury else None

    draft_tree_helper = mcts_draft.DraftTreeHelper(
        my_players,
        db,
        min_adp_prior=0.01,
        max_draft_node_size=25,
        injury_model=injury_risk_model,
        bench_weight=bench_weight)

    # Initialize MCTS for mcmc tree search
    mcmc_tree = mcts.MCTS(root_state=draft_tree_helper.get_root(),
                          tree_helper=draft_tree_helper,
                          time_limit=time_to_run * 1000 * 60,
                          num_rollouts=n_rollouts,
                          exploration_constant=exploration_const)

    # Do MCTS search and output best player
    best_action = mcmc_tree.search()
    logging.info("THIS THE BEST PLAYER:\n"
                 "**********************************************\n\n{0}\n\n"
                 "**********************************************".format(
                     best_action.upper()))

    # Also output best player for next round from best player
    best_node = mcmc_tree.root.children[best_action]
    logging.info("Next round best players: ")
    for child in best_node.children:
        logging.info(best_node.children[child])
Code Example #27
File: main.py Project: nordbyandreas/HEX
    def run(self):

        print("Starting up..  Playing " + str(self.numGames) + " games:")

        # set save interval for actor network parameters
        # clear the replayBuffer
        # randomly init weights and biases for Actor network

        self.Anet.setupSession()
        self.Anet.error_history = []
        self.Anet.validation_history = []

        startNode = Node.Node(
            state=State.State(player=self.player, hexSize=self.hexSize))
        mcts = MCTS.MCTS(numberOfSimulationsPerMove=self.numSimulations,
                         hexTrainer=self,
                         Anet=self.Anet)

        player = startNode.getState().getPlayer()

        startNodeCopy = startNode

        player1Wins = 0
        player2Wins = 0
        player1Starts = 0
        player2Starts = 0

        gc = 1

        #for a game in numberOfGames
        for game in range(0, self.numGames):
            #Start of a game

            #clear replay buffer
            self.replayBuffer = []

            #initialize gameboard to empty board
            startNode = startNodeCopy
            startingPlayer = startNode.getState().getPlayer()

            if startingPlayer == 1:
                player1Starts += 1
            else:
                player2Starts += 1

            print("\n\n\n --- Game number " + str(gc))

            #print starting state
            startNode.getState().getBoard().printBoard()

            while not startNode.getState().gameIsOver():

                player = startNode.getState().getPlayer()

                #use tree policy to search from root to leaf
                #use ANET to choose rollout actions from L to final state
                #perform mcts-backpropagation
                #next gamestate
                print("Player " + str(player) + "'s turn")
                print("legal moves:")
                print(startNode.getState().getBoard().getLegalMoves())

                nextNode = mcts.findNextMove(startNode, player, startingPlayer)

                # D = distribution of visitCounts along all arcs emanating from root
                # add case (root, D) to replayBuffer
                #choose actual move (action*) based on D
                #perform action* on root to produce successor state s*
                #update currentstate to s*
                # in mcts - retain subtree rooted at s*, discard everything else
                # rootnode = s*

                #TODO change this ?
                if self.verbose: nextNode.getState().getBoard().printBoard()

                if nextNode.getState().gameIsOver():

                    if self.verbose:
                        print("\nPlayer " + str(player) + " won! \n")

                    if player == 1:
                        player1Wins += 1
                    else:
                        player2Wins += 1

                startNode = nextNode
                if nextNode.getState().gameIsOver():
                    break
            gc += 1

            # train ANET on random minibatch of cases from replayBuffer
            np.random.shuffle(self.replayBuffer)

            #TODO write a custom do_training method

            inputs = [case[0] for case in self.replayBuffer]
            targets = [case[1] for case in self.replayBuffer]
            print("inputs:")
            print(inputs)
            print("targets")
            print(targets)
            feeder = {self.Anet.input: inputs, self.Anet.target: targets}
            gvars = [self.Anet.error] + self.Anet.grabvars
            _, grabvals, _ = self.Anet.run_one_step(
                [self.Anet.trainer],
                gvars,
                session=self.Anet.current_session,
                feed_dict=feeder)
            error = grabvals[0]
            self.Anet.error_history.append((gc, error))

            # if gameNum %modulo saveinterval: save ANET parameters for later use in TOPP

            #next game

        #print result of all games
        print("\nPlayer 1 started " + str(player1Starts) + " games and won " +
              str(player1Wins) + " of " + str(self.numGames) + " games!   " +
              str((player1Wins / self.numGames * 100)) + " % ")
        print("Player 2 started " + str(player2Starts) + " games and won " +
              str(player2Wins) + " of " + str(self.numGames) + " games!   " +
              str((player2Wins / self.numGames * 100)) + " % ")
        print("\n")
        TFT.plot_training_history(self.Anet.error_history,
                                  self.Anet.validation_history,
                                  xtitle="Game",
                                  ytitle="Error",
                                  title="",
                                  fig=True)

        self.Anet.close_current_session(view=False)

        #loop to keep program from closing at the end so we can view the graph
        x = ""
        while x == "":
            x = str(input("enter any key to quit"))
Code Example #28
import mcts
import same_game_env
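
# Minimal usage: build the SameGame environment, construct MCTS over it, and print the result of the search.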

env = same_game_env.Env()
mcts = mcts.MCTS(env)
print(mcts.search(100))
Code Example #29

import numpy as np
import mcts
# MarshallingState is assumed to be defined elsewhere in the original project.


def read_file(file, h):
    with open(file) as f:
        _, _ = [int(x) for x in next(f).split()]  # read first line
        stacks = []
        for line in f:  # read rest of lines
            stack = [int(x) for x in line.split()[1::]]
            #if stack[0] == 0: stack.pop()
            stacks.append(stack)

        S = len(stacks)
        cells = np.zeros((S, h), dtype=int)

        for stack in range(S):
            for tier in range(len(stacks[stack])):
                cells[stack][tier] = stacks[stack][tier]

    return (cells, S)


H = 5
cells, stacks = read_file("instancias\\BF\\BF1\\cpmp_16_5_48_10_29_1.bay", H)

state = MarshallingState(cells, stacks, H)

agent = mcts.MCTS()
agent.search(state)
print(agent.best_state.get_reward())
print(agent.best_state.cells)
Code Example #30
BATCH_SIZE = 256
TRAIN_ROUNDS = 10
MIN_REPLAY_TO_TRAIN = 2000  #10000

BEST_NET_WIN_RATIO = 0.60

EVALUATE_EVERY_STEP = 100
EVALUATION_ROUNDS = 20
STEPS_BEFORE_TAU_0 = 10

device = torch.device("cpu")

path = os.getcwd()
model_path = os.path.join(path, f"model4")
net = torch.load(model_path)
mcts = mcts.MCTS()

while True:

    won = None
    cur_player = s.get_random_player()
    cur_state = s.init()

    print(s.decode(cur_state))

    while won is None:

        print(f"Player: {cur_player}")

        if cur_player == 1:
            mcts.search_batch(20,