def test_learner_against_rando(n_games=10000):
    # see how the learner performs against a player making random moves
    metrics = run_simulator(
        p1=Player(strategy="basic_q", learning=False, load_Q=True),
        p2=Player(strategy="random"),
        n_games=n_games,
    )
    visualize_win_ratio(metrics, "Performance Over Time (testing)")
def __init__(self, environment, No=100, discount_factor=1):
    Player.__init__(self)
    self.env = environment
    self.No = No
    self.disc_factor = discount_factor
    # state-value table indexed by [dealer showing, agent sum]
    self.V = np.zeros(
        [self.env.dealer_max_value + 1, self.env.agent_max_value + 1])
    self.wins = 0.0
    self.iterations = 0.0
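# The No constant and the dealer/agent value table above suggest an
# Easy21-style Monte Carlo agent, where exploration typically decays per
# state as epsilon_t = No / (No + N(s_t)). The sketch below illustrates that
# schedule; the visit-count table `N` (indexed like self.V) is an assumption,
# not something shown in this class.
def epsilon_for_state(N, dealer, agent, No=100):
    # explore less in states that have been visited more often
    return float(No) / (No + N[dealer, agent])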
def train_learner_against_rando(n_games=20000):
    # loads the existing Q strategy and continues training it against a
    # random opponent
    metrics = run_simulator(
        p1=Player(strategy="basic_q", learning=True, load_Q=True),
        p2=Player(strategy="random"),
        save_Q=True,
        n_games=n_games,
    )
    visualize_win_ratio(metrics, "Performance Over Time (training)")
def test_learner_against_self():
    # see how the learner performs against itself; results should be even
    metrics = run_simulator(
        p1=Player(strategy="basic_q", learning=False, load_Q=True),
        p2=Player(strategy="basic_q", learning=False, load_Q=True),
    )
    visualize_win_ratio(metrics, "Performance Over Time (testing)")
def run_simulator(
    n_games=1000,
    p1=Player(strategy="basic_q", learning=True),
    p2=Player(strategy="random", learning=False),
    save_Q=False,
):
    """Runs a number of tic-tac-toe games

    Arguments:
        n_games: number of tic-tac-toe games to be played
        p1: instance of the Player() class; specifies the AI during
            training or testing
        p2: instance of the Player() class; specifies the opponent
        save_Q: boolean, determines if p1's updated Q should be saved to a
            pickled file after training is complete

    Returns:
        metrics: list of per-game outcomes from P1's perspective (wins,
            losses, and ties)

    Use this function to run many games in a row. Depending on the
    parameters for P1 and P2, this can be used for training or testing.
    This function also decays epsilon (the exploration factor) over time
    as the learner moves from high exploration to low.
    """
    games = [Game(p1=p1, p2=p2) for i in range(n_games)]
    metrics = []
    index = 0
    starting_epsilon = p1._epsilon
    for game in games:
        index += 1
        # run game and get results + P1's states and actions
        outcome, x_decisions, o_decisions = game.play_game()
        # log game results
        metrics.append(outcome)
        # update q learner after each game
        if p1._learning:
            p1.update_q(outcome, x_decisions)
        # reduce exploration factor after each game
        p1._epsilon = starting_epsilon - starting_epsilon * (
            1. * index / n_games) ** 2
    print(Counter(metrics))
    # save pickled Q learning file
    if save_Q:
        pickle.dump(p1._Q, open(p1.q_file, "wb"))
    return metrics
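# run_simulator decays epsilon quadratically rather than linearly, so
# exploration stays high early in training and drops off sharply near the
# end. A minimal standalone sketch of the same schedule (the function name
# is illustrative, not part of the source):
def decayed_epsilon(starting_epsilon, index, n_games):
    # quadratic decay: close to starting_epsilon for small index,
    # exactly 0 when index == n_games
    return starting_epsilon - starting_epsilon * (1. * index / n_games) ** 2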
def train_learner_against_self(n_sessions=5, games_per_session=10000):
    """Train a learner against itself

    Loads the Q file for both P1 and P2. The learner (P1) starts with a
    high exploration factor that decays over time, while P2 uses no
    exploration factor. P1 updates its Q file over the course of
    *games_per_session*. After *games_per_session* games have been played,
    P2 updates its Q strategy to match P1's again, and the process repeats
    for *n_sessions*.
    """
    for n in range(n_sessions):
        run_simulator(
            n_games=games_per_session,
            p1=Player(strategy="basic_q", learning=True, load_Q=True),
            p2=Player(strategy="basic_q", learning=False, load_Q=True),
            save_Q=True,
        )
        print("%d training sessions completed out of %d" % (n + 1, n_sessions))
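# A usage sketch tying the helpers in this file together: bootstrap against
# a random opponent, refine via self-play, then verify. The game counts are
# illustrative defaults, not prescribed values.
def example_training_workflow():
    train_learner_against_rando(n_games=20000)
    train_learner_against_self(n_sessions=5, games_per_session=10000)
    test_learner_against_rando(n_games=10000)
    test_learner_against_self()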
def SaveGame(self, request, context):
    """Deserializes a finished game from the request and stores it in the replay buffer."""
    game_history = GameHistory()
    game_history.observations = [tf.make_ndarray(observation)
                                 for observation in request.observations]
    game_history.actions = [Action(index) for index in request.actions]
    game_history.rewards = request.rewards
    game_history.to_plays = [Player(player_id) for player_id in request.to_plays]
    game_history.root_values = request.root_values
    game_history.policies = [policy.probabilities for policy in request.policies]

    self.replay_buffer.save_history(game_history)
    print('Number of games in buffer: {}'.format(len(self.replay_buffer.buffer)))
    return replay_buffer_pb2.SaveGameResponse(success=True)
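# A hedged client-side sketch of calling SaveGame over gRPC. The stub class
# name, the request message name, and the field layout are assumptions
# inferred from the servicer above (request.observations, request.actions,
# and so on); the actual generated modules may differ.
def save_game_example(history, address='localhost:50051'):
    import grpc
    channel = grpc.insecure_channel(address)
    stub = replay_buffer_pb2_grpc.ReplayBufferStub(channel)  # assumed stub name
    request = replay_buffer_pb2.SaveGameRequest(  # assumed request message
        observations=[tf.make_tensor_proto(o) for o in history.observations],
        actions=[action.index for action in history.actions],
        rewards=history.rewards,
        to_plays=[player.player_id for player in history.to_plays],
        root_values=history.root_values,
        # policies omitted: the policy message type is not shown in the source
    )
    return stub.SaveGame(request).success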
def run_mcts(self, root, num_moves):
    min_max_stats = MinMaxStats(self.config.known_bounds)

    for _ in range(self.config.num_simulations):
        # root.print()
        # walk down the tree to an unexpanded leaf
        action, leaf, cur_moves = self.select_leaf(root, num_moves, min_max_stats)
        to_play = Player(cur_moves % self.config.game_config.num_players)

        # evaluate the leaf with the learned dynamics + prediction networks
        batch_hidden_state = tf.expand_dims(leaf.parent.hidden_state, axis=0)
        network_output = self.network.recurrent_inference(
            batch_hidden_state, [action]).split_batch()[0]

        # expand the leaf and propagate its value back up the tree
        self.expand_node(node=leaf,
                         to_play=to_play,
                         actions=self.config.game_config.action_space,
                         network_output=network_output)
        self.backpropagate(leaf, network_output.value, to_play, min_max_stats)
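# run_mcts normalizes backed-up values with a MinMaxStats tracker, as in the
# public MuZero pseudocode. The class itself is not shown in this file, so
# the sketch below is the conventional implementation, assuming known_bounds
# is an optional object with .min and .max attributes; the actual class used
# here may differ.
class MinMaxStatsSketch(object):
    def __init__(self, known_bounds=None):
        self.minimum = known_bounds.min if known_bounds else float('inf')
        self.maximum = known_bounds.max if known_bounds else float('-inf')

    def update(self, value):
        # widen the observed value range as the tree is explored
        self.minimum = min(self.minimum, value)
        self.maximum = max(self.maximum, value)

    def normalize(self, value):
        # map values into [0, 1] once real bounds have been observed
        if self.maximum > self.minimum:
            return (value - self.minimum) / (self.maximum - self.minimum)
        return value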
def human_vs_human():
    Game(p1=Player(strategy="human"),
         p2=Player(strategy="human"),
         verbose=True).play_game()
def adversarial_training(
    p1=Player(strategy="basic_q", learning=False, load_Q=True),
    p2=Player(strategy="random", learning=False),
    p3=Player(strategy="basic_q", learning=True, load_Q=True),
    p4=Player(strategy="adversarial", learning=False),
):
    """Finds strategies that beat the AI for targeted training

    Arguments:
        p1: trained Q-learner in "test mode" (no learning)
        p2: cpu player choosing random moves
        p3: trained Q-learner that will continue to train
        p4: p3's opponent, uses the saved strategy uncovered by p2 that beat p1

    Plays a trained AI against a random player until the random player
    wins (if it ever does). If the random player wins, its winning move
    sequence is captured as a dedicated Q lookup. That adversarial scenario
    is then replayed many times so the AI can learn a better response.
    Training is very specific and deep (many repetitions), so this is not
    meant to be a general training strategy and is best used after the AI
    is already sufficiently robust. Each run covers only one adversarial
    example, so the function may need to be run many times.
    """
    # phase 1: AI vs. random opponent
    max_games = 50000
    games = [Game(p1=p1, p2=p2) for i in range(max_games)]
    for game in games:
        # run game and get results + P1's states and actions
        outcome, x_decisions, o_decisions = game.play_game()
        # build a Q network for player O only where X lost
        if outcome == "lost":
            # phase 2: adversarial training
            print("AI lost. Playing adversarial games...")
            Q_adversarial = {board: action for board, action in o_decisions}
            num_games = 10000
            a_games = [Game(p1=p3, p2=p4) for i in range(num_games)]
            starting_epsilon = p3._epsilon
            a_index = 0
            a_metrics = []
            for a_game in a_games:
                p4._Q = Q_adversarial
                a_index += 1
                outcome, x_decisions, o_decisions = a_game.play_game()
                a_metrics.append(outcome)
                p3.update_q(outcome, x_decisions)
                p3._epsilon = starting_epsilon - starting_epsilon * (
                    1. * a_index / num_games) ** 2
            print("Adversarial game outcomes:\n")
            print(Counter(a_metrics))
            print("Building better, stronger Q...")
            pickle.dump(p3._Q, open(p3.q_file, "wb"))
            # end training
            return
    print("played %d games without losing!" % max_games)
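# Because adversarial_training handles at most one adversarial example per
# call, a typical workflow loops it; the repetition count here is
# illustrative, and the wrapper name is not part of the source.
def harden_ai(n_rounds=10):
    for _ in range(n_rounds):
        adversarial_training()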
def test_learner_against_human():
    # tests the Q-learner against a human player ("O")
    p1 = Player(strategy="basic_q", load_Q=True)
    p2 = Player(strategy="human")
    game = Game(p1=p1, p2=p2, verbose=True)
    game.play_game()