# NOTE(review): this chunk arrived collapsed onto one physical line with no
# indentation. The line/indent structure below is reconstructed from statement
# order and Python semantics — best-effort; confirm against the original file.
# The first statements are the tail of an enclosing function whose `def` line
# is outside this view (presumably the `take_turn` called below), and the
# final `if` is truncated mid-statement at the end of the chunk.

# --- tail of the enclosing turn-taking function (def not visible here) ---
if monte_carlo and (round_index <= 1):  # exploring starts
    # Exploration: uniformly random action index over the game's action space.
    player.next_action = np.random.choice(card_game.num_actions)
else:
    # Exploitation: follow the player's current policy for this state index.
    player.next_action = player.policy[policy_index]
return play_action(card_showing, player)

# --- module-level setup: the learner and the two players ---
# NOTE(review): `xrange` indicates Python 2; `deck`, `card_game`, `take_turn`,
# `play_action`, `NUM_GAMES_TO_PLAY`, `np`, `Player`, and `MonteCarloLearning`
# are defined elsewhere in this file — not visible in this chunk.
q_learning = MonteCarloLearning(card_game.num_states, card_game.num_actions)
monte = Player()     # the player driven by Monte Carlo exploration
opponent = Player()  # the opposing player (follows its policy only)

for episode_index in xrange(NUM_GAMES_TO_PLAY):
    # Fresh deal each episode; drop the state trace from the previous game.
    deck.shuffle_deck()
    monte.pick_up_cards(deck.deal_cards(card_game.hand_size))
    opponent.pick_up_cards(deck.deal_cards(card_game.hand_size))
    q_learning.clear_states_seen()
    for round_index in xrange(card_game.num_rounds):
        sum_of_cards = 0.  # reset each round — running value passed between turns
        # Alternate the lead: monte plays first on even rounds, opponent on odd.
        if round_index % 2 == 0:
            sum_of_cards = take_turn(monte, round_index, sum_of_cards, monte_carlo=True)
            sum_of_cards += take_turn(opponent, round_index, sum_of_cards)
        else:
            # NOTE(review): asymmetric with the even branch — the leader's
            # result is captured as `card_showing` and monte's return value
            # REPLACES sum_of_cards instead of accumulating (`=` not `+=`).
            # Possibly intentional (different information flow when monte acts
            # second), but worth confirming against the game rules.
            card_showing = take_turn(opponent, round_index, sum_of_cards)
            sum_of_cards = take_turn(monte, round_index, sum_of_cards, monte_carlo=True)
        # Truncated at the chunk boundary: the body of this condition (and any
        # remaining episode/round logic) continues past the visible source.
        if monte.last_card_played + opponent.last_card_played <= 1: