Example #1
0
    def reset(self):
        """Start a fresh game: rebuild the draw bag, players and board."""
        self.drawbag = DrawBag(self.contents)

        # Players are numbered from 1 upwards.
        self.players = [Player(str(pid)) for pid in range(1, self.n_players + 1)]

        self.current_player_num = 0
        self.done = False
        logger.debug(f'\n\n---- NEW GAME ----')

        # Lay out the board and cover it with tiles from the bag.
        self.board = Board(self.board_size)
        self.board.fill(self.drawbag.draw(self.squares))

        # Nets are fixed features of the board layout.
        for net in self.nets:
            self.board.add_net(net)

        self.place_hudson()

        self.turns_taken = 0

        return self.observation
Example #2
0
    def reset(self):
        """Reset all game state for a new game and deal the opening hands.

        Returns the initial observation. Fix: `self.zobrane3` and
        `self.stalsom` were each assigned twice; duplicates removed.
        """
        self.round = 0
        self.deck = Deck()
        self.players = []
        self.action_bank = []
        self.played_cards = []

        # Per-round state flags (Slovak names — presumably: zobrane3 = "drew
        # three", stalsom = "stood", menimna = "changing suit to",
        # ktoryvysnik = "which over-card was played", beres = cards to draw;
        # TODO confirm semantics against the game rules).
        self.zobrane3 = False
        self.stalsom = False
        self.menimna = ""
        self.ktoryvysnik = ""
        self.beres = 0

        player_id = 1
        for p in range(self.n_players):
            self.players.append(Player(str(player_id)))
            player_id += 1

        # Deal 5 cards to every player.
        for player in self.players:
            player.hand.add(self.deck.pop(5))

        self.current_player_num = 0
        self.done = False
        # Flip the starting table card.
        self.tableCard = self.deck.pop()[0]
        logger.debug(f"\n\n---- NEW GAME ----")
        return self.observation
Example #3
0
def main():
    """Measure how the leading-PCA plane of the parameter trajectory,
    refitted incrementally chunk by chunk, converges toward the plane of
    the full-trajectory PCA, and save a 2D plot of angle vs. chunk count.
    """

    # requires  n_comp_to_use, pc1_chunk_size
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, cma_unknown_args = common_arg_parser.parse_known_args()


    # Resolve the directory layout for this run.
    this_run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)
    save_dir = get_save_dir( this_run_dir)  # NOTE(review): unused below — confirm before removing


    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)


    '''
    ==========================================================================================
    get the pc vectors
    ==========================================================================================
    '''
    # Full-trajectory PCA: its top components define the "final" plane.
    result = do_pca(cma_args.n_components, cma_args.n_comp_to_use, traj_params_dir_name, intermediate_data_dir,
                    proj=False,
                    origin="mean_param", use_IPCA=cma_args.use_IPCA, chunk_size=cma_args.chunk_size, reuse=True)
    logger.debug("after pca")

    final_pcs = result["first_n_pcs"]

    # Re-scan the trajectory in chunks, refitting an incremental PCA as we go.
    all_param_iterator = get_allinone_concat_df(dir_name=traj_params_dir_name, use_IPCA=True, chunk_size=cma_args.pc1_chunk_size)
    plane_angles_vs_final_plane_along_the_way = []
    ipca = IncrementalPCA(n_components=cma_args.n_comp_to_use)  # for sparse PCA to speed up
    for chunk in all_param_iterator:

        logger.log(f"currently at {all_param_iterator._currow}")
        ipca.partial_fit(chunk.values)

        # Current top-n plane after seeing this much of the trajectory.
        first_n_pcs = ipca.components_[:cma_args.n_comp_to_use]
        assert final_pcs.shape[0] == first_n_pcs.shape[0]


        # Angle between the partial-fit plane and the final plane.
        plane_angle = cal_angle_between_nd_planes(first_n_pcs, final_pcs)
        plane_angles_vs_final_plane_along_the_way.append(plane_angle)


    plot_dir = get_plot_dir(cma_args)
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    plane_angles_vs_final_plane_plot_dir = get_plane_angles_vs_final_plane_along_the_way_plot_dir(plot_dir, cma_args.n_comp_to_use)
    if not os.path.exists(plane_angles_vs_final_plane_plot_dir):
        os.makedirs(plane_angles_vs_final_plane_plot_dir)




    # NOTE(review): f-string has no placeholders and a trailing space — the
    # resulting plot name looks unintended; confirm before changing it.
    angles_plot_name = f"plane_angles_vs_final_plane_plot_dir "
    plot_2d(plane_angles_vs_final_plane_plot_dir, angles_plot_name, np.arange(len(plane_angles_vs_final_plane_along_the_way)), plane_angles_vs_final_plane_along_the_way, "num of chunks", "angle with diff in degrees", False)
Example #4
0
    def reset(self):
        """Set up the deck, centre card, counters and players for a new game."""
        self.deck = Deck(self.contents)
        self.discard = Discard()
        self.discard.add(self.deck.draw(self.cards_to_discard))

        # Reveal the first centre card (typed in when running manually).
        self.centre_card = Position()
        if self.manual:
            next_card = input('What card is drawn?: ')
            drawn = self.deck.pick(next_card)
        else:
            drawn = self.deck.draw(1)
        self.centre_card.add(drawn)

        self.centre_counters = Counters()

        # One player per seat (ids counted from 1); each starts with an empty
        # position and the standard stack of counters.
        self.players = [Player(str(n + 1)) for n in range(self.n_players)]
        for player in self.players:
            player.position = Position()
            player.counters.add(self.counters_per_player)

        self.turns_taken = 0
        self.current_player_num = 0
        self.done = False

        logger.debug(f'\n\n---- NEW GAME ----')
        return self.observation
    def check_game_over(self):
        """Return (reward, done) for the current player.

        reward is 1 if the current player has just completed a line,
        0 otherwise; done is True on a win or when the board is full.
        Fixes: removed unused locals (`board`, `players`) and replaced the
        duplicated condition soup with a table of winning lines.
        NOTE(review): the line offsets hard-code 3 cells per line and the
        diagonals assume a 3x3 board even though rows/columns iterate over
        self.grid_length — confirm grid_length == 3.
        """
        current_player_num = self.current_player_num
        g = self.grid_length

        # Build the candidate winning lines: rows, columns, then diagonals.
        lines = []
        for i in range(g):
            lines.append((i * g, i * g + 1, i * g + 2))  # row i
            lines.append((i, i + g, i + g * 2))          # column i
        lines.append((0, 4, 8))                          # main diagonal
        lines.append((2, 4, 6))                          # anti-diagonal

        for line in lines:
            if all(self.square_is_player(sq, current_player_num) for sq in line):
                return 1, True

        if self.turns_taken == self.num_squares:
            logger.debug("Board full")
            return 0, True

        return 0, False
Example #6
0
    def choose_tile(self, square):
        """Remove the tile sitting on `square` and add it to the current
        player's position; raises if the square is empty."""
        player = self.current_player
        tile = self.board.remove(square)
        if tile is None:
            # Nothing on that square — treat the request as an error.
            logger.debug(f"Player {player.id} trying to pick tile from square {square} but doesn't exist!")
            raise Exception('tile not found')

        logger.debug(f"Player {player.id} picking {tile.symbol}")
        player.position.add([tile])
Example #7
0
 def reset(self):
     """Reset the board to empty tokens and recreate the X and O players."""
     empty = Token('.', 0)
     # NOTE: every empty square shares the same Token instance, as before.
     self.board = [empty] * self.num_squares
     self.players = [Player('1', Token('X', 1)), Player('2', Token('O', -1))]
     self.current_player_num = 0
     self.turns_taken = 0
     self.done = False
     logger.debug(f'\n\n---- NEW GAME ----')
     return self.observation
    def switch_hands(self):
        """Rotate every hand one seat along: player i receives player i-1's
        hand, and player 0 receives the last player's hand."""
        logger.debug(f'\nSwitching hands...')
        hands = [p.hand for p in self.players]
        # Rotate right by one: the last hand wraps around to player 0.
        for player, hand in zip(self.players, hands[-1:] + hands[:-1]):
            player.hand = hand
Example #9
0
        def continue_game(self):
            """Let opponent agents play until it is the learning agent's turn
            (or the game ends), returning the last step's results.

            NOTE(review): if it is already the agent's turn on entry the loop
            body never runs and `observation`/`reward`/`done` are unbound, so
            the return raises NameError — confirm callers only invoke this
            when an opponent is to move.
            """
            while self.current_player_num != self.agent_player_num:
                self.render()
                # Opponents sample (not argmax) and see no action mask.
                action = self.current_agent.choose_action(self, choose_best_action = False, mask_invalid_actions = False)
                observation, reward, done, _ = super(SelfPlayEnv, self).step(action)
                logger.debug(f'Rewards: {reward}')
                logger.debug(f'Done: {done}')
                if done:
                    break

            return observation, reward, done, None
Example #10
0
    def print_top_actions(self, action_probs, env=None):
        """Log the five highest-probability actions, decoded via the env for
        blobwar, rounded to 5 decimal places."""
        best_idx = np.argsort(-action_probs)[:5]
        best_probs = action_probs[best_idx]

        # Blobwar actions are decoded to a readable form; otherwise raw index.
        if env is not None and env.name == "blobwar":
            formatter = env.decode_action
        else:
            formatter = lambda action: action

        summary = [str(formatter(i)) + ': ' + str(round(a, 5)) for i, a in zip(best_idx, best_probs)]
        logger.debug(
            f"Top 5 actions: {summary}"
        )
    def step(self, action):
        """Advance the game by one action (gym-style).

        Actions are banked until every player has chosen; then all banked
        cards are played simultaneously and hands rotate. Returns
        (observation, reward, done, info).
        """

        reward = [0] * self.n_players
        done = False

        # check move legality
        if self.legal_actions[action] == 0:
            # Illegal move: current player loses, everyone else shares the win.
            reward = [1.0 / (self.n_players - 1)] * self.n_players
            reward[self.current_player_num] = -1
            done = True

        #play the card(s)
        else:
            self.action_bank.append(action)

            if len(self.action_bank) == self.n_players:
                logger.debug(
                    f'\nThe chosen cards are now played simultaneously')
                for i, action in enumerate(self.action_bank):
                    player = self.players[i]

                    # An action may encode a chopsticks pickup plus a second card.
                    pickup_chopsticks, first_card, second_card = self.convert_action(
                        action)
                    self.play_card(first_card, player)

                    if pickup_chopsticks:
                        self.pickup_chopsticks(player)
                        self.play_card(second_card, player)

                self.action_bank = []
                self.switch_hands()

            self.current_player_num = (self.current_player_num +
                                       1) % self.n_players

            # A full cycle of players counts as one turn.
            if self.current_player_num == 0:
                self.turns_taken += 1

            if self.turns_taken == self.cards_per_player:
                # Round over: score it, then either end the game or deal anew.
                self.score_round()

                if self.round >= self.n_rounds:
                    self.score_puddings()
                    reward = self.score_game()
                    done = True
                else:
                    self.render()
                    self.reset_round()

        self.done = done

        return self.observation, reward, done, {}
    def score_puddings(self):
        """Award +6 split among the pudding leaders and -6 split among the
        pudding laggards at the end of the game."""
        logger.debug('\nPudding counts...')

        puddings = [
            len([card for card in p.position.cards if card.type == 'pudding'])
            for p in self.players
        ]

        logger.debug(f'Puddings: {puddings}')

        winners = self.get_limits(puddings, 'max')
        share = 6 // len(winners)
        for i in winners:
            self.players[i].score += share
            logger.debug(
                f'Player {self.players[i].id} 1st place puddings: {share}'
            )

        losers = self.get_limits(puddings, 'min')
        for i in losers:
            self.players[i].score -= 6 // len(losers)
            # NOTE(review): -6 // n != -(6 // n) when n doesn't divide 6, so
            # the logged value can differ from the score change — kept as-is.
            logger.debug(
                f'Player {self.players[i].id} last place puddings: {-6 // len(losers)}'
            )
Example #13
0
    def choose_action(self, env, choose_best_action, mask_invalid_actions):
        """Pick an action for `env` according to this agent's policy type
        (greedy, rules-based, or trained model)."""
        if self.name == "greedy":
            return self.greedy(env, choose_best_action, mask_invalid_actions)

        if self.name == 'rules':
            value = None
            action_probs = np.array(env.rules_move())
        else:
            action_probs = self.model.action_probability(env.observation)
            value = self.model.policy_pi.value(np.array([env.observation]))[0]
            logger.debug(f'Value {value:.2f}')

        # logger.debug(f'\n action probs:{action_probs} ')
        self.print_top_actions(action_probs, env=env)

        # Optionally zero out illegal actions before choosing.
        if mask_invalid_actions:
            action_probs = mask_actions(env.legal_actions, action_probs)
            logger.debug('Masked ->')
            self.print_top_actions(action_probs, env=env)

        action = np.argmax(action_probs)

        # Blobwar actions get decoded to a readable form for logging.
        if env is not None and env.name == "blobwar":
            formatter = env.decode_action
        else:
            formatter = lambda action: action

        logger.debug(f'Best action {formatter(action)}')

        # Sampling (rather than argmax) adds exploration.
        if not choose_best_action:
            action = sample_action(action_probs)
            logger.debug(f'Sampled action : {formatter(action)} chosen')

        return action
Example #14
0
        def setup_opponents(self):
            """(Re)build the opponent agent and randomly seat the learning agent.

            Rules opponents use the greedy agent; otherwise the latest best
            model is lazily loaded and an opponent is chosen per
            self.opponent_type ('random', 'best', 'mostly_best' or 'base').
            Fixes: bare `except:` narrowed (it also caught KeyboardInterrupt/
            SystemExit); duplicated random-model selection factored out.
            """
            if self.opponent_type == 'rules':
                self.opponent_agent = Agent('greedy')
            else:
                # incremental load of new model
                best_model_name = get_best_model_name(self.name)
                if self.best_model_name != best_model_name:
                    self.opponent_models.append(
                        load_model(self, best_model_name))
                    self.best_model_name = best_model_name

                def random_opponent():
                    # Uniform pick over every model snapshot loaded so far.
                    i = random.randint(0, len(self.opponent_models) - 1)
                    return Agent('ppo_opponent', self.opponent_models[i])

                if self.opponent_type == 'random':
                    self.opponent_agent = random_opponent()

                elif self.opponent_type == 'best':
                    self.opponent_agent = Agent('ppo_opponent',
                                                self.opponent_models[-1])

                elif self.opponent_type == 'mostly_best':
                    # 80% latest model, 20% any earlier snapshot.
                    j = random.uniform(0, 1)
                    if j < 0.8:
                        self.opponent_agent = Agent('ppo_opponent',
                                                    self.opponent_models[-1])
                    else:
                        self.opponent_agent = random_opponent()

                elif self.opponent_type == 'base':
                    self.opponent_agent = Agent('base',
                                                self.opponent_models[0])

            self.agent_player_num = np.random.choice(self.n_players)
            self.agents = [self.opponent_agent] * self.n_players
            self.agents[self.agent_player_num] = None
            try:
                # if self.players is defined on the base environment
                logger.debug(
                    f'Agent plays as Player {self.players[self.agent_player_num].id}'
                )
            except (AttributeError, IndexError):
                # Base env may not expose `players` (or enough of them) —
                # the log line is best-effort only.
                pass
Example #15
0
    def check_game_over(self, board = None , player = None):
        """Return (reward, done): reward 1 when `player` owns a winning line,
        done True on a win or a full board. Defaults to the current board
        and the current player."""

        board = self.board if board is None else board
        player = self.current_player_num if player is None else player

        # A win is any fully-owned 4-square line from the WINNERS table.
        for line in WINNERS:
            if all(self.square_is_player(board, sq, player) for sq in line):
                return 1, True

        if self.turns_taken == self.num_squares:
            logger.debug("Board full")
            return  0, True

        return 0, False #-0.01 here to encourage choosing the win?
    def reset(self):
        """Begin a new game: fresh deck and discard pile, numbered players,
        round counter at zero, then deal the first round."""
        self.round = 0
        self.deck = Deck(self.contents)
        self.discard = Discard()
        self.action_bank = []

        # Player ids run '1'..str(n_players).
        self.players = [Player(str(n)) for n in range(1, self.n_players + 1)]

        self.current_player_num = 0
        self.done = False
        self.reset_round()
        logger.debug(f'\n\n---- NEW GAME ----')
        return self.observation
Example #17
0
    def play_card(self, card_num, player):
        """Take the card named by `card_num` from `player`'s hand and play it
        to their position; a nigiri lands on the first unused wasabi, if any.
        Raises if the card is not in the hand."""
        card_name = self.contents[card_num]['info']['name']
        card = player.hand.pick(card_name)
        if card is None:
            logger.debug(f"Player {player.id} trying to play {card_num} but doesn't exist!")
            raise Exception('Card not found')

        logger.debug(f"Player {player.id} playing {str(card.order) + ': ' + card.symbol + ': ' + str(card.id)}")

        if card.type == 'nigiri':
            # First wasabi in the position that hasn't been used yet.
            free_wasabi = next(
                (c for c in player.position.cards
                 if c.type == 'wasabi' and c.played_upon == False),
                None)
            if free_wasabi is not None:
                free_wasabi.played_upon = True
                card.played_on_wasabi = True

        player.position.add([card])
Example #18
0
    def step(self, action):
        """Play one action (gym-style): action 0 pays a counter to pass; any
        other action takes the centre card plus all accumulated counters.

        Returns (observation, reward, done, info).
        """

        reward = [0] * self.n_players
        done = False
        draw_card = False  # NOTE(review): never set True below — looks vestigial, confirm

        # check move legality
        if self.legal_actions[action] == 0:
            # Illegal move: current player loses, the rest share the win.
            reward = [1.0 / (self.n_players - 1)] * self.n_players
            reward[self.current_player_num] = -1
            done = True

        #play the card(s)
        else:
            if action == 0:
                # Pay one counter onto the centre and pass to the next player.
                logger.debug(f'\nPlayer chooses to play a counter')
                self.current_player.counters.remove(1)
                self.centre_counters.add(1)
                self.current_player_num = (self.current_player_num +
                                           1) % self.n_players

            else:
                logger.debug(
                    f'Player chooses to take card {self.centre_card.cards[0].symbol} and {self.centre_counters.size()} counters'
                )
                self.current_player.position.add(self.centre_card.cards)
                self.current_player.counters.add(self.centre_counters.size())
                self.centre_card.reset()
                self.centre_counters.reset()

                # Game ends once the deck is drawn down to the threshold.
                if self.deck.size() == self.deck_size_at_end:
                    reward = self.score_game()
                    done = True
                else:
                    # Flip the next centre card (typed in when manual).
                    if self.manual:
                        next_card = input('What card is drawn?: ')
                        self.centre_card.add(self.deck.pick(next_card))
                    else:
                        self.centre_card.add(self.deck.draw(1))

            self.turns_taken += 1

        self.done = done

        return self.observation, reward, done, {}
Example #19
0
        def continue_game(self):
            """Working with partial rewards and at the end of the round, sum up reward amount step"""
            # Accumulates the rewards produced by the adversary's move.
            adversary_round_rewards = [0, 0]
            # NOTE(review): if it is already the agent's turn this branch is
            # skipped and `observation`/`done` are unbound at the return,
            # raising NameError — confirm callers guarantee the precondition.
            if self.current_player_num != self.agent_player_num:
                self.render()
                # Adversary samples its move without an action mask.
                action = self.current_agent.choose_action(
                    self, choose_best_action=False, mask_invalid_actions=False)
                #
                # if self.opponent_type == 'rules':
                #     assert(self.core.check_move(self.decode_action(action)),"Adversary rules based cannot play bad moves")

                observation, rewards, done, _ = super(SelfPlayEnvBlobwar,
                                                      self).step(action)
                # formatted_action = super(SelfPlayEnvBlobwar, self).decode_action(action)
                logger.debug(
                    f'Action played by adversary({"o" if self.agent_player_num==0 else "x"}): {action}'
                )
                # Copy the per-agent rewards into the round accumulator.
                for i in range(len(self.agents)):
                    adversary_round_rewards[i] = rewards[i]
            return observation, adversary_round_rewards, done, None
Example #20
0
    def reset(self):
        """Start a new game on a random board with freshly shuffled players."""
        #pick a random board
        self.board = Board(random.choice(ALL_BOARDS))

        # Seat players 1..n, each with both decks shuffled.
        for pid in range(1, self.n_players + 1):
            new_player = Player(pid)
            new_player.r_deck.shuffle()
            new_player.s_deck.shuffle()
            self.board.add_player(new_player)

        self.current_player_num = 0

        #TODO add initial position in learning
        self.set_start_positions()

        self.done = False
        self.last_turn = False
        logger.debug(f'\n\n---- NEW GAME ----')
        self.render_map(first_turn=True)
        self.draw_card()
        return self.observation
Example #21
0
        def step(self, action):
            """Execute the agent's action, then let opponents play on until it
            is the agent's turn again; return the agent's scalar reward."""
            self.render()
            observation, reward, done, _ = super(SelfPlayEnv, self).step(action)
            logger.debug(f'Action played by agent: {action}')
            logger.debug(f'Rewards: {reward}')
            logger.debug(f'Done: {done}')

            # Opponents take their turns (this may end the game).
            if not done:
                observation, reward, done, _ = self.continue_game()

            # Reduce the per-player reward vector to this agent's entry.
            agent_reward = reward[self.agent_player_num]
            logger.debug(f'\nReward To Agent: {agent_reward}')

            if done:
                self.render()

            return observation, agent_reward, done, {} 
Example #22
0
    def score_maki(self, maki):
        """Score maki rolls: 6 points split among the leaders; if first place
        was not shared, 3 points split among the runners-up.

        Mutates `maki` in place (winners are masked with None).
        """
        logger.debug('\nMaki counts...')
        logger.debug(f'Maki: {maki}')

        first_place = self.get_limits(maki, 'max')
        for i in first_place:
            self.players[i].score += 6 // len(first_place)
            maki[i] = None #mask out the winners
            logger.debug(f'Player {self.players[i].id} 1st place maki: {6 // len(first_place)}')

        # Second place is only scored when there was a single outright winner.
        if len(first_place) == 1:
            #now get second place as winners are masked with None
            second_place = self.get_limits(maki, 'max')
            for i in second_place:
                self.players[i].score += 3 // len(second_place)
                logger.debug(f'Player {self.players[i].id} 2nd place maki: {3 // len(second_place)}')
    def choose_action(self, env, choose_best_action, mask_invalid_actions):
        """Select an action: from keyboard/plot input for the human agent,
        from rules_move() for the rules agent, or from the trained model.

        Fix: `env.render_mode is "print"` compared object identity with a
        string literal (fragile, SyntaxWarning on modern Python) — now `==`.
        NOTE(review): `mask_invalid_actions` is currently ignored here (the
        masking block is commented out) — confirm that is intentional.
        """
        if self.name == 'human':
            if env.render_mode == "print":
                # Keep prompting until a valid integer is entered.
                while True:
                    try:
                        action = int(input('\nPlease choose an action: '))
                        break
                    except ValueError:
                        pass
            else:
                # Click on the plot; snap to the nearest 3x3 grid site.
                co_ords = np.array(env.fig.ginput(n=1, timeout=99999999)).ravel()
                # all_sites = np.array(list(product([0, 1, 2], repeat=2)))
                all_sites = np.array([[0, 0],
                                      [1, 0],
                                      [2, 0],
                                      [0, 1],
                                      [1, 1],
                                      [2, 1],
                                      [0, 2],
                                      [1, 2],
                                      [2, 2]])
                all_sites_dist = np.sqrt(
                    np.square((co_ords[0] - all_sites[:, 0])) + np.square((co_ords[1] - all_sites[:, 1])))
                action = np.argmin(all_sites_dist)
                sleep(0.2)
            return int(action)
        elif self.name == 'rules':
            action_probs = np.array(env.rules_move())
            value = None
        else:
            action_probs = self.model.action_probability(env.observation)
            value = self.model.policy_pi.value(np.array([env.observation]))[0]
            logger.debug(f'Value {value:.2f}')

        self.print_top_actions(action_probs)

        # if mask_invalid_actions:
        # action_probs = mask_actions(env.legal_actions, action_probs)
        # logger.debug('Masked ->')
        #     # self.print_top_actions(action_probs)

        action = np.argmax(action_probs)
        logger.debug(f'Best action {action}')

        # Sampling (rather than argmax) adds exploration.
        if not choose_best_action:
            action = sample_action(action_probs)
            logger.debug(f'Sampled action {action} chosen')

        return int(action)
Example #24
0
    def choose_action(self, env, choose_best_action, mask_invalid_actions):
        """Return an action index from the rules engine or the trained model,
        optionally masking illegal moves and sampling for exploration."""
        if self.name == 'rules':
            value = None
            action_probs = np.array(env.rules_move())
        else:
            action_probs = self.model.action_probability(env.observation)
            value = self.model.policy_pi.value(np.array([env.observation]))[0]
            logger.debug(f'Value {value:.2f}')

        self.print_top_actions(action_probs)

        # Optionally zero out illegal actions before choosing.
        if mask_invalid_actions:
            action_probs = mask_actions(env.legal_actions, action_probs)
            logger.debug('Masked ->')
            self.print_top_actions(action_probs)

        action = np.argmax(action_probs)
        logger.debug(f'Best action {action}')

        # Sampling (rather than argmax) adds exploration.
        if not choose_best_action:
            action = sample_action(action_probs)
            logger.debug(f'Sampled action {action} chosen')

        return action
def main():
    """Project the policy-parameter trajectory onto a chosen set of PCs
    (relative to the final parameters by default) and save a 3D path plot."""

    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, cma_unknown_args = common_arg_parser.parse_known_args()

    # Origin for the projection; the alternatives are handled below.
    origin_name = "final_param"

    this_run_dir = get_dir_path_for_this_run(cma_args)
    plot_dir_alg = get_plot_dir(cma_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir,
                                                      params_scope="pi")
    save_dir = get_save_dir(this_run_dir)  # NOTE(review): unused below — confirm before removing

    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)
    if not os.path.exists(plot_dir_alg):
        os.makedirs(plot_dir_alg)

    # Starting policy parameters (used when origin_name == "start_param").
    start_file = get_full_param_traj_file_path(traj_params_dir_name,
                                               "pi_start")
    start_params = pd.read_csv(start_file, header=None).values[0]
    '''
    ==========================================================================================
    get the pc vectors
    ==========================================================================================
    '''
    # Colon-separated PC indexes from the CLI, e.g. "0:1:2".
    pca_indexes = cma_args.other_pca_index
    pca_indexes = [int(pca_index) for pca_index in pca_indexes.split(":")]

    n_comp_to_project_on = pca_indexes
    result = do_pca(n_components=cma_args.n_components,
                    traj_params_dir_name=traj_params_dir_name,
                    intermediate_data_dir=intermediate_data_dir,
                    use_IPCA=cma_args.use_IPCA,
                    chunk_size=cma_args.chunk_size,
                    reuse=True)
    logger.debug("after pca")

    # Choose the projection origin point.
    if origin_name == "final_param":
        origin_param = result["final_params"]
    elif origin_name == "start_param":
        origin_param = start_params
    else:
        origin_param = result["mean_param"]

    # Project every trajectory point onto the selected PC subspace.
    proj_coords = project(result["pcs_components"],
                          pcs_slice=n_comp_to_project_on,
                          origin_name=origin_name,
                          origin_param=origin_param,
                          IPCA_chunk_size=cma_args.chunk_size,
                          traj_params_dir_name=traj_params_dir_name,
                          intermediate_data_dir=intermediate_data_dir,
                          n_components=cma_args.n_components,
                          reuse=True)
    '''
    ==========================================================================================
    eval all xy coords
    ==========================================================================================
    '''
    other_pcs_plot_dir = get_other_pcs_plane_plot_dir(plot_dir_alg,
                                                      pca_indexes)

    if not os.path.exists(other_pcs_plot_dir):
        os.makedirs(other_pcs_plot_dir)

    plot_3d_trajectory_path_only(
        other_pcs_plot_dir,
        f"{pca_indexes}_final_origin_3d_path_plot",
        proj_coords,
        explained_ratio=result["explained_variance_ratio"][pca_indexes])
Example #26
0
    def render(self, mode='human', close=False):
        """Log a human-readable view of the board, player positions and state.

        `mode` is accepted for gym compatibility; output always goes to the
        debug logger. `close=True` is a no-op.
        """
        if close:
            return

        if not self.done:
            logger.debug(f'\n\n-------TURN {self.turns_taken + 1}-----------')
            logger.debug(f"It is Player {self.current_player.id}'s turn to choose")
        else:
            logger.debug(f'\n\n-------FINAL POSITION-----------')
        
        # Draw the board square by square: Hudson marker, net, empty, or tile.
        out = '\n'
        for square in range(self.squares):
            if self.board.hudson == square:
                # Hudson's marker shows its facing direction (R/L/U/D).
                if self.board.hudson_facing == 'R':
                    out += '>H>\t'
                elif self.board.hudson_facing == 'L':
                    out += '<H<\t'
                elif self.board.hudson_facing == 'U':
                    out += '^H^\t'
                elif self.board.hudson_facing == 'D':
                    out += 'vHv\t'
            elif self.board.tiles[square] == None:
                if self.board.nets[square]:
                    out += '-🥅-\t'
                else:
                    out += '---\t'
            else:
                out += self.board.tiles[square].symbol + ':' + str(self.board.tiles[square].id) + '\t' 

            # Flush one full row of the grid at a time.
            if square % self.board_size == self.board_size - 1:
                logger.debug(out)
                out = ''
            
        logger.debug('\n')


        for p in self.players:
            logger.debug(f'Player {p.id}\'s position')
            if p.position.size() > 0:

                # Non-cricket tiles sorted by id first, then cricket tiles.
                out = '  '.join([tile.symbol for tile in sorted(p.position.tiles, key=lambda x: x.id) if tile.type != 'cricket'])
                out += '  ' + '  '.join([tile.symbol for tile in p.position.tiles if tile.type == 'cricket'])

                logger.debug(out)
            else:
                logger.debug('Empty')

        logger.debug(f'\n{self.drawbag.size()} tiles left in drawbag')

        if self.verbose:
            # Sparse observation dump: only non-zero entries (index or (index, value)).
            obs_sparse = [i if o == 1 else (i,o) for i,o in enumerate(self.observation) if o != 0]
            logger.debug(f'\nObservation: \n{obs_sparse}')

        if self.done:
            logger.debug(f'\n\nGAME OVER')
        else:
            logger.debug(f'\nLegal actions: {[i for i,o in enumerate(self.legal_actions) if o != 0]}')
        
        logger.debug(f'\n')

        for p in self.players:
            logger.debug(f'Player {p.id} points: {p.position.score}')
Example #27
0
 def choose_net_tile(self):
     """Draw one bonus tile from the bag for the current player (net reward)."""
     logger.debug(f'Player {self.current_player.id} choosing extra tile using net')
     bonus = self.drawbag.draw(1)
     self.current_player.position.add(bonus)
    def render(self, mode='human', close=False):
        """Log hands, positions, deck/discard counts and legal actions.

        `mode` is accepted for gym compatibility; output always goes to the
        debug logger. `close=True` is a no-op.
        """

        if close:
            return

        if self.turns_taken < self.cards_per_player:
            logger.debug(
                f'\n\n-------ROUND {self.round} : TURN {self.turns_taken + 1}-----------'
            )
            logger.debug(
                f"It is Player {self.current_player.id}'s turn to choose")
        else:
            logger.debug(
                f'\n\n-------FINAL ROUND {self.round} POSITION-----------')

        for p in self.players:
            # Hand: cards still to be played, sorted by card id.
            logger.debug(f'\nPlayer {p.id}\'s hand')
            if p.hand.size() > 0:
                logger.debug('  '.join([
                    str(card.order) + ': ' + card.symbol
                    for card in sorted(p.hand.cards, key=lambda x: x.id)
                ]))
            else:
                logger.debug('Empty')

            # Position: cards already played this round.
            logger.debug(f'Player {p.id}\'s position')
            if p.position.size() > 0:
                logger.debug('  '.join([
                    str(card.order) + ': ' + card.symbol + ': ' + str(card.id)
                    for card in sorted(p.position.cards, key=lambda x: x.id)
                ]))
            else:
                logger.debug('Empty')

        logger.debug(f'\n{self.deck.size()} cards left in deck')
        logger.debug(f'{self.discard.size()} cards discarded')

        if self.verbose:
            # Sparse observation dump: only non-zero entries.
            logger.debug(
                f'\nObservation: \n{[i if o == 1 else (i,o) for i,o in enumerate(self.observation) if o != 0]}'
            )

        if not self.done:
            logger.debug(
                f'\nLegal actions: {[i for i,o in enumerate(self.legal_actions) if o != 0]}'
            )

        if self.done:
            logger.debug(f'\n\nGAME OVER')

        if self.turns_taken == self.cards_per_player:
            # End of round: show each player's score.
            for p in self.players:
                logger.debug(f'Player {p.id} points: {p.score}')
def main():
    """Compare weighted (WPCA) vs. unweighted incremental PCA of a parameter
    trajectory.

    Reads the start/final parameter vectors and the dumped trajectory chunks
    for a run, incrementally fits both an IncrementalPCA and a WPCA over the
    accumulated data, and tracks the angles between (a) the fitted top-k
    plane and the final plane from a full PCA, and (b) PC1 and the overall
    displacement vector V = final - start. All angle series are plotted in
    the `finally` block so partial results are still saved on interruption.

    Command-line driven (see `get_common_parser`); notably uses
    n_components, n_comp_to_use, chunk_size, pc1_chunk_size, use_IPCA and
    func_index_to_use.
    """

    # requires  n_comp_to_use, pc1_chunk_size
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    cma_args, cma_unknown_args = common_arg_parser.parse_known_args()

    # Resolve the directory layout for this run's dumped trajectory data.
    this_run_dir = get_dir_path_for_this_run(cma_args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    intermediate_data_dir = get_intermediate_data_dir(this_run_dir)

    if not os.path.exists(intermediate_data_dir):
        os.makedirs(intermediate_data_dir)

    # Endpoints of the optimization trajectory: each CSV holds one row
    # that is the flattened parameter vector.
    logger.log("grab final params")
    final_file = get_full_param_traj_file_path(traj_params_dir_name, "final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    logger.log("grab start params")
    start_file = get_full_param_traj_file_path(traj_params_dir_name, "start")
    start_params = pd.read_csv(start_file, header=None).values[0]

    # Overall displacement direction of training; used as the reference
    # vector for the "PC1 vs V" angle series below.
    V = final_params - start_params
    '''
    ==========================================================================================
    get the pc vectors
    ==========================================================================================
    '''

    # Full-trajectory PCA (possibly incremental); its top n_comp_to_use
    # components define the reference plane the incremental fits are
    # compared against.
    result = do_pca(cma_args.n_components,
                    cma_args.n_comp_to_use,
                    traj_params_dir_name,
                    intermediate_data_dir,
                    proj=False,
                    origin="mean_param",
                    use_IPCA=cma_args.use_IPCA,
                    chunk_size=cma_args.chunk_size,
                    reuse=True)
    logger.debug("after pca")

    final_plane = result["first_n_pcs"]

    # NOTE(review): total_num is read but never used below — dead code?
    count_file = get_full_param_traj_file_path(traj_params_dir_name,
                                               "total_num_dumped")
    total_num = pd.read_csv(count_file, header=None).values[0]

    # Iterator over trajectory chunks of pc1_chunk_size rows each.
    all_param_iterator = get_allinone_concat_df(
        dir_name=traj_params_dir_name,
        use_IPCA=True,
        chunk_size=cma_args.pc1_chunk_size)
    # Angle of the incremental top-k plane vs. final_plane, per chunk.
    unduped_angles_along_the_way = []
    duped_angles_along_the_way = []
    diff_along = []

    # Angle of incremental PC1 vs. V, per chunk.
    unweighted_pc1_vs_V_angles = []
    duped_pc1_vs_V_angles = []
    pc1_vs_V_diffs = []

    unweighted_ipca = IncrementalPCA(
        n_components=cma_args.n_comp_to_use)  # for sparse PCA to speed up

    # Grows with every chunk; WPCA is re-fit on the whole buffer each
    # iteration, so cost grows per chunk.
    all_matrix_buffer = []

    try:
        i = -1
        for chunk in all_param_iterator:
            i += 1
            # NOTE(review): caps processing at the first 2 chunks — looks
            # like a debugging leftover; confirm before relying on results.
            if i >= 2:
                break
            chunk = chunk.values
            # Unweighted incremental fit on just this chunk.
            unweighted_ipca.partial_fit(chunk)
            unweighted_angle = cal_angle_between_nd_planes(
                final_plane,
                unweighted_ipca.components_[:cma_args.n_comp_to_use])
            # postize_angle presumably folds the angle into [0, 90] — confirm.
            unweighted_pc1_vs_V_angle = postize_angle(
                cal_angle_between_nd_planes(V, unweighted_ipca.components_[0]))

            unweighted_pc1_vs_V_angles.append(unweighted_pc1_vs_V_angle)

            #TODO ignore 90 or 180 for now
            if unweighted_angle > 90:
                unweighted_angle = 180 - unweighted_angle
            unduped_angles_along_the_way.append(unweighted_angle)

            # Sanity check: the two angle helpers agree on 1-D inputs.
            np.testing.assert_almost_equal(
                cal_angle_between_nd_planes(
                    unweighted_ipca.components_[:cma_args.n_comp_to_use][0],
                    final_plane[0]),
                cal_angle(
                    unweighted_ipca.components_[:cma_args.n_comp_to_use][0],
                    final_plane[0]))

            all_matrix_buffer.extend(chunk)

            # Per-sample weights over everything seen so far; the weighting
            # scheme is selected by func_index_to_use.
            weights = gen_weights(all_matrix_buffer,
                                  Funcs[cma_args.func_index_to_use])
            logger.log(f"currently at {all_param_iterator._currow}")
            # ipca = PCA(n_components=1)  # for sparse PCA to speed up
            # ipca.fit(duped_in_so_far)
            wpca = WPCA(n_components=cma_args.n_comp_to_use
                        )  # for sparse PCA to speed up
            tic = time.time()
            # Weighted PCA re-fit on the full buffer (not incremental).
            wpca.fit(all_matrix_buffer, weights=weights)
            toc = time.time()

            logger.debug(
                f"WPCA of {len(all_matrix_buffer)} data took {toc - tic} secs "
            )
            duped_angle = cal_angle_between_nd_planes(
                final_plane, wpca.components_[:cma_args.n_comp_to_use])

            duped_pc1_vs_V_angle = postize_angle(
                cal_angle_between_nd_planes(V, wpca.components_[0]))
            duped_pc1_vs_V_angles.append(duped_pc1_vs_V_angle)
            pc1_vs_V_diffs.append(duped_pc1_vs_V_angle -
                                  unweighted_pc1_vs_V_angle)

            #TODO ignore 90 or 180 for now
            if duped_angle > 90:
                duped_angle = 180 - duped_angle
            duped_angles_along_the_way.append(duped_angle)
            diff_along.append(unweighted_angle - duped_angle)
    finally:
        # Always plot whatever was collected, even on early exit/exception.
        plot_dir = get_plot_dir(cma_args)
        if not os.path.exists(plot_dir):
            os.makedirs(plot_dir)

        # NOTE(review): these titles embed the literal text
        # "cma_args.pc1_chunk_size" (outside the braces) — possibly intended
        # to be just the value; confirm before changing filenames.
        angles_plot_name = f"WPCA" \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name,
                np.arange(len(duped_angles_along_the_way)),
                duped_angles_along_the_way, "num of chunks",
                "angle with diff in degrees", False)

        angles_plot_name = f"Not WPCA exponential 2" \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name,
                np.arange(len(unduped_angles_along_the_way)),
                unduped_angles_along_the_way, "num of chunks",
                "angle with diff in degrees", False)


        angles_plot_name = f"Not WPCA - WPCA diff_along exponential 2," \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name, np.arange(len(diff_along)),
                diff_along, "num of chunks", "angle with diff in degrees",
                False)




        angles_plot_name = f"PC1 VS VWPCA PC1 VS V" \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name,
                np.arange(len(duped_pc1_vs_V_angles)), duped_pc1_vs_V_angles,
                "num of chunks", "angle with diff in degrees", False)

        angles_plot_name = f"PC1 VS VNot WPCA PC1 VS V" \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name,
                np.arange(len(unweighted_pc1_vs_V_angles)),
                unweighted_pc1_vs_V_angles, "num of chunks",
                "angle with diff in degrees", False)


        angles_plot_name = f"PC1 VS VNot WPCA - WPCA diff PC1 VS V" \
                           f"cma_args.pc1_chunk_size: {cma_args.pc1_chunk_size} "
        plot_2d(plot_dir, angles_plot_name, np.arange(len(pc1_vs_V_diffs)),
                pc1_vs_V_diffs, "num of chunks", "angle with diff in degrees",
                False)

        # Free the (potentially large) trajectory buffer eagerly.
        del all_matrix_buffer
        import gc
        gc.collect()
 def pickup_chopsticks(self, player):
     """Move the chopsticks card from *player*'s position back to their hand."""
     logger.debug(f'Player {player.id} picking up chopsticks')
     # pick() presumably removes and returns the 'chopsticks' card from the
     # position — TODO confirm against the Position implementation.
     chopsticks = player.position.pick('chopsticks')
     player.hand.add([chopsticks])