Exemple #1
0
    def initializedf_6(self):
        """Build the table of board states holding three X's and three O's.

        Every row is one arrangement of three blanks, three 'X' and three 'O'
        over nine cells (columns 0-8), as produced by ``perm``.

        Returns:
            pandas.DataFrame with the nine cell columns plus
            ``reward_X`` (value from ``self.initialize_reward``),
            ``successor_X`` / ``successor_O`` (lists of row labels of this
            same table reachable by moving one X / one O), and
            ``game_over`` (True wherever ``reward_X`` is non-zero).
        """
        def one_move_away(state, frozen_mark, meta_columns):
            # Keep only the states whose ``frozen_mark`` cells coincide with
            # this state's, i.e. the other player's marks stay where they are.
            frozen_cells = state.index[state == frozen_mark]
            frozen_matches = (table[frozen_cells] == frozen_mark).all(axis=1)
            candidates = table.loc[frozen_matches].drop(columns=meta_columns)
            # Exactly two differing cells: one mark left a cell and landed on
            # a previously blank one.
            changed_cells = (candidates != state.drop(meta_columns)).sum(
                axis=1)
            return changed_cells.index[changed_cells == 2].tolist()

        def successors_for_x(state):
            # O's are frozen; only "reward_X" exists besides the cells so far.
            return one_move_away(state, 'O', ["reward_X"])

        def successors_for_o(state):
            # X's are frozen; "successor_X" has been added by this point.
            return one_move_away(state, 'X', ["reward_X", "successor_X"])

        marks = np.array([' '] * 3 + ['X'] * 3 + ['O'] * 3)
        table = pd.DataFrame(list(perm(marks)))
        table["reward_X"] = table.apply(self.initialize_reward, axis=1)

        # A reward of 101 flags the impossible cases identified in
        # initialize_reward; discard them and renumber the remaining rows.
        is_possible = table["reward_X"] != 101
        table = table.loc[is_possible].reset_index(drop=True)

        table["successor_X"] = table.apply(successors_for_x, axis=1)
        table["successor_O"] = table.apply(successors_for_o, axis=1)
        table["game_over"] = table["reward_X"] != 0

        return table
Exemple #2
0
    def initializedf_3(self):
        """Build the table of board states holding two X's and one O.

        Every row is one arrangement of six blanks, two 'X' and one 'O' over
        nine cells (columns 0-8), as produced by ``perm``.

        Requires ``self.df_4`` to exist already; presumably it is the table of
        four-mark states -- confirm against the caller's build order.

        Returns:
            pandas.DataFrame with the nine cell columns plus
            ``reward_X`` (always 0), ``successor_O`` (list of row labels in
            ``self.df_4`` whose board differs from this one in exactly one
            cell) and ``game_over`` (always False).
        """
        # Board-only view of the next table.  ``drop`` builds a new frame, so
        # it is computed once here instead of once per row inside ``apply``.
        # NOTE(review): assumes the remaining columns of df_4 are exactly the
        # cell columns 0-8 so they align with the row being compared.
        df_4_states = self.df_4.drop(
            columns=["reward_X", "successor_X", "game_over"])

        def find_successor_O(row):
            # One differing cell means a single mark was added to the board.
            diff_count = (df_4_states != row.drop(["reward_X"])).sum(axis=1)
            return diff_count.index[diff_count == 1].tolist()

        three_marks = np.array([' ', ' ', ' ', ' ', ' ', ' ', 'X', 'X', 'O'])
        df_3 = pd.DataFrame(list(perm(three_marks)))
        # Impossible to win with at most two marks for each player
        df_3["reward_X"] = 0
        df_3["successor_O"] = df_3.apply(find_successor_O, axis=1)
        # Impossible to have a game over state with first three hands
        df_3["game_over"] = False

        return df_3
Exemple #3
0
    def initializedf_0(self):
        """Build the table for the empty board (no marks placed).

        The rows are whatever ``perm`` yields for nine blanks; with a
        de-duplicating permutation generator that is a single row -- TODO
        confirm which ``perm`` this file imports.

        Requires ``self.df_1`` to exist already; presumably it is the table of
        one-mark states -- confirm against the caller's build order.

        Returns:
            pandas.DataFrame with the nine cell columns (0-8) plus
            ``reward_X`` (always 0), ``successor_X`` (list of row labels in
            ``self.df_1`` whose board differs from this one in exactly one
            cell) and ``game_over`` (always False).
        """
        # Board-only view of the next table.  ``drop`` builds a new frame, so
        # it is computed once here instead of once per row inside ``apply``.
        # NOTE(review): assumes the remaining columns of df_1 are exactly the
        # cell columns 0-8 so they align with the row being compared.
        df_1_states = self.df_1.drop(
            columns=["reward_X", "successor_O", "game_over"])

        def find_successor_X(row):
            # One differing cell means a single mark was added to the board.
            diff_count = (df_1_states != row.drop(["reward_X"])).sum(axis=1)
            return diff_count.index[diff_count == 1].tolist()

        no_marks = np.array([' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '])
        df_0 = pd.DataFrame(list(perm(no_marks)))
        # Nobody can have won on a board without any marks
        df_0["reward_X"] = 0
        df_0["successor_X"] = df_0.apply(find_successor_X, axis=1)
        # Impossible to have a game over state with no marks
        df_0["game_over"] = False

        return df_0
Exemple #4
0
    def initializedf_5(self):
        """Build the table of board states holding three X's and two O's.

        Every row is one arrangement of four blanks, three 'X' and two 'O'
        over nine cells (columns 0-8), as produced by ``perm``.

        Requires ``self.df_6`` to exist already; presumably it is the table of
        six-mark states -- confirm against the caller's build order.

        Returns:
            pandas.DataFrame with the nine cell columns plus
            ``reward_X`` (value from ``self.initialize_reward``),
            ``successor_O`` (list of row labels in ``self.df_6`` whose board
            differs from this one in exactly one cell) and ``game_over``
            (True wherever ``reward_X`` is non-zero).
        """
        # Board-only view of the next table.  ``drop`` builds a new frame, so
        # it is computed once here instead of once per row inside ``apply``.
        # NOTE(review): assumes the remaining columns of df_6 are exactly the
        # cell columns 0-8 so they align with the row being compared.
        df_6_states = self.df_6.drop(columns=[
            "reward_X", "successor_X", "successor_O", "game_over"
        ])

        def find_successor_O(row):
            # One differing cell means a single mark was added to the board.
            diff_count = (df_6_states != row.drop(["reward_X"])).sum(axis=1)
            return diff_count.index[diff_count == 1].tolist()

        five_marks = np.array([' ', ' ', ' ', ' ', 'X', 'X', 'X', 'O', 'O'])
        df_5 = pd.DataFrame(list(perm(five_marks)))
        df_5["reward_X"] = df_5.apply(self.initialize_reward, axis=1)
        # Remove all impossible cases identified in function initialize_reward
        # (flagged with a reward of 101) and renumber the remaining rows.
        df_5 = df_5.loc[df_5["reward_X"] != 101].reset_index(drop=True)
        df_5["successor_O"] = df_5.apply(find_successor_O, axis=1)
        df_5["game_over"] = df_5["reward_X"] != 0

        return df_5