def initializedf_6(self):
    """Build the table of board states holding three 'X' and three 'O' marks.

    Every row is one arrangement of the nine cells (three blanks, three
    'X', three 'O') as produced by ``perm``, which is defined elsewhere in
    this file.  Arrangements whose reward equals 101 are discarded and the
    remaining rows are renumbered from 0.

    Columns added to the nine cell columns:
        reward_X: value returned by ``self.initialize_reward`` for the row.
        successor_X: labels of the rows that have 'O' in every cell where
            this row has 'O' and differ from this row in exactly two cells.
        successor_O: the same, with 'X' as the mark that stays in place.
        game_over: True where ``reward_X`` is non-zero.

    Returns:
        pandas.DataFrame: the assembled state table.
    """

    def find_successors(cells, fixed_mark):
        """Return labels of states reached by relocating one non-fixed mark.

        ``cells`` is one row of ``board``; ``fixed_mark`` is the mark whose
        positions must be unchanged in a successor.
        """
        fixed_cols = cells.index[cells == fixed_mark]
        same_fixed = (board[fixed_cols] == fixed_mark).all(axis=1)
        # All rows hold the same multiset of marks, so differing in exactly
        # two cells means one mark swapped places with a blank cell.
        diff_count = (board.loc[same_fixed] != cells).sum(axis=1)
        return diff_count.index[diff_count == 2].tolist()

    six_marks = np.array([' ', ' ', ' ', 'X', 'X', 'X', 'O', 'O', 'O'])
    df_6 = pd.DataFrame(list(perm(six_marks)))
    # Remember which columns are board cells before any bookkeeping column
    # is appended, so later steps never need to list what to exclude.
    cell_cols = df_6.columns.tolist()

    df_6["reward_X"] = df_6.apply(self.initialize_reward, axis=1)
    # Remove all impossible cases identified in function initialize_reward
    df_6 = df_6.loc[df_6["reward_X"] != 101].reset_index(drop=True)

    # Board-only view, built once and shared by every per-row lookup.
    board = df_6[cell_cols]
    df_6["successor_X"] = board.apply(find_successors, axis=1, args=('O',))
    df_6["successor_O"] = board.apply(find_successors, axis=1, args=('X',))
    df_6["game_over"] = df_6["reward_X"] != 0
    return df_6
def initializedf_3(self):
    """Build the table of board states holding two 'X' marks and one 'O'.

    Every row is one arrangement of the nine cells as produced by ``perm``,
    which is defined elsewhere in this file.  ``self.df_4`` must already be
    set, with ``reward_X``, ``successor_X`` and ``game_over`` columns next
    to its cell columns.

    Columns added to the nine cell columns:
        reward_X: always 0.
        successor_O: labels of the rows of ``self.df_4`` that differ from
            this row in exactly one cell (presumably the positions after
            O's next move -- confirm against how ``df_4`` is built).
        game_over: always False.

    Returns:
        pandas.DataFrame: the assembled state table.
    """
    # Board-only view of the next table.  It does not depend on the row
    # being processed, so build it once instead of once per row.
    df_4_states = self.df_4.drop(
        columns=["reward_X", "successor_X", "game_over"])

    def find_successor_O(row):
        """Return labels of ``df_4`` rows one cell away from ``row``."""
        diff_count = (df_4_states != row.drop(["reward_X"])).sum(axis=1)
        return diff_count.index[diff_count == 1].tolist()

    three_marks = np.array([' ', ' ', ' ', ' ', ' ', ' ', 'X', 'X', 'O'])
    df_3 = pd.DataFrame(list(perm(three_marks)))
    # Impossible to win with at most two marks for each player
    df_3["reward_X"] = 0
    df_3["successor_O"] = df_3.apply(find_successor_O, axis=1)
    # The game cannot be over after only the first three moves
    df_3["game_over"] = False
    return df_3
def initializedf_0(self):
    """Build the table for the empty board (no marks placed).

    Rows come from ``perm`` applied to nine blank cells; ``perm`` is
    defined elsewhere in this file.  ``self.df_1`` must already be set,
    with ``reward_X``, ``successor_O`` and ``game_over`` columns next to
    its cell columns.

    Columns added to the nine cell columns:
        reward_X: always 0.
        successor_X: labels of the rows of ``self.df_1`` that differ from
            this row in exactly one cell (presumably the positions after
            X's opening move -- confirm against how ``df_1`` is built).
        game_over: always False.

    Returns:
        pandas.DataFrame: the assembled state table.
    """
    # Board-only view of the next table.  It does not depend on the row
    # being processed, so build it once instead of once per row.
    df_1_states = self.df_1.drop(
        columns=["reward_X", "successor_O", "game_over"])

    def find_successor_X(row):
        """Return labels of ``df_1`` rows one cell away from ``row``."""
        diff_count = (df_1_states != row.drop(["reward_X"])).sum(axis=1)
        return diff_count.index[diff_count == 1].tolist()

    no_marks = np.array([' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '])
    df_0 = pd.DataFrame(list(perm(no_marks)))
    # Impossible to win with no marks on the board
    df_0["reward_X"] = 0
    df_0["successor_X"] = df_0.apply(find_successor_X, axis=1)
    # Impossible to have a game over state with no marks
    df_0["game_over"] = False
    return df_0
def initializedf_5(self):
    """Build the table of board states holding three 'X' and two 'O' marks.

    Every row is one arrangement of the nine cells as produced by ``perm``,
    which is defined elsewhere in this file.  Arrangements whose reward
    equals 101 are discarded and the remaining rows are renumbered from 0.
    ``self.df_6`` must already be set, with ``reward_X``, ``successor_X``,
    ``successor_O`` and ``game_over`` columns next to its cell columns.

    Columns added to the nine cell columns:
        reward_X: value returned by ``self.initialize_reward`` for the row.
        successor_O: labels of the rows of ``self.df_6`` that differ from
            this row in exactly one cell (presumably the positions after
            O's next move -- confirm against how ``df_6`` is built).
        game_over: True where ``reward_X`` is non-zero.

    Returns:
        pandas.DataFrame: the assembled state table.
    """
    # Board-only view of the next table.  It does not depend on the row
    # being processed, so build it once instead of once per row.
    df_6_states = self.df_6.drop(columns=[
        "reward_X", "successor_X", "successor_O", "game_over"
    ])

    def find_successor_O(row):
        """Return labels of ``df_6`` rows one cell away from ``row``."""
        diff_count = (df_6_states != row.drop(["reward_X"])).sum(axis=1)
        return diff_count.index[diff_count == 1].tolist()

    five_marks = np.array([' ', ' ', ' ', ' ', 'X', 'X', 'X', 'O', 'O'])
    df_5 = pd.DataFrame(list(perm(five_marks)))
    df_5["reward_X"] = df_5.apply(self.initialize_reward, axis=1)
    # Remove all impossible cases identified in function initialize_reward
    df_5 = df_5.loc[df_5["reward_X"] != 101].reset_index(drop=True)
    df_5["successor_O"] = df_5.apply(find_successor_O, axis=1)
    df_5["game_over"] = df_5["reward_X"] != 0
    return df_5