def feedReward(self, game_result):
    """Back-propagate the end-of-game reward through every recorded state.

    Walks ``self.states`` from the final position backwards and applies the
    Q-learning update ``V(s) = (1-alpha)*V(s) + alpha*(reward + gamma*max_q)``,
    where ``max_q`` is the best value (taken from a previously saved rival
    policy file) the rival could reach with one reply from the position
    *preceding* each recorded state.  Clears ``self.states`` when done.

    Parameters:
        game_result: 1 for a win, 0 for a draw, -1 for a loss.

    Raises:
        ValueError: if ``game_result`` is not one of 1, 0, -1 (the original
            code left ``reward`` unbound in that case and crashed later).
    """
    # Map the game outcome onto the configured reward.
    if game_result == 1:
        reward = self.win_reward
    elif game_result == 0:
        reward = self.draw_reward
    elif game_result == -1:
        reward = self.lose_reward
    else:
        raise ValueError("game_result must be 1, 0 or -1, got %r" % (game_result,))

    # Load the rival's learned state values so we can "see the future" one
    # ply ahead.  Which file to use depends on whether we moved first.
    if self.sign == State.X:
        policy_file = "policy_3x3_100000_round_against_RandomPlayer_secondplayer"
    else:
        policy_file = "policy_3x3_100000_round_against_RandomPlayer_firstplayer"
    try:
        loaded_states_value = load(policy_file)
    except Exception:
        # Best-effort: without the rival policy, every lookahead value is 0.
        print("There's no proper file for see the future.")
        loaded_states_value = None

    # Newest state first; states_rev[i + 1] is therefore the position that
    # preceded states_rev[i] in the game.
    states_rev = self.states[::-1]
    last_index = len(states_rev) - 1
    for i, st in enumerate(states_rev):
        if self.states_value.get(st) is None:
            self.states_value[st] = 0
        # Rebuild the board as it was *before* this state was reached, so we
        # can enumerate the rival's possible replies; the oldest state has no
        # predecessor, so an empty board is used there.
        b = Board(self.board.size)
        if i != last_index:
            b.table = self.getBoardStateFromHash(states_rev[i + 1])
        possible_q_values = []
        for poss_move in b.empty_cells():
            # NOTE(review): trial moves placed here are never reset, so each
            # later hash also contains the earlier trial marks — verify this
            # matches the hashing scheme used when the policy was trained.
            b.table[poss_move[0]][poss_move[1]].state = self.rival_sign
            possible_boardState_hash = self.getHash(b.table)
            value = (loaded_states_value.get(possible_boardState_hash)
                     if loaded_states_value is not None else None)
            possible_q_values.append(value if value is not None else 0)
        # Guard against an empty candidate list instead of crashing in max().
        possible_q_value = max(possible_q_values) if possible_q_values else 0
        self.states_value[st] = (
            (1 - self.alpha) * self.states_value[st]
            + self.alpha * (reward + self.decay_gamma * possible_q_value))
    # Episode consumed; start recording the next game from scratch.
    self.states = []
def trainingClassifierPlayer(self, X_file, Y_Xmoves_file, Y_Ymoves_file, start_player, boardSize, classifier_num=1):
    """Train a RandomForestClassifierPlayer from saved game data and persist it.

    Loads the feature matrix and the two move-coordinate target vectors from
    the given files, plays 1000 evaluation games against a RandomPlayer, then
    saves the trained models.

    Parameters:
        X_file: path of the saved feature matrix.
        Y_Xmoves_file: path of the saved x-coordinate targets.
        Y_Ymoves_file: path of the saved y-coordinate targets.
        start_player: truthy if the classifier player moves first.
        boardSize: size of the (square) game board.
        classifier_num: number of classifiers to use (default 1).
    """
    b = Board(boardSize)
    X = load(X_file)
    Y_Xmoves = load(Y_Xmoves_file)
    Y_Ymoves = load(Y_Ymoves_file)
    # The two original branches differed only in seating order and in which
    # variable got saved — build both players once and pick the order.
    clf_player = RandomForestClassifierPlayer(None, None, X, Y_Xmoves, Y_Ymoves, classifier_num)
    rnd_player = RandomPlayer()
    if start_player:
        testGame(clf_player, rnd_player, b, 1000)
    else:
        testGame(rnd_player, clf_player, b, 1000)
    print("saving...")
    clf_player.saveModels()
    print("saved")
def __init__(self, board):
    """Create the player and fit its two move classifiers.

    Fits one classifier per move coordinate (x and y) from the pre-recorded
    random-vs-random 4x4, 10000-game, second-player data set.

    Parameters:
        board: the game board this player will play on.
    """
    BasePlayer.__init__(self, board)
    # Features plus one target vector per board axis.
    features = load("X_random_random_secondplayer_4x4_10000game")
    x_targets = load("YX_random_random_secondplayer_4x4_10000game")
    y_targets = load("YY_random_random_secondplayer_4x4_10000game")
    self.clf_x = self.training(features, np.ravel(x_targets))
    self.clf_y = self.training(features, np.ravel(y_targets))
def __init__(self, clf_x_file, clf_y_file, X=None, Y_Xmoves=None, Y_Ymoves=None, classifier_num=1):
    """Create the player either by training on data or by loading saved models.

    If non-empty training data is supplied, two classifiers (one per move
    coordinate) are trained; otherwise the models are loaded from the given
    files.

    Parameters:
        clf_x_file: path of a saved x-coordinate model, or None.
        clf_y_file: path of a saved y-coordinate model, or None.
        X: feature matrix for training (default None; was a mutable
            ``[]`` default — a shared-state hazard, replaced compatibly).
        Y_Xmoves: x-coordinate targets for training (default None).
        Y_Ymoves: y-coordinate targets for training (default None).
        classifier_num: number of classifiers to use (default 1).

    Raises:
        Exception: when neither training data nor both model files are given.
    """
    BasePlayer.__init__(self)
    self.classifier_num = classifier_num
    # Train only when all three data sets are present and non-empty; empty
    # lists passed by callers behave exactly as before.
    have_data = (X is not None and len(X) != 0
                 and Y_Xmoves is not None and len(Y_Xmoves) != 0
                 and Y_Ymoves is not None and len(Y_Ymoves) != 0)
    if have_data:
        self.clf_x = self.training(X, np.ravel(Y_Xmoves))
        self.clf_y = self.training(X, np.ravel(Y_Ymoves))
    elif clf_x_file is not None and clf_y_file is not None:
        self.clf_x = load(clf_x_file)
        self.clf_y = load(clf_y_file)
    else:
        raise Exception("You must train or load two ML model")
    self.name = "RandomForestClassifierPlayer"
def __init__(self, learning_file=None, alpha=0.2, epszilon=0.3, gamma=0.9, win_reward=1, draw_reward=0.3, lose_reward=0):
    """Create a Q-learning player, fresh or from a saved policy.

    Parameters:
        learning_file: path of a saved state-value table; when given, the
            table is loaded and exploration is disabled (pure exploitation).
        alpha: learning rate for the Q-value update.
        epszilon: exploration rate used only when learning from scratch.
        gamma: discount factor for future rewards.
        win_reward: reward fed back after a win.
        draw_reward: reward fed back after a draw.
        lose_reward: reward fed back after a loss.
    """
    BasePlayer.__init__(self)
    self.states = []  # record all positions taken during the current game
    self.alpha = alpha
    self.decay_gamma = gamma
    self.win_reward = win_reward
    self.draw_reward = draw_reward
    self.lose_reward = lose_reward
    self.name = "QLearningPlayer"
    if learning_file is None:  # fixed: identity check, not `== None`
        self.states_value = {}  # state hash -> learned value
        self.exp_rate = epszilon
    else:
        self.states_value = load(learning_file)
        self.exp_rate = 0.0  # trained policy: never explore