def make_move(self, board, cardDeck, gameEnd):
    """Pick and apply a move, exploiting or exploring per exp_factor.

    Records the pre-move state on self.state, then either plays the
    model-optimal move (with probability exp_factor) or a uniformly
    random valid move. Returns the post-move state: board state
    concatenated with card-deck state. When the game has ended, no move
    is played and the current state is returned unchanged.
    """
    self.state = board.getState() + cardDeck.getState()

    # Terminal position: nothing left to play, just report the state.
    if gameEnd:
        return board.getState() + cardDeck.getState()

    # Exploit with probability exp_factor, otherwise explore.
    if random.uniform(0, 1) < self.exp_factor:
        return self.make_optimal_move(board, cardDeck)

    candidate = random.choice(board.getValidMoves())
    board.makeMove(candidate, cardDeck.currentCard)
    return board.getState() + cardDeck.getState()
def learn_state(self, board, cardDeck, gameEnd):
    """Run one value-model update and remember the current state.

    Builds the TD target for the current position, trains the model on
    it for 10 epochs, then stores the current (board + deck) state as
    prev_state for the next learning step.
    """
    current = board.getState() + cardDeck.getState()
    self.train_model(self.calc_target(board, cardDeck, gameEnd), 10)
    self.prev_state = current
def make_optimal_move(self, board, cardDeck):
    """Play the highest-valued move and return the resulting state.

    For each valid move, the previewed post-move state is combined with
    50 simulated next-card draws and scored with calc_value; the scores
    are averaged. Draws the model could not score (None) are dropped; a
    move with no scorable draws gets an optimistic default of 50 to
    encourage exploration. Ties are broken uniformly at random.

    Returns the concatenation of the new board and deck states.
    """
    moves = board.getValidMoves()

    # Forced move: no evaluation needed.
    # BUG FIX: pass the drawn card, not the deck object, to makeMove —
    # consistent with every other makeMove call site in this file.
    if len(moves) == 1:
        board.makeMove(moves[0], cardDeck.currentCard)
        return board.getState() + cardDeck.getState()

    best_moves = []
    best_value = -float('Inf')
    for idx in moves:
        temp_state = board.previewMove(idx, cardDeck.currentCard)

        # Monte-Carlo estimate over simulated next-card draws.
        samples = []
        for _ in range(50):
            temp_state_op = temp_state + cardDeck.previewNextCard()
            samples.append(self.calc_value(temp_state_op))

        # Drop draws the value model could not score. (The original
        # filter(None.__ne__, ...) relied on NotImplemented truthiness.)
        samples = [s for s in samples if s is not None]
        if samples:
            v_temp = np.average(samples)
        else:
            v_temp = 50  # unexplored move: optimistic default

        if v_temp > best_value:
            best_moves = [idx]
            best_value = v_temp
        elif v_temp == best_value:
            best_moves.append(idx)

    try:
        # BUG FIX: was board.makeMove(..., cardDeck); must pass the card.
        board.makeMove(random.choice(best_moves), cardDeck.currentCard)
    except IndexError:
        # BUG FIX: random.choice([]) raises IndexError, not ValueError,
        # and the original handler printed an undefined name
        # (temp_state_list), which would itself raise NameError.
        print('candidate moves:', best_moves)
        raise Exception('no candidate move to choose from')
    return board.getState() + cardDeck.getState()
def calc_target(self, board, cardDeck, gameEnd):
    """Build the TD(0) training target: V(s) + alpha * (R + V(s') - V(s)).

    V(s') is taken as 0 once the game has ended, since no reward can
    follow a terminal state. Returns the target wrapped in a numpy
    array, as expected by train_model.
    """
    value_prev = self.calc_value(self.prev_state)
    reward = self.reward(board)
    if gameEnd:
        value_next = 0
    else:
        value_next = self.calc_value(board.getState() + cardDeck.getState())
    return np.array(value_prev + self.alpha * (reward + value_next - value_prev))
def make_move(self, board, cardDeck, gameEnd):
    """Ask the human player for a station index, play it, return the new state.

    gameEnd is accepted for interface compatibility with the agent
    players' make_move but is not used here. Returns the board state
    concatenated with the card-deck state after the move.
    """
    station = int(input('Choose station number: '))
    board.makeMove(station, cardDeck.currentCard)
    return board.getState() + cardDeck.getState()