예제 #1
0
    def make_move(self, board, cardDeck, gameEnd):
        """Take one turn and return the combined board + deck state.

        The combined state seen on entry is stored on ``self.state``. When
        ``gameEnd`` is truthy no move is played. Otherwise a number is drawn
        uniformly from [0, 1]: below ``self.exp_factor`` the turn is delegated
        to ``self.make_optimal_move``; in every other case a random valid move
        is played with ``cardDeck.currentCard``.

        Args:
            board: Object providing ``getState``, ``getValidMoves`` and
                ``makeMove``.
            cardDeck: Object providing ``getState`` and ``currentCard``.
            gameEnd: Truthy when the game is already over.

        Returns:
            ``board.getState() + cardDeck.getState()`` after the turn (or the
            value returned by ``self.make_optimal_move``).
        """
        self.state = board.getState() + cardDeck.getState()

        # Game over: nothing to play, just report the current state.
        if gameEnd:
            return board.getState() + cardDeck.getState()

        if random.uniform(0, 1) < self.exp_factor:
            return self.make_optimal_move(board, cardDeck)

        # Random branch: pick any valid move and play the current card there.
        chosen = random.choice(board.getValidMoves())
        board.makeMove(chosen, cardDeck.currentCard)
        return board.getState() + cardDeck.getState()
예제 #2
0
    def learn_state(self, board, cardDeck, gameEnd):
        """Run one training step and remember the current state.

        The combined board + deck state is captured first, then the target
        from ``self.calc_target`` is handed to ``self.train_model`` together
        with the constant ``10``. ``self.prev_state`` is overwritten only
        after training has finished.

        Args:
            board: Object providing ``getState``.
            cardDeck: Object providing ``getState``.
            gameEnd: Forwarded unchanged to ``self.calc_target``.
        """
        observed_state = board.getState() + cardDeck.getState()

        # NOTE(review): the second argument (10) is presumably an
        # epoch/iteration count -- confirm against train_model.
        self.train_model(self.calc_target(board, cardDeck, gameEnd), 10)

        self.prev_state = observed_state
예제 #3
0
    def make_optimal_move(self, board, cardDeck):
        """Play the valid move with the best estimated value; return the new state.

        With exactly one valid move, that move is played directly. Otherwise
        every candidate is previewed with ``cardDeck.currentCard`` and scored
        by averaging ``self.calc_value`` over 50 calls to
        ``cardDeck.previewNextCard()``; ``None`` estimates are ignored. A
        candidate without any usable estimate gets the fixed score 50. Ties
        for the best score are broken uniformly at random.

        Args:
            board: Object providing ``getValidMoves``, ``previewMove``,
                ``makeMove`` and ``getState``.
            cardDeck: Object providing ``currentCard``, ``previewNextCard``
                and ``getState``.

        Returns:
            ``board.getState() + cardDeck.getState()`` after the move.

        Raises:
            RuntimeError: If no candidate could be selected, i.e. there are
                no valid moves or no score compared greater than or equal to
                negative infinity (for example NaN).
        """
        moves = board.getValidMoves()
        if len(moves) == 1:
            # NOTE(review): makeMove receives the deck itself here, while
            # previewMove below receives cardDeck.currentCard -- confirm
            # which one makeMove expects.
            board.makeMove(moves[0], cardDeck)
            return board.getState() + cardDeck.getState()

        sim_card_draws = 50
        best_moves = []
        best_value = -float('Inf')

        for idx in moves:
            preview_state = board.previewMove(idx, cardDeck.currentCard)

            # Keep only real estimates. An explicit identity check avoids
            # evaluating NotImplemented (from None.__ne__) as a boolean,
            # which is deprecated.
            estimates = []
            for _ in range(sim_card_draws):
                estimate = self.calc_value(
                    preview_state + cardDeck.previewNextCard())
                if estimate is not None:
                    estimates.append(estimate)

            if estimates:
                move_value = np.average(estimates)
            else:
                # encourage exploration
                move_value = 50

            if move_value > best_value:
                best_moves = [idx]
                best_value = move_value
            elif move_value == best_value:
                best_moves.append(idx)

        # random.choice raises IndexError on an empty sequence, so test for
        # that case explicitly and report it with a clear error.
        if not best_moves:
            raise RuntimeError(
                'temp state empty: no move could be selected from {!r}'
                .format(moves))

        board.makeMove(random.choice(best_moves), cardDeck)

        return board.getState() + cardDeck.getState()
예제 #4
0
    def calc_target(self, board, cardDeck, gameEnd):
        """Compute the training target for the previously stored state.

        The target is ``v + alpha * (R + v_next - v)`` where ``v`` is
        ``self.calc_value(self.prev_state)``, ``R`` is ``self.reward(board)``
        and ``v_next`` is the value of the current combined board + deck
        state, or 0 when ``gameEnd`` is truthy. No discount factor is
        applied to ``v_next``.

        Args:
            board: Object providing ``getState``; also passed to
                ``self.reward``.
            cardDeck: Object providing ``getState``.
            gameEnd: Truthy when the game is over.

        Returns:
            The target wrapped in ``np.array``.
        """
        prev_value = self.calc_value(self.prev_state)
        step_reward = self.reward(board)

        # A finished game has no successor state, so its value is zero.
        next_value = 0 if gameEnd else self.calc_value(
            board.getState() + cardDeck.getState())

        correction = step_reward + next_value - prev_value
        return np.array(prev_value + self.alpha * correction)
예제 #5
0
 def make_move(self, board, cardDeck, gameEnd):
     """Ask the user for a station number, play it, and return the new state.

     The prompt is repeated until the input parses as an integer, so a typo
     no longer aborts the turn with ``ValueError``. The number is not checked
     against the board's valid moves; it is passed straight to
     ``board.makeMove`` together with ``cardDeck.currentCard``.

     Args:
         board: Object providing ``makeMove`` and ``getState``.
         cardDeck: Object providing ``currentCard`` and ``getState``.
         gameEnd: Unused by this method.

     Returns:
         ``board.getState() + cardDeck.getState()`` after the move.
     """
     while True:
         try:
             idx = int(input('Choose station number: '))
         except ValueError:
             # Non-numeric input: tell the user and ask again.
             print('Invalid input, please enter a whole number.')
         else:
             break
     board.makeMove(idx, cardDeck.currentCard)
     return board.getState() + cardDeck.getState()