Example #1
File: agent.py Project: dushufan/tetrai
    def run(self, board: Board):
        """
        The model performs random actions until there is sufficient data in replay memory
        Then, the model will also train off replay memory after every action
        """

        sub = 0
        max_score = 0
        total = 0

        for e in range(self.episodes):
            print('Running episode', e)
            board.reset_board()

            game_over = False
            total_score = 0
            print('Memories', len(self.memory.memories))
            print('Epsilon', self.epsilon)
            while not game_over:
                current_state = Board.get_game_state(board._blocks)
                new_board, new_state = self.predict(board)

                reward = 0
                if new_board == -1:
                    game_over = True
                else:
                    reward, game_over = board.do_move(new_board)

                total_score += reward

                self.memory.remember(
                    (current_state, new_state, reward, game_over))

            print('Score', total_score)
            self.train()
            self.epsilon -= self.decay

            total += total_score
            max_score = max(max_score, total_score)
            if total_score < 100:
                sub += 1

        # generate summary
        print('Max score', max_score)
        print('Avg score', total / self.episodes)
        print('Sub 100', sub)
        self.save()
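
The run loop above depends on a replay buffer (self.memory) exposing remember() and a memories list, plus self.predict() and self.train(), none of which are shown on this page. Below is a minimal sketch of what such a buffer could look like, assuming a fixed-capacity deque and uniform random sampling; the names Memory, capacity, and sample are illustrative and not taken from either project.

import random
from collections import deque


class Memory:
    """Hypothetical fixed-capacity replay buffer (illustrative only)."""

    def __init__(self, capacity=20000):
        self.memories = deque(maxlen=capacity)

    def remember(self, transition):
        # transition = (current_state, new_state, reward, game_over)
        self.memories.append(transition)

    def sample(self, batch_size):
        # Uniform random minibatch; smaller than batch_size while the buffer is still filling
        return random.sample(list(self.memories), min(batch_size, len(self.memories)))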
Example #2
File: agent.py Project: dma23/tetris.ai
    def run(self, board: Board):
        sub100 = 0
        max_score = 0
        total = 0

        for e in range(self.episodes):
            print('Iteration: ', e)
            board.reset_board()

            game_over = False
            total_score = 0
            #print('Memories', len(self.memory.memories))
            #print('Epsilon', self.epsilon)
            while not game_over:
                current_state = Board.get_game_state(board._blocks)
                new_board, new_state = self.predict(board)

                reward = 0
                if new_board == -1:
                    game_over = True
                else:
                    reward, game_over = board.do_move(new_board)

                total_score += reward

                self.memory.remember(
                    (current_state, new_state, reward, game_over))

            print('Score', total_score)
            self.train()
            self.epsilon -= self.decay

            total += total_score
            max_score = max(max_score, total_score)
            if total_score < 100:
                sub100 += 1

        # generate summary
        print('Max score', max_score)
        print('Avg score', total / self.episodes)
        print('Sub 100', sub100)
        self.save()
Example #3
    def play(self, board: Board):
        """Greedy play: pick the candidate board with the highest predicted Q-value."""
        possible_boards = board.get_next()
        if len(possible_boards) == 0:
            # No legal placements left; start a new game
            board.reset_board()
            return

        possible_states = [Board.get_game_state(b) for b in possible_boards]

        # Score every candidate state with the trained model and keep the best one
        q_values = []
        for s in possible_states:
            s = np.reshape(s, (1, -1))
            q_values.append(self.model.predict(s)[0])
        m = np.argmax(q_values)

        best_board = possible_boards[m]
        reward, game_over = board.do_move(best_board)
        if game_over:
            self.score = 0
        else:
            self.score += reward
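
For comparison with the greedy play() above, run() in the first two examples calls self.predict(board), which presumably mixes random exploration with the same Q-value ranking. A rough epsilon-greedy sketch under that assumption follows; it reuses the Board.get_next / Board.get_game_state interface shown here and assumes random and numpy (as np) are imported at module level. The actual implementation in these projects is not shown, and the return convention (-1 for "no legal move") is only inferred from run().

    def predict(self, board: Board):
        """Hypothetical epsilon-greedy move selection (illustrative only)."""
        possible_boards = board.get_next()
        if len(possible_boards) == 0:
            # Sentinel checked by run(): -1 means no legal move, so the episode ends
            return -1, None

        possible_states = [Board.get_game_state(b) for b in possible_boards]

        if random.random() < self.epsilon:
            # Explore: pick a random candidate placement
            m = random.randrange(len(possible_boards))
        else:
            # Exploit: pick the placement with the highest predicted Q-value
            q_values = [self.model.predict(np.reshape(s, (1, -1)))[0]
                        for s in possible_states]
            m = int(np.argmax(q_values))

        return possible_boards[m], possible_states[m]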