def __init__(self):
        """Start with empty match bookkeeping and create both contestants."""
        # Bookkeeping: nobody has passed yet and nothing has been recorded.
        self.pass_flag = 0
        self.points_distance_history = []
        self.report_text = []

        # Contestants: a RandomPlayer against an Agent that is handed the
        # ValueNetwork and PolicyNetwork names.
        self.player = RandomPlayer("Random player")
        self.opponent = Agent("AI", ValueNetwork, PolicyNetwork)
    def start_match(self):
        """Run a human-versus-AI match until either side has no points left.

        Every round has at most four betting steps.  Within a step both sides
        act once, in the order selected by ``game.player_turn`` (0: human
        first, 1: AI first).  Action ``0`` is a pass: it ends the round
        immediately and the round is then not summarized.  After each round
        the hands, board and result are written to the report widget, the
        first mover is swapped and the card labels are cleared.
        """

        def human_move(current_round):
            """Wait for the human's choice and apply it; True if it was a pass."""
            # Pump Qt events so the button handlers can store a choice.
            # NOTE(review): this polls in a tight loop and does not appear to
            # terminate if the window is closed while waiting - confirm.
            while self.human_act_status == 3:  # 3 = no action recorded
                QCoreApplication.processEvents()
                self.ui.statusbar.showMessage("waiting for human step...")

            action = self.human_act_status
            current_round.take_action(0, action)
            self.report_text.append("Human: " + str(action) + '\n')
            self.ui.ReportText.addItem("Human: " + actions[action])
            if action == 0:
                self.pass_flag = 1
                self.report_text.append("AI wins" + '\n')
                self.ui.ReportText.addItem("AI wins")
                return True
            return False

        def ai_move(current_round):
            """Let the AI act on its hand and the board; True if it passed."""
            action = self.opponent.act(self.opponent.hand, current_round.board)
            current_round.take_action(1, action)
            self.report_text.append("AI: " + str(action) + '\n')
            self.ui.ReportText.addItem("AI: " + actions[action])
            if action == 0:
                self.pass_flag = 1
                self.report_text.append("Human wins" + '\n')
                self.ui.ReportText.addItem("Human wins")
                return True
            return False

        def combination_label(hand, board):
            """Text shown for a hand: its combination name or the fallback."""
            # Evaluate once; values up to 14 are shown as "Oldest card:".
            rank = hand.check_combination(board)
            if rank > 14:
                return combinations[rank]
            return "Oldest card:"

        self.user = User("human")
        self.opponent = Agent("AI", ValueNetwork, PolicyNetwork)

        game = Game(self.user, self.opponent)

        board_labels = (self.ui.card1_board, self.ui.card2_board,
                        self.ui.card3_board, self.ui.card4_board,
                        self.ui.card5_board)
        hand_labels = (self.ui.card1_hand1, self.ui.card2_hand1)

        while self.user.points > 0 and self.opponent.points > 0:
            self.ui.PlayerScoreNumber.setText(str(self.user.points))
            self.ui.OpponentScoreNumber.setText(str(self.opponent.points))

            current_round = Round([self.user, self.opponent], game.player_turn)
            current_round.start()

            for step in range(4):
                # Set some info text
                self.ui.BetValueNumber.setText(str(current_round.curr_bet))
                self.ui.PlayerScoreNumber.setText(str(self.user.points))
                self.ui.OpponentScoreNumber.setText(str(self.opponent.points))

                for index, label in enumerate(hand_labels):
                    label.setText(str(self.user.hand.cards[index]))

                # Two board cards are shown at step 0, one more per later step.
                for index, label in enumerate(board_labels[:step + 2]):
                    label.setText(str(current_round.board.cards[index]))

                # Make steps: the side whose turn it is acts first.
                if game.player_turn == 0:
                    moves = (human_move, ai_move)
                elif game.player_turn == 1:
                    moves = (ai_move, human_move)
                else:
                    moves = ()

                someone_passed = False
                for move in moves:
                    if move(current_round):
                        someone_passed = True
                        break

                # Reset act status so the next step waits for a fresh click.
                if moves:
                    self.human_act_status = 3
                if someone_passed:
                    break

                current_round.open_card()

            # Calculate the round results
            report = self.ui.ReportText
            report.addItem("Results:")
            report.addItem("• Human hand: " + str(self.user.hand))
            report.addItem("  AI hand:    " + str(self.opponent.hand))
            report.addItem("  Board:      " + str(current_round.board))
            report.addItem("• Human combination:")
            report.addItem(
                combination_label(self.user.hand, current_round.board))
            report.addItem("  AI combination:")
            report.addItem(
                combination_label(self.opponent.hand, current_round.board))

            if self.pass_flag == 0:
                # Nobody passed: summarize() decides (2: human, 0: AI,
                # anything else is reported as a draw).
                winner = current_round.summarize()
                if winner == 2:
                    self.report_text.append("Human wins" + '\n')
                    report.addItem("Result: " + "Human wins")
                elif winner == 0:
                    self.report_text.append("AI wins" + '\n')
                    report.addItem("Result: " + "AI wins")
                else:
                    self.report_text.append("Draw" + '\n')
                    report.addItem("Result: " + "Draw")
            else:
                # The pass was already reported; just re-arm the flag.
                self.pass_flag = 0
            report.addItem(
                "_________________________________________")

            # The other side moves first in the next round.
            game.player_turn = 1 - game.player_turn
            for label in hand_labels + board_labels:
                label.clear()
class RandomMatch:
    """Automated match between a ``RandomPlayer`` and the AI ``Agent``.

    Every action and round outcome is appended to ``report_text``; the
    points difference (player minus AI) at the start of each round is
    appended to ``points_distance_history``.
    """

    def __init__(self):
        self.report_text = []
        self.points_distance_history = []
        self.pass_flag = 0  # set to 1 while a round was ended by a pass

        self.player = RandomPlayer("Random player")
        self.opponent = Agent("AI", ValueNetwork, PolicyNetwork)

    def _player_move(self, current_round):
        """Apply the random player's action; return True if it passed."""
        action = self.player.act()
        current_round.take_action(0, action)
        self.report_text.append("Player: " + str(action) + '\n')
        if action == 0:
            self.pass_flag = 1
            self.report_text.append("AI wins" + '\n')
            return True
        return False

    def _ai_move(self, current_round):
        """Apply the AI's action; return True if it passed."""
        action = self.opponent.act(self.opponent.hand, current_round.board)
        current_round.take_action(1, action)
        self.report_text.append("AI: " + str(action) + '\n')
        if action == 0:
            self.pass_flag = 1
            self.report_text.append("Player wins" + '\n')
            return True
        return False

    def start_match(self):
        """Play rounds until the player or the AI has no points left.

        Each round has at most four steps in which both sides act once.
        Action ``0`` is a pass and ends the round without a summary.
        """
        game = Game(self.player, self.opponent)

        while self.player.points > 0 and self.opponent.points > 0:
            self.points_distance_history.append(self.player.points -
                                                self.opponent.points)
            print("Player: " + str(self.player.points) + "  Opponent: " +
                  str(self.opponent.points))
            current_round = Round([self.player, self.opponent],
                                  game.player_turn)
            current_round.start()

            for _step in range(4):
                # The side whose turn it is acts first.
                # NOTE(review): the turn is flipped after every step (also
                # when the round ends by a pass), not once per round -
                # kept as in the original; confirm this is intended.
                if game.player_turn == 0:
                    moves = (self._player_move, self._ai_move)
                    next_turn = 1
                elif game.player_turn == 1:
                    moves = (self._ai_move, self._player_move)
                    next_turn = 0
                else:
                    moves = ()
                    next_turn = game.player_turn

                someone_passed = False
                for move in moves:
                    if move(current_round):
                        someone_passed = True
                        break

                game.player_turn = next_turn
                if someone_passed:
                    break

                current_round.open_card()

            if self.pass_flag == 0:
                # Nobody passed: summarize() decides (2: player, 0: AI,
                # anything else is reported as a draw).
                winner = current_round.summarize()
                if winner == 2:
                    self.report_text.append("Player wins" + '\n')
                elif winner == 0:
                    self.report_text.append("AI wins" + '\n')
                else:
                    self.report_text.append("Draw" + '\n')
            else:
                # The pass was already reported; just re-arm the flag.
                self.pass_flag = 0

    ## Visualize results of the match
    def visualize_results(self):
        """Plot the recorded points difference against the round index."""
        plt.plot(self.points_distance_history)
        plt.title('Points change')
        plt.ylabel('points difference')
        plt.xlabel('round')
        plt.show()

    ## Saves the report about the last match
    def save_report(self):
        """Write ``report_text`` to ``reports\\random_report.txt``."""
        # The context manager closes the file even if writing fails.
        with open("reports\\random_report.txt", 'w') as report_file:
            report_file.writelines(self.report_text)
Beispiel #4
0
            states.append(s)
            actions.append(a)
            rewards.append(r)
            pb_sas.append(action_probs[a])
            s = poly.fit_transform(next_state.reshape(1, -1))

        rewards[-1] = rewards[-1] * 10
        dataset.episodes.append(Episode(states, actions, rewards, pb_sas))
    return dataset


# Script entry point: build an agent sized from the transformed initial
# observation, collect a safety and a candidate dataset, then call
# agent.update() once per epoch.
# NOTE(review): this snippet appears to be cut off after the last line; the
# names mean_return, did_improve, ngen and gt_estimates are not used in the
# visible part.
if __name__ == '__main__':
    # np.random.seed(RANDOM_SEED)
    s = env.reset()
    # Transform the initial observation (as a single row) with `poly`; the
    # number of resulting columns is passed to Agent as its first argument.
    n_states = poly.fit_transform(s.reshape(1, -1))
    agent = Agent(n_states.shape[1], env.action_space.n, delta=0.1, sigma=0.1, is_tabular=False)
    agent.c = 0
    mean_return = 0
    did_improve = []
    # Two separately generated datasets (third argument 1000 each), both
    # produced with the same agent.
    safety_dataset = generate_dataset(env, agent, 1000)
    candidate_dataset = generate_dataset(env, agent, 1000)
    ngen = 1
    for epoch in range(1000):
        print(f'Epoch: {epoch}')
        print('---------------')

        # update() returns a flag that decides whether the agent is evaluated.
        did_pass = agent.update(safety_dataset, candidate_dataset, 1, write=False)

        if did_pass:
            # Generate a fresh dataset with the updated agent and estimate
            # its expected discounted return on it.
            eval_dataset = generate_dataset(env, agent, 1000)
            gt_estimates = agent.expected_discounted_return(eval_dataset)
class MainWindow(QtWidgets.QMainWindow,
                 poker_mini_gui.Ui_MainWindow):  # poker_gui.Ui_MainWindow
    """Main window in which a human plays a match against the AI agent.

    The three action buttons store the human's choice in
    ``human_act_status`` (0: pass, 1: call, 2: raise, 3: nothing recorded);
    ``start_match`` polls that attribute while it pumps the Qt event loop.
    """

    def __init__(self):
        super().__init__()
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)

        # Connect signals to functions
        self.ui.PassButton.clicked.connect(self.pass_clicked)
        self.ui.CallButton.clicked.connect(self.call_clicked)
        self.ui.RaiseButton.clicked.connect(self.raise_clicked)
        self.ui.actionNew_game.triggered.connect(self.start_session)
        self.ui.actionSave_report.triggered.connect(self.save_report)
        self.ui.actionExit.triggered.connect(self.close)

        self.is_session_active = 0
        self.human_act_status = 3  # 3 means no action recorded

        self.report_text = []

        self.pass_flag = 0  # set to 1 while a round was ended by a pass

    ## Response for pass-button click
    def pass_clicked(self):
        if self.is_session_active == 1:
            self.human_act_status = 0
            self.ui.statusbar.showMessage("Human passed")

    ## Response for call-button click
    def call_clicked(self):
        if self.is_session_active == 1:
            self.human_act_status = 1
            self.ui.statusbar.showMessage("Human called")

    ## Response for raise-button click
    def raise_clicked(self):
        if self.is_session_active == 1:
            self.human_act_status = 2
            self.ui.statusbar.showMessage("Human raised")

    ## Response for the New-game menu action
    def start_session(self):
        """Start a match unless one is already running."""
        if self.is_session_active == 0:
            self.is_session_active = 1
            try:
                self.start_match()
            finally:
                # Without this reset the guard above would reject every
                # later "New game" request once the first match is over.
                self.is_session_active = 0

    ## Response for the Save-report menu action
    def save_report(self):
        """Write ``report_text`` to ``reports\\report.txt``."""
        # The context manager closes the file even if writing fails.
        with open("reports\\report.txt", 'w') as report_file:
            report_file.writelines(self.report_text)

    def _human_move(self, current_round):
        """Wait for the human's choice and apply it; True if it was a pass."""
        # Pump Qt events so the button handlers can store a choice.
        # NOTE(review): this polls in a tight loop and does not appear to
        # terminate if the window is closed while waiting - confirm.
        while self.human_act_status == 3:
            QCoreApplication.processEvents()
            self.ui.statusbar.showMessage("waiting for human step...")

        action = self.human_act_status
        current_round.take_action(0, action)
        self.report_text.append("Human: " + str(action) + '\n')
        self.ui.ReportText.addItem("Human: " + actions[action])
        if action == 0:
            self.pass_flag = 1
            self.report_text.append("AI wins" + '\n')
            self.ui.ReportText.addItem("AI wins")
            return True
        return False

    def _ai_move(self, current_round):
        """Let the AI act on its hand and the board; True if it passed."""
        action = self.opponent.act(self.opponent.hand, current_round.board)
        current_round.take_action(1, action)
        self.report_text.append("AI: " + str(action) + '\n')
        self.ui.ReportText.addItem("AI: " + actions[action])
        if action == 0:
            self.pass_flag = 1
            self.report_text.append("Human wins" + '\n')
            self.ui.ReportText.addItem("Human wins")
            return True
        return False

    def _show_table(self, current_round, step):
        """Refresh bet, scores, the human's hand and the visible board cards."""
        self.ui.BetValueNumber.setText(str(current_round.curr_bet))
        self.ui.PlayerScoreNumber.setText(str(self.user.points))
        self.ui.OpponentScoreNumber.setText(str(self.opponent.points))

        self.ui.card1_hand1.setText(str(self.user.hand.cards[0]))
        self.ui.card2_hand1.setText(str(self.user.hand.cards[1]))

        board_labels = (self.ui.card1_board, self.ui.card2_board,
                        self.ui.card3_board, self.ui.card4_board,
                        self.ui.card5_board)
        # Two board cards are shown at step 0, one more per later step.
        for index, label in enumerate(board_labels[:step + 2]):
            label.setText(str(current_round.board.cards[index]))

    @staticmethod
    def _combination_label(hand, board):
        """Text shown for a hand: its combination name or the fallback."""
        # Evaluate once; values up to 14 are shown as "Oldest card:".
        rank = hand.check_combination(board)
        if rank > 14:
            return combinations[rank]
        return "Oldest card:"

    def _report_round(self, current_round):
        """Write hands, board, combinations and the outcome to the report."""
        report = self.ui.ReportText
        report.addItem("Results:")
        report.addItem("• Human hand: " + str(self.user.hand))
        report.addItem("  AI hand:    " + str(self.opponent.hand))
        report.addItem("  Board:      " + str(current_round.board))
        report.addItem("• Human combination:")
        report.addItem(
            self._combination_label(self.user.hand, current_round.board))
        report.addItem("  AI combination:")
        report.addItem(
            self._combination_label(self.opponent.hand, current_round.board))

        if self.pass_flag == 0:
            # Nobody passed: summarize() decides (2: human, 0: AI,
            # anything else is reported as a draw).
            winner = current_round.summarize()
            if winner == 2:
                self.report_text.append("Human wins" + '\n')
                report.addItem("Result: " + "Human wins")
            elif winner == 0:
                self.report_text.append("AI wins" + '\n')
                report.addItem("Result: " + "AI wins")
            else:
                self.report_text.append("Draw" + '\n')
                report.addItem("Result: " + "Draw")
        else:
            # The pass was already reported; just re-arm the flag.
            self.pass_flag = 0
        report.addItem(
            "_________________________________________")

    def _clear_table(self):
        """Blank the hand and board card labels."""
        for label in (self.ui.card1_hand1, self.ui.card2_hand1,
                      self.ui.card1_board, self.ui.card2_board,
                      self.ui.card3_board, self.ui.card4_board,
                      self.ui.card5_board):
            label.clear()

    ## Main function, that connects the window and the AI program
    def start_match(self):
        """Run a human-versus-AI match until either side has no points left.

        Every round has at most four betting steps.  Within a step both sides
        act once, in the order selected by ``game.player_turn`` (0: human
        first, 1: AI first).  Action ``0`` is a pass and ends the round
        immediately.  After each round the first mover is swapped.
        """
        self.user = User("human")
        self.opponent = Agent("AI", ValueNetwork, PolicyNetwork)

        game = Game(self.user, self.opponent)

        while self.user.points > 0 and self.opponent.points > 0:
            self.ui.PlayerScoreNumber.setText(str(self.user.points))
            self.ui.OpponentScoreNumber.setText(str(self.opponent.points))

            current_round = Round([self.user, self.opponent], game.player_turn)
            current_round.start()

            for step in range(4):
                # Set some info text
                self._show_table(current_round, step)

                # Make steps: the side whose turn it is acts first.
                if game.player_turn == 0:
                    moves = (self._human_move, self._ai_move)
                elif game.player_turn == 1:
                    moves = (self._ai_move, self._human_move)
                else:
                    moves = ()

                someone_passed = False
                for move in moves:
                    if move(current_round):
                        someone_passed = True
                        break

                # Reset act status so the next step waits for a fresh click.
                if moves:
                    self.human_act_status = 3
                if someone_passed:
                    break

                current_round.open_card()

            # Calculate the round results
            self._report_round(current_round)

            # The other side moves first in the next round.
            game.player_turn = 1 - game.player_turn
            self._clear_table()
Beispiel #6
0
    if os.path.isfile(f_name):
        with open(f_name, 'rb') as f:
            dataset = pickle.load(f)
    else:
        dataset = Dataset()
        dataset.build_dataset(n_episodes_to_load)
        with open(f_name, 'wb') as f:
            pickle.dump(dataset, f)

    test_split = 0.5

    batch_size = len(dataset)
    print(f'Batch size: {batch_size}')
    n_train_samples = int(batch_size * (1 - test_split))
    n_test_samples = batch_size - n_train_samples
    agent = Agent(18, 4, delta=0.05 / 2., sigma=0.1, c=1.45)

    train_idxs = np.random.choice(np.arange(batch_size),
                                  n_train_samples,
                                  replace=False)
    test_idxs = np.array(list(set(np.arange(batch_size)) - set(train_idxs)))

    safety_data = Dataset()
    candidate_data = Dataset()
    batch_episodes = copy.deepcopy(dataset.episodes[0:1 * batch_size])
    candidate_data.episodes = batch_episodes[train_idxs]
    safety_data.episodes = batch_episodes[test_idxs]
    agent.candidate_data = candidate_data
    agent.safety_data = safety_data
    for episode in tqdm(candidate_data.episodes):
        for i in range(len(episode.states)):
            # rewards[-1] = -1
            success.append(0)

        else:
            success.append(1)
        rewards[-1] = rewards[-1] * 10
        dataset.episodes.append(Episode(states, actions, rewards, pb_sas))
    print(f'Success rate: {np.mean(success)}')
    return dataset


# Script entry point: build an agent sized from the environment's
# observation and action spaces, collect a safety and a candidate dataset,
# then call agent.update() once per epoch.
# NOTE(review): this snippet appears to be cut off after the update() call;
# the names mean_return, did_improve, ngen and did_pass are not used in the
# visible part.
if __name__ == '__main__':
    # np.random.seed(RANDOM_SEED)

    agent = Agent(env.observation_space.n,
                  env.action_space.n,
                  delta=0.25,
                  sigma=0.01)
    agent.c = -10
    mean_return = 0
    did_improve = []
    # Two separately generated datasets (third argument 5000 each), both
    # produced with the same agent.
    safety_dataset = generate_dataset(env, agent, 5000)
    candidate_dataset = generate_dataset(env, agent, 5000)
    ngen = 1
    for epoch in range(1000):
        print(f'Epoch: {epoch}')
        print('---------------')

        # One update per epoch on the same two datasets.
        did_pass = agent.update(safety_dataset,
                                candidate_dataset,
                                1,
                                write=False)
Beispiel #8
0
            dataset = pickle.load(f)
    else:
        dataset = Dataset()
        dataset.build_dataset(n_episodes_to_load)
        with open(f_name, 'wb') as f:
            pickle.dump(dataset, f)

    test_split = 0.5

    batch_size = len(dataset) // 100
    print(f'Batch size: {batch_size}')
    n_train_samples = int(batch_size * (1 - test_split))
    n_test_samples = batch_size - n_train_samples

    for it in range(100):
        train_idxs = np.random.choice(np.arange(batch_size), n_train_samples, replace=False)
        test_idxs = np.array(list(set(np.arange(batch_size)) - set(train_idxs)))

        safety_data = Dataset()
        candidate_data = Dataset()
        batch_episodes = copy.deepcopy(dataset.episodes[it * batch_size: (it + 1) * batch_size])
        candidate_data.episodes = batch_episodes[train_idxs]
        safety_data.episodes = batch_episodes[test_idxs]

        agent = Agent(18, 4, delta=0.05/2., sigma=0.1, c=1.45)
        agent.policy_idx = it + 1
        while True:
            did_pass = agent.update(safety_data, candidate_data, 1)
            if did_pass:
                break
                try:
                    s_prime = episode.states[i + 1]
                except IndexError:
                    s_prime = None
                transition_function[s][a] = s_prime
                reward_function[s][a].append(r)
        for s in range(18):
            for a in range(4):
                reward_function[s][a] = (np.mean(reward_function[s][a]),
                                         np.std(reward_function[s][a]))
        with open('transition_function.pkl', 'wb') as f:
            pickle.dump(transition_function, f)
        with open('reward_function.pkl', 'wb') as f:
            pickle.dump(reward_function, f)

    agent = Agent(18, 4, delta=0.05 / 2., sigma=0.1, c=1.45)
    success = []
    for i in range(1, 101):
        print(f'Policy: {i}')
        agent.load_policy(i)
        v_est = evaluate_policy(agent, transition_function, reward_function)
        print(f'Expected Discounted Reward: {v_est[17]}')
        print('v est')
        print(v_est)
        if v_est[17] > 1.41537:
            success.append(1)
        else:
            success.append(0)
        print()
    print(f'Success Rate: {np.mean(success)}')
# # ---------------------EVALUATE-----------------------
#
# print(value_network.evaluate())
# print(policy_network.evaluate())

# -------------------NUMPY EXAMPLE--------------------
# a = [1, 2]
# b = [3, 4]

# c = np.array((a + b))
# print(c)
# print(c.size)

# value_network = ValueNetwork()
# value_network.load()
# policy_network = PolicyNetwork()
# policy_network.load()

# Create the two participants and hand them to a Match.
# NOTE(review): Agent is constructed with a name only here, whereas other
# code in this file passes ValueNetwork and PolicyNetwork as well - confirm
# which signature applies. The match object is only created, not started,
# in the visible code.
player1 = User("human")
player2 = Agent("AI")

match = Match(player1, player2)

# ---------------------RANDOM MATCH-------------------
# random.seed()
# match=RandomMatch()
#
# random.seed()
# match.start_match()
# match.visualize_results()
# match.save_report()