def __init__(self):
    """Prepare a fresh match: a random baseline player versus the AI agent."""
    # Participants of the match.
    self.player = RandomPlayer("Random player")
    self.opponent = Agent("AI", ValueNetwork, PolicyNetwork)
    # Report lines accumulated while the match runs.
    self.report_text = []
    # Per-round point difference, recorded for later visualization.
    self.points_distance_history = []
    # Flag raised when a round ends with a pass instead of a showdown.
    self.pass_flag = 0
def start_match(self):
    """Play a human-vs-AI match, driving the GUI until a side runs out of points.

    While it is the human's turn this method spins the Qt event loop
    (QCoreApplication.processEvents) until a button handler records the chosen
    action in self.human_act_status; the value 3 is the "no action recorded"
    sentinel.
    """
    self.user = User("human")
    self.opponent = Agent("AI", ValueNetwork, PolicyNetwork)
    game = Game(self.user, self.opponent)
    # One iteration = one round; play until either side is out of points.
    while self.user.points > 0 and self.opponent.points > 0:
        self.ui.PlayerScoreNumber.setText(str(self.user.points))
        self.ui.OpponentScoreNumber.setText(str(self.opponent.points))
        # NOTE(review): local `round` shadows the builtin; harmless here but
        # worth renaming in a follow-up.
        round = Round([self.user, self.opponent], game.player_turn)
        round.start()
        # Four betting steps per round; one more board card is shown each step.
        for step in range(4):
            # Set some info text
            self.ui.BetValueNumber.setText(str(round.curr_bet))
            self.ui.PlayerScoreNumber.setText(str(self.user.points))
            self.ui.OpponentScoreNumber.setText(str(self.opponent.points))
            self.ui.card1_hand1.setText(str(self.user.hand.cards[0]))
            self.ui.card2_hand1.setText(str(self.user.hand.cards[1]))
            self.ui.card1_board.setText(str(round.board.cards[0]))
            self.ui.card2_board.setText(str(round.board.cards[1]))
            if step > 0:
                self.ui.card3_board.setText(str(round.board.cards[2]))
            if step > 1:
                self.ui.card4_board.setText(str(round.board.cards[3]))
            if step > 2:
                self.ui.card5_board.setText(str(round.board.cards[4]))
            # Make steps
            if game.player_turn == 0:
                # Waiting for response: spin the event loop until a button
                # click stores the human's action in human_act_status.
                while self.human_act_status == 3:
                    QCoreApplication.processEvents()
                    self.ui.statusbar.showMessage(
                        "waiting for human step...")
                # Acting: human first, then the AI.
                action = self.human_act_status
                round.take_action(0, action)
                self.report_text.append("Human: " + str(action) + '\n')
                self.ui.ReportText.addItem("Human: " + actions[action])
                if action == 0:
                    # Action 0 is a pass: the other side wins the round.
                    self.pass_flag = 1
                    self.report_text.append("AI wins" + '\n')
                    self.ui.ReportText.addItem("AI wins")
                    self.human_act_status = 3
                    break
                action = self.opponent.act(self.opponent.hand, round.board)
                round.take_action(1, action)
                self.report_text.append("AI: " + str(action) + '\n')
                self.ui.ReportText.addItem("AI: " + actions[action])
                if action == 0:
                    self.pass_flag = 1
                    self.report_text.append("Human wins" + '\n')
                    self.ui.ReportText.addItem("Human wins")
                    self.human_act_status = 3
                    break
                # Reset act status and switch turn
                self.human_act_status = 3
            elif game.player_turn == 1:
                # Acting: AI moves first this step.
                action = self.opponent.act(self.opponent.hand, round.board)
                round.take_action(1, action)
                self.report_text.append("AI: " + str(action) + '\n')
                self.ui.ReportText.addItem("AI: " + actions[action])
                if action == 0:
                    self.pass_flag = 1
                    self.report_text.append("Human wins" + '\n')
                    self.ui.ReportText.addItem("Human wins")
                    self.human_act_status = 3
                    break
                # Waiting for response
                while self.human_act_status == 3:
                    QCoreApplication.processEvents()
                    self.ui.statusbar.showMessage(
                        "waiting for human step...")
                action = self.human_act_status
                round.take_action(0, action)
                self.report_text.append("Human: " + str(action) + '\n')
                self.ui.ReportText.addItem("Human: " + actions[action])
                if action == 0:
                    self.pass_flag = 1
                    self.report_text.append("AI wins" + '\n')
                    self.ui.ReportText.addItem("AI wins")
                    self.human_act_status = 3
                    break
                # Reset act status and switch turn
                self.human_act_status = 3
            round.open_card()
        # Calculate the round results
        self.ui.ReportText.addItem("Results:")
        self.ui.ReportText.addItem("• Human hand: " + str(self.user.hand))
        self.ui.ReportText.addItem(" AI hand: " + str(self.opponent.hand))
        self.ui.ReportText.addItem(" Board: " + str(round.board))
        self.ui.ReportText.addItem("• Human combination:")
        # Presumably check_combination() values above 14 index into the
        # `combinations` name table; otherwise only the high card counts.
        # TODO(review): confirm against the Hand implementation.
        if self.user.hand.check_combination(round.board) > 14:
            self.ui.ReportText.addItem(
                combinations[self.user.hand.check_combination(
                    round.board)])
        else:
            self.ui.ReportText.addItem("Oldest card:")
        self.ui.ReportText.addItem(" AI combination:")
        if self.opponent.hand.check_combination(round.board) > 14:
            self.ui.ReportText.addItem(
                combinations[self.opponent.hand.check_combination(
                    round.board)])
        else:
            self.ui.ReportText.addItem("Oldest card:")
        # A pass already decided the round; otherwise compare the hands.
        if self.pass_flag == 0:
            winner = round.summarize()
            if winner == 2:
                self.report_text.append("Human wins" + '\n')
                self.ui.ReportText.addItem("Result: " + "Human wins")
            elif winner == 0:
                self.report_text.append("AI wins" + '\n')
                self.ui.ReportText.addItem("Result: " + "AI wins")
            else:
                self.report_text.append("Draw" + '\n')
                self.ui.ReportText.addItem("Result: " + "Draw")
        else:
            self.pass_flag = 0
        self.ui.ReportText.addItem(
            "_________________________________________")
        # Alternate which side opens the next round, then clear the table.
        game.player_turn = 1 - game.player_turn
        self.ui.card1_hand1.clear()
        self.ui.card2_hand1.clear()
        self.ui.card1_board.clear()
        self.ui.card2_board.clear()
        self.ui.card3_board.clear()
        self.ui.card4_board.clear()
        self.ui.card5_board.clear()
class RandomMatch:
    """Unattended evaluation match between a RandomPlayer and the AI agent.

    Plays rounds until one side runs out of points, accumulating a text
    report and the per-round point difference for later visualization.
    """

    def __init__(self):
        # Report lines accumulated while the match runs.
        self.report_text = []
        # Point difference (player - opponent) recorded before each round.
        self.points_distance_history = []
        # Set to 1 when a round ended with a pass instead of a showdown.
        self.pass_flag = 0
        self.player = RandomPlayer("Random player")
        self.opponent = Agent("AI", ValueNetwork, PolicyNetwork)

    def start_match(self):
        """Play rounds until the player or the opponent has no points left."""
        game = Game(self.player, self.opponent)
        while self.player.points > 0 and self.opponent.points > 0:
            self.points_distance_history.append(self.player.points -
                                                self.opponent.points)
            print("Player: " + str(self.player.points) +
                  " Opponent: " + str(self.opponent.points))
            # Renamed from `round` to avoid shadowing the builtin.
            game_round = Round([self.player, self.opponent], game.player_turn)
            game_round.start()
            # Four betting steps per round; a card is opened after each step.
            for step in range(4):
                # Make steps
                if game.player_turn == 0:
                    # Acting: random player moves first, then the agent.
                    action = self.player.act()
                    game_round.take_action(0, action)
                    self.report_text.append("Player: " + str(action) + '\n')
                    if action == 0:
                        # Action 0 is a pass: the other side takes the round.
                        self.pass_flag = 1
                        self.report_text.append("AI wins" + '\n')
                        game.player_turn = 1
                        break
                    action = self.opponent.act(self.opponent.hand,
                                               game_round.board)
                    game_round.take_action(1, action)
                    self.report_text.append("AI: " + str(action) + '\n')
                    if action == 0:
                        self.pass_flag = 1
                        self.report_text.append("Player wins" + '\n')
                        game.player_turn = 1
                        break
                    # Switch turn
                    game.player_turn = 1
                elif game.player_turn == 1:
                    # Acting: the agent moves first, then the random player.
                    action = self.opponent.act(self.opponent.hand,
                                               game_round.board)
                    game_round.take_action(1, action)
                    self.report_text.append("AI: " + str(action) + '\n')
                    if action == 0:
                        self.pass_flag = 1
                        self.report_text.append("Player wins" + '\n')
                        game.player_turn = 0
                        break
                    action = self.player.act()
                    game_round.take_action(0, action)
                    self.report_text.append("Player: " + str(action) + '\n')
                    if action == 0:
                        self.pass_flag = 1
                        self.report_text.append("AI wins" + '\n')
                        game.player_turn = 0
                        break
                    # Reset act status and switch turn
                    game.player_turn = 0
                game_round.open_card()
            # A pass already decided the round; otherwise compare hands.
            if self.pass_flag == 0:
                winner = game_round.summarize()
                if winner == 2:
                    self.report_text.append("Player wins" + '\n')
                elif winner == 0:
                    self.report_text.append("AI wins" + '\n')
                else:
                    self.report_text.append("Draw" + '\n')
            else:
                self.pass_flag = 0

    ## Visualize results of the match
    def visualize_results(self):
        """Plot the recorded per-round point difference."""
        plt.plot(self.points_distance_history)
        plt.title('Points change')
        plt.ylabel('points difference')
        plt.xlabel('round')
        plt.show()

    ## Saves the report about the last match
    def save_report(self):
        """Write the accumulated report lines to reports\\random_report.txt."""
        # Context manager guarantees the file is closed even if writing fails
        # (the original open()/close() pair leaked the handle on error).
        with open("reports\\random_report.txt", 'w') as file:
            file.writelines(self.report_text)
states.append(s) actions.append(a) rewards.append(r) pb_sas.append(action_probs[a]) s = poly.fit_transform(next_state.reshape(1, -1)) rewards[-1] = rewards[-1] * 10 dataset.episodes.append(Episode(states, actions, rewards, pb_sas)) return dataset if __name__ == '__main__': # np.random.seed(RANDOM_SEED) s = env.reset() n_states = poly.fit_transform(s.reshape(1, -1)) agent = Agent(n_states.shape[1], env.action_space.n, delta=0.1, sigma=0.1, is_tabular=False) agent.c = 0 mean_return = 0 did_improve = [] safety_dataset = generate_dataset(env, agent, 1000) candidate_dataset = generate_dataset(env, agent, 1000) ngen = 1 for epoch in range(1000): print(f'Epoch: {epoch}') print('---------------') did_pass = agent.update(safety_dataset, candidate_dataset, 1, write=False) if did_pass: eval_dataset = generate_dataset(env, agent, 1000) gt_estimates = agent.expected_discounted_return(eval_dataset)
class MainWindow(QtWidgets.QMainWindow, poker_mini_gui.Ui_MainWindow):  # poker_gui.Ui_MainWindow
    """Main Qt window hosting the poker GUI and the human-vs-AI match loop.

    Button handlers only record the human's choice in self.human_act_status
    (0 = pass, 1 = call, 2 = raise, 3 = no action recorded); start_match()
    polls that field while spinning the event loop.
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): the class both inherits poker_mini_gui.Ui_MainWindow
        # and instantiates a separate Ui_MainWindow; one of the two looks
        # redundant -- confirm before cleaning up.
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        # Connect signals to functions
        self.ui.PassButton.clicked.connect(self.pass_clicked)
        self.ui.CallButton.clicked.connect(self.call_clicked)
        self.ui.RaiseButton.clicked.connect(self.raise_clicked)
        self.ui.actionNew_game.triggered.connect(self.start_session)
        self.ui.actionSave_report.triggered.connect(self.save_report)
        self.ui.actionExit.triggered.connect(self.close)
        self.is_session_active = 0
        self.human_act_status = 3  # 3 means no action recorded
        self.report_text = []
        self.pass_flag = 0

    ## Response for pass-button click
    def pass_clicked(self):
        if self.is_session_active == 1:
            self.human_act_status = 0
            self.ui.statusbar.showMessage("Human passed")

    ## Response for call-button click
    def call_clicked(self):
        if self.is_session_active == 1:
            self.human_act_status = 1
            self.ui.statusbar.showMessage("Human called")

    ## Response for raise-button click
    def raise_clicked(self):
        if self.is_session_active == 1:
            self.human_act_status = 2
            self.ui.statusbar.showMessage("Human raised")

    ## Response for StartGame-button click
    def start_session(self):
        if self.is_session_active == 0:
            self.is_session_active = 1
            self.start_match()

    ## Response for the Save-report menu action
    ## (was a copy-pasted "StartGame-button" comment)
    def save_report(self):
        """Write the accumulated report lines to reports\\report.txt."""
        # Context manager guarantees the file is closed even if writing fails
        # (the original open()/close() pair leaked the handle on error).
        with open("reports\\report.txt", 'w') as file:
            file.writelines(self.report_text)

    ## Main function, that connects the window and the AI program
    def start_match(self):
        """Play a human-vs-AI match, driving the GUI until a side is broke.

        Blocks inside a processEvents() busy-wait whenever it is the
        human's turn, until a button handler updates human_act_status.
        """
        self.user = User("human")
        self.opponent = Agent("AI", ValueNetwork, PolicyNetwork)
        game = Game(self.user, self.opponent)
        # One iteration = one round of the match.
        while self.user.points > 0 and self.opponent.points > 0:
            self.ui.PlayerScoreNumber.setText(str(self.user.points))
            self.ui.OpponentScoreNumber.setText(str(self.opponent.points))
            # NOTE(review): local `round` shadows the builtin.
            round = Round([self.user, self.opponent], game.player_turn)
            round.start()
            # Four betting steps; one more board card is shown each step.
            for step in range(4):
                # Set some info text
                self.ui.BetValueNumber.setText(str(round.curr_bet))
                self.ui.PlayerScoreNumber.setText(str(self.user.points))
                self.ui.OpponentScoreNumber.setText(str(self.opponent.points))
                self.ui.card1_hand1.setText(str(self.user.hand.cards[0]))
                self.ui.card2_hand1.setText(str(self.user.hand.cards[1]))
                self.ui.card1_board.setText(str(round.board.cards[0]))
                self.ui.card2_board.setText(str(round.board.cards[1]))
                if step > 0:
                    self.ui.card3_board.setText(str(round.board.cards[2]))
                if step > 1:
                    self.ui.card4_board.setText(str(round.board.cards[3]))
                if step > 2:
                    self.ui.card5_board.setText(str(round.board.cards[4]))
                # Make steps
                if game.player_turn == 0:
                    # Waiting for response from the human.
                    while self.human_act_status == 3:
                        QCoreApplication.processEvents()
                        self.ui.statusbar.showMessage(
                            "waiting for human step...")
                    # Acting: human first, then the AI.
                    action = self.human_act_status
                    round.take_action(0, action)
                    self.report_text.append("Human: " + str(action) + '\n')
                    self.ui.ReportText.addItem("Human: " + actions[action])
                    if action == 0:
                        # Pass ends the round in the opponent's favor.
                        self.pass_flag = 1
                        self.report_text.append("AI wins" + '\n')
                        self.ui.ReportText.addItem("AI wins")
                        self.human_act_status = 3
                        break
                    action = self.opponent.act(self.opponent.hand, round.board)
                    round.take_action(1, action)
                    self.report_text.append("AI: " + str(action) + '\n')
                    self.ui.ReportText.addItem("AI: " + actions[action])
                    if action == 0:
                        self.pass_flag = 1
                        self.report_text.append("Human wins" + '\n')
                        self.ui.ReportText.addItem("Human wins")
                        self.human_act_status = 3
                        break
                    # Reset act status and switch turn
                    self.human_act_status = 3
                elif game.player_turn == 1:
                    # Acting: AI moves first this step.
                    action = self.opponent.act(self.opponent.hand, round.board)
                    round.take_action(1, action)
                    self.report_text.append("AI: " + str(action) + '\n')
                    self.ui.ReportText.addItem("AI: " + actions[action])
                    if action == 0:
                        self.pass_flag = 1
                        self.report_text.append("Human wins" + '\n')
                        self.ui.ReportText.addItem("Human wins")
                        self.human_act_status = 3
                        break
                    # Waiting for response
                    while self.human_act_status == 3:
                        QCoreApplication.processEvents()
                        self.ui.statusbar.showMessage(
                            "waiting for human step...")
                    action = self.human_act_status
                    round.take_action(0, action)
                    self.report_text.append("Human: " + str(action) + '\n')
                    self.ui.ReportText.addItem("Human: " + actions[action])
                    if action == 0:
                        self.pass_flag = 1
                        self.report_text.append("AI wins" + '\n')
                        self.ui.ReportText.addItem("AI wins")
                        self.human_act_status = 3
                        break
                    # Reset act status and switch turn
                    self.human_act_status = 3
                round.open_card()
            # Calculate the round results
            self.ui.ReportText.addItem("Results:")
            self.ui.ReportText.addItem("• Human hand: " + str(self.user.hand))
            self.ui.ReportText.addItem(" AI hand: " + str(self.opponent.hand))
            self.ui.ReportText.addItem(" Board: " + str(round.board))
            self.ui.ReportText.addItem("• Human combination:")
            # Presumably check_combination() values above 14 index into the
            # `combinations` name table -- TODO(review): confirm.
            if self.user.hand.check_combination(round.board) > 14:
                self.ui.ReportText.addItem(
                    combinations[self.user.hand.check_combination(
                        round.board)])
            else:
                self.ui.ReportText.addItem("Oldest card:")
            self.ui.ReportText.addItem(" AI combination:")
            if self.opponent.hand.check_combination(round.board) > 14:
                self.ui.ReportText.addItem(
                    combinations[self.opponent.hand.check_combination(
                        round.board)])
            else:
                self.ui.ReportText.addItem("Oldest card:")
            # A pass already decided the round; otherwise compare hands.
            if self.pass_flag == 0:
                winner = round.summarize()
                if winner == 2:
                    self.report_text.append("Human wins" + '\n')
                    self.ui.ReportText.addItem("Result: " + "Human wins")
                elif winner == 0:
                    self.report_text.append("AI wins" + '\n')
                    self.ui.ReportText.addItem("Result: " + "AI wins")
                else:
                    self.report_text.append("Draw" + '\n')
                    self.ui.ReportText.addItem("Result: " + "Draw")
            else:
                self.pass_flag = 0
            self.ui.ReportText.addItem(
                "_________________________________________")
            # Alternate which side opens the next round, then clear the table.
            game.player_turn = 1 - game.player_turn
            self.ui.card1_hand1.clear()
            self.ui.card2_hand1.clear()
            self.ui.card1_board.clear()
            self.ui.card2_board.clear()
            self.ui.card3_board.clear()
            self.ui.card4_board.clear()
            self.ui.card5_board.clear()
if os.path.isfile(f_name): with open(f_name, 'rb') as f: dataset = pickle.load(f) else: dataset = Dataset() dataset.build_dataset(n_episodes_to_load) with open(f_name, 'wb') as f: pickle.dump(dataset, f) test_split = 0.5 batch_size = len(dataset) print(f'Batch size: {batch_size}') n_train_samples = int(batch_size * (1 - test_split)) n_test_samples = batch_size - n_train_samples agent = Agent(18, 4, delta=0.05 / 2., sigma=0.1, c=1.45) train_idxs = np.random.choice(np.arange(batch_size), n_train_samples, replace=False) test_idxs = np.array(list(set(np.arange(batch_size)) - set(train_idxs))) safety_data = Dataset() candidate_data = Dataset() batch_episodes = copy.deepcopy(dataset.episodes[0:1 * batch_size]) candidate_data.episodes = batch_episodes[train_idxs] safety_data.episodes = batch_episodes[test_idxs] agent.candidate_data = candidate_data agent.safety_data = safety_data for episode in tqdm(candidate_data.episodes): for i in range(len(episode.states)):
# rewards[-1] = -1 success.append(0) else: success.append(1) rewards[-1] = rewards[-1] * 10 dataset.episodes.append(Episode(states, actions, rewards, pb_sas)) print(f'Success rate: {np.mean(success)}') return dataset if __name__ == '__main__': # np.random.seed(RANDOM_SEED) agent = Agent(env.observation_space.n, env.action_space.n, delta=0.25, sigma=0.01) agent.c = -10 mean_return = 0 did_improve = [] safety_dataset = generate_dataset(env, agent, 5000) candidate_dataset = generate_dataset(env, agent, 5000) ngen = 1 for epoch in range(1000): print(f'Epoch: {epoch}') print('---------------') did_pass = agent.update(safety_dataset, candidate_dataset, 1, write=False)
dataset = pickle.load(f) else: dataset = Dataset() dataset.build_dataset(n_episodes_to_load) with open(f_name, 'wb') as f: pickle.dump(dataset, f) test_split = 0.5 batch_size = len(dataset) // 100 print(f'Batch size: {batch_size}') n_train_samples = int(batch_size * (1 - test_split)) n_test_samples = batch_size - n_train_samples for it in range(100): train_idxs = np.random.choice(np.arange(batch_size), n_train_samples, replace=False) test_idxs = np.array(list(set(np.arange(batch_size)) - set(train_idxs))) safety_data = Dataset() candidate_data = Dataset() batch_episodes = copy.deepcopy(dataset.episodes[it * batch_size: (it + 1) * batch_size]) candidate_data.episodes = batch_episodes[train_idxs] safety_data.episodes = batch_episodes[test_idxs] agent = Agent(18, 4, delta=0.05/2., sigma=0.1, c=1.45) agent.policy_idx = it + 1 while True: did_pass = agent.update(safety_data, candidate_data, 1) if did_pass: break
try: s_prime = episode.states[i + 1] except IndexError: s_prime = None transition_function[s][a] = s_prime reward_function[s][a].append(r) for s in range(18): for a in range(4): reward_function[s][a] = (np.mean(reward_function[s][a]), np.std(reward_function[s][a])) with open('transition_function.pkl', 'wb') as f: pickle.dump(transition_function, f) with open('reward_function.pkl', 'wb') as f: pickle.dump(reward_function, f) agent = Agent(18, 4, delta=0.05 / 2., sigma=0.1, c=1.45) success = [] for i in range(1, 101): print(f'Policy: {i}') agent.load_policy(i) v_est = evaluate_policy(agent, transition_function, reward_function) print(f'Expected Discounted Reward: {v_est[17]}') print('v est') print(v_est) if v_est[17] > 1.41537: success.append(1) else: success.append(0) print() print(f'Success Rate: {np.mean(success)}')
# # ---------------------EVALUATE----------------------- # # print(value_network.evaluate()) # print(policy_network.evaluate()) # -------------------NUMPY EXAMPLE-------------------- # a = [1, 2] # b = [3, 4] # c = np.array((a + b)) # print(c) # print(c.size) # value_network = ValueNetwork() # value_network.load() # policy_network = PolicyNetwork() # policy_network.load() player1 = User("human") player2 = Agent("AI") match = Match(player1, player2) # ---------------------RANDOM MATCH------------------- # random.seed() # match=RandomMatch() # # random.seed() # match.start_match() # match.visualize_results() # match.save_report()