def benchmark_test(primary_model, benchmark_model, num_games,
                   benchmark_bot_name='benchmark_bot', run_id=None):
    epsilon = Epsilon()
    player_1 = Agent(name=cfg.bot_1_name, model=primary_model, epsilon=epsilon)
    player_2 = Agent(name=benchmark_bot_name, model=benchmark_model, epsilon=epsilon)

    # Put any neural models into evaluation mode for benchmarking
    if 'policy_net' in dir(player_1.model):
        player_1.model.policy_net.eval()
    if 'policy_net' in dir(player_2.model):
        player_2.model.policy_net.eval()

    game_output = []
    for j in range(num_games):
        player_list = [player_1, player_2]
        game = Game(name="pinochle", players=player_list, run_id=run_id,
                    current_cycle=None)
        game.deal()
        game_output.append(game.play())

    winner_list, exp_df = pu.parse_game_output(game_output=game_output)

    if run_id is not None:  # Store history
        db.upload_exp(df=exp_df)

    # winner_list holds each game's winner index (0 = primary model),
    # so this is the primary model's win rate
    return 1 - sum(winner_list) / len(winner_list)
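# Example usage (an illustrative sketch: `trained_dqn` and the 500-game count
# are hypothetical; RandomBot is the baseline opponent used elsewhere here):
#
#   win_rate = benchmark_test(primary_model=trained_dqn,
#                             benchmark_model=RandomBot(),
#                             num_games=500)
#   print(f'Win rate vs. random play: {win_rate * 100:.1f}%')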
def trainAgent(tries, episodes, scenario, teacherAgent=None,
               feedbackStrategy=0, feedbackParameter=0.0):
    # Plain RL when no teacher is given; interactive RL (IRL) otherwise
    if teacherAgent is None:
        filenameSteps = resultsFolder + 'stepsRL.csv'
        filenameRewards = resultsFolder + 'rewardsRL.csv'
        filenameFailures = resultsFolder + 'failuresRL.csv'
    else:
        filenameSteps = resultsFolder + 'stepsIRL.csv'
        filenameRewards = resultsFolder + 'rewardsIRL.csv'
        filenameFailures = resultsFolder + 'failuresIRL.csv'

    files.createFile(filenameSteps)
    files.createFile(filenameRewards)
    files.createFile(filenameFailures)

    for i in range(tries):
        print('Training agent number: ' + str(i + 1))
        agent = Agent(scenario)
        [steps, rewards, failures] = agent.train(episodes, teacherAgent,
                                                 feedbackStrategy, feedbackParameter)
        files.addToFile(filenameSteps, steps)
        files.addFloatToFile(filenameRewards, rewards)
        files.addToFile(filenameFailures, failures)
    # endfor

    return agent
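# Example usage (a sketch; the 'cliff' scenario name and the feedback values
# are placeholders, not values taken from this repo). A previously trained
# agent can serve as the interactive teacher:
#
#   rl_agent = trainAgent(tries=10, episodes=500, scenario='cliff')
#   irl_agent = trainAgent(tries=10, episodes=500, scenario='cliff',
#                          teacherAgent=rl_agent, feedbackStrategy=1,
#                          feedbackParameter=0.3)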
def initAgents():
    for a in range(agentNum - 1):
        newAgent = Agent(size=agentSize, x=0, y=0, z=0)
        newAgent.onRadius(size=agentSize, limit=diffusionLimit)
        agents.append(newAgent)
def trainAgent(tries, episodes, scenario, explorationstrat, learning_alg,
               alpha, gamma, epsilon, sigma, delta, tau, suffix):
    filenameSteps = resultsFolder + 'stepsRL_' + str(suffix) + '.csv'
    filenameRewards = resultsFolder + 'rewardsRL_' + str(suffix) + '.csv'
    files.createFile(filenameSteps)
    files.createFile(filenameRewards)

    for i in range(tries):
        print('Training agent number: ' + str(i + 1))
        agent = Agent(scenario, alpha, gamma, epsilon, sigma, delta, tau)
        [steps, rewards] = agent.train(episodes, explorationstrat, learning_alg)
        files.addToFile(filenameSteps, steps)
        files.addFloatToFile(filenameRewards, rewards)
    # endfor

    return agent
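# Example usage (a sketch; the hyperparameter values and the 'egreedy' /
# 'qlearning' identifiers are assumptions, not values taken from this repo):
#
#   agent = trainAgent(tries=5, episodes=1000, scenario='cliff',
#                      explorationstrat='egreedy', learning_alg='qlearning',
#                      alpha=0.1, gamma=0.95, epsilon=0.1,
#                      sigma=0.5, delta=0.01, tau=1.0, suffix='baseline')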
def human_test(model):
    epsilon = Epsilon()
    player_1 = Agent(name=cfg.bot_1_name, model=model, epsilon=epsilon)
    player_2 = Human("YOU")

    if 'policy_net' in dir(model):
        model.policy_net.eval()

    # Set logging level to debug
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info("Human test enabled, initializing AI uprising...")

    # Initialize game
    player_list = [player_1, player_2]
    game = Game(name="pinochle", players=player_list, run_id=None,
                current_cycle=None, human_test=True)
    game.deal()
    game.play()

    # Set logging level back to config
    logging.getLogger().setLevel(cfg.logging_level)
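# Example usage (a sketch; the `load` call mirroring `model_1.save()` is a
# hypothetical way to restore a checkpoint, and the exact API may differ):
#
#   model = DQN(run_id='my_run', **cfg.DQN_params)
#   model.load(title='my_run')  # hypothetical load call
#   human_test(model)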
# add the agent to the tree
tree.append(agents[a])
writer.writerow([agents[a].size, agents[a].x, agents[a].y, agents[a].z])

# shrink the particle size for subsequent agents, down to a minimum
if agentSize > minAgentSize:
    agentSize *= shrink

# reset the agent
del agents[a]
newAgent = Agent(x=0, y=0, z=0, size=agentSize)
newAgent.onRadius(limit=diffusionLimit, size=agentSize)
agents.append(newAgent)

# expand the spawn radius as the tree grows toward its current edge
if diffusionLimit < maxDiffusionDistance:
    if abs(tree[c].x) >= diffusionLimit * 0.99:
        diffusionLimit = round(abs(tree[c].x) * 1.025, 1)
    if abs(tree[c].y) >= diffusionLimit * 0.99:
        diffusionLimit = round(abs(tree[c].y) * 1.025, 1)
    if abs(tree[c].z) >= diffusionLimit * 0.99:
        diffusionLimit = round(abs(tree[c].z) * 1.025, 1)
# self.train_self(num_epochs=5,
#                 exp_gen=exp_gen,
#                 is_storing_history=True)
#
# states = data.state.to(self.device)
# actions = data.action.to(self.device)
# meld_actions = data.meld_action.to(self.device)
# next_states = data.next_state.to(self.device)
# rewards = data.reward.to(self.device)
#
# pu.train_model(model=self, config=cfg)

epsilon = Epsilon('eval')
player_1 = Agent(name=cfg.random_bot_name + '1', model=RandomBot(), epsilon=epsilon)
player_2 = Agent(name=cfg.random_bot_name + '2', model=RandomBot(), epsilon=epsilon)
player_human = Human(name='Me, no you?')  # constructed but not seated in this two-bot test

player_list = [player_1, player_2]
game = Game(name=cfg.game, players=player_list, run_id='TEST', current_cycle=None)
game.deal()
winner_index, exp_df = game.play()
db.upload_exp(df=exp_df)
def round_robin(model_list, num_games, verbose=True, plot=True, device='cuda:0'):
    start_time = time.time()
    epsilon = Epsilon()
    model_wins = OrderedDict()

    for i, model in enumerate(model_list):
        model_wins[f'Player {i}'] = [0, model]
        if 'device' in dir(model) and model.device != device:
            model.policy_net = model.policy_net.to(device)

    # Play every unordered pair of models against each other
    for i, p1_model in enumerate(model_list):
        for j, p2_model in enumerate(model_list):
            if i < j:
                round_start_time = time.time()
                p1 = Agent(name=f'Player {i}', model=p1_model, epsilon=epsilon)
                p2 = Agent(name=f'Player {j}', model=p2_model, epsilon=epsilon)
                if verbose:
                    print(f'Player {i} vs. Player {j}...')

                # benchmark_test returns player 1's win rate; convert to game counts
                p1_wins = int(
                    benchmark_test(primary_model=p1_model,
                                   benchmark_model=p2_model,
                                   num_games=num_games) * num_games)
                p2_wins = int(num_games - p1_wins)
                if verbose:
                    print(
                        f'Player {i}: {p1_wins}\tPlayer {j}: {p2_wins}\tDuration: {util.get_pretty_time(time.time() - round_start_time)}'
                    )
                    print(cs.DIVIDER)

                model_wins[p1.name][0] += p1_wins
                model_wins[p2.name][0] += p2_wins

    output = sorted(model_wins.items(), key=lambda kv: kv[1][0], reverse=True)

    if verbose:
        for i, model in enumerate(output):
            print(f'Rank {i + 1}: {model[0]} with {model[1][0]} wins')
        # n * (n - 1) / 2 pairings, num_games each
        total_games = len(model_list) / 2 * (len(model_list) - 1) * num_games
        total_duration = time.time() - start_time
        avg_time_per_game = total_duration / total_games
        print(
            f'{total_games} total games played over {util.get_pretty_time(total_duration)} ({util.get_pretty_time(avg_time_per_game)} per game)'
        )

    if plot:
        xs = [x[0] for x in model_wins.items()]
        heights = [x[1][0] for x in model_wins.items()]
        plt.bar(height=heights, x=xs)
        plt.title('Round Robin Tournament Results')
        plt.xlabel('Model')
        plt.ylabel('Total Number of Wins')

    return output
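# Example usage (an illustrative sketch; `dqn_v1` and `dqn_v2` are hypothetical
# stand-ins for whatever checkpointed models you want to compare):
#
#   standings = round_robin(model_list=[RandomBot(), dqn_v1, dqn_v2],
#                           num_games=100, device='cuda:0')
#   # `standings` is sorted best-first as (name, [wins, model]) pairs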
def run_full_experiment(config):
    # Archive old experience
    db.archive_exp(db.get_all_exp())
    db.delete_all_exp()

    util.setup_file_logger(name=config.run_id, filename=config.run_id)
    logger = logging.getLogger(config.run_id)
    start_time = time.time()

    # Define players
    model_1 = DQN(run_id=config.run_id, **config.DQN_params)
    model_2 = model_1.copy()
    epsilon = Epsilon(epsilon_func=config.epsilon_func,
                      max_epsilon=config.max_epsilon,
                      min_epsilon=config.min_epsilon,
                      eval_epsilon=config.eval_epsilon,
                      num_cycles=config.num_cycles,
                      decrement=config.epsilon_decrement)
    player_list = [
        Agent(name=config.bot_1_name, model=model_1, epsilon=epsilon),
        Agent(name=config.bot_2_name, model=model_2, epsilon=epsilon)
    ]

    winner_list = []
    previous_experience_id = 0
    util.save_config(config=config, path=config.run_id)

    # For each cycle
    logger.info('Beginning run titled: ' + config.run_id)
    logger.info(cs.DIVIDER)
    for i in range(1, config.num_cycles + 1):
        # For each episode, play through the episode and insert each
        # state/action pair into the database
        logger.info('Beginning cycle: ' + str(i) + ' / ' + str(config.num_cycles) +
                    '\tCumulative Time Elapsed: ' +
                    util.get_pretty_time(time.time() - start_time))
        logger.info(f'Current Epsilon: {epsilon.get_epsilon(current_cycle=i):.3f}')
        cycle_start_time = time.time()

        # Async parallelization. May want to consider cpu_count - 1 so the user can
        # keep working while it runs. Slower than expected, likely due to memory copying.
        # with mp.Pool(mp.cpu_count() - 1) as pool:
        #     game_output = pool.starmap_async(parallel.play_game,
        #                                      [(config.game, player_list, config.run_id, i)
        #                                       for j in range(config.episodes_per_cycle)]).get()

        # Old serial method
        winner_list += pu.play_games(num_games=config.episodes_per_cycle,
                                     name=config.game,
                                     players=player_list,
                                     run_id=config.run_id,
                                     current_cycle=i,
                                     config=config)
        logger.info('Data collection complete.\tTotal Episode Time: ' +
                    util.get_pretty_time(time.time() - cycle_start_time))
        logger.info('Loading experience and training model...')
        training_start_time = time.time()

        # Import data from the database based on the experience replay buffer
        # and train the model
        pu.train_model(model=model_1, config=config)
        logger.info('Model training complete.\tTotal Training Time: ' +
                    util.get_pretty_time(time.time() - training_start_time))

        # Update model_2
        if i % config.player_2_update_freq == 0:
            logger.info(cs.DIVIDER)
            logger.info('Storing history and setting model 2 equal to model 1...')
            player_list[0].model.policy_net.store_history()
            player_list[1].set_model(model=model_1.copy())

        # Benchmark
        if i % config.benchmark_freq == 0:
            logger.info(cs.DIVIDER)
            logger.info('Benchmarking...')

            # Player 1's win rate against player 2 since the last benchmark
            benchmark_cycle_win_rate = 1 - sum(winner_list) / len(winner_list)
            winner_list = []  # Reset winner list

            # Play against a random bot and measure the win rate
            random_win_rate = benchmark.benchmark_test(
                primary_model=model_1,
                benchmark_model=RandomBot(),
                benchmark_bot_name=config.random_bot_name,
                num_games=config.random_bot_cycles,
                run_id=config.run_id if config.log_random_benchmark else None)
            logger.info(f'Winrate vs. Random Bot: {random_win_rate * 100:.1f}%')

            # Play against an expert-policy bot and measure the win rate
            # expert_policy_win_rate = benchmark.benchmark_test(
            #     primary_model=model_1,
            #     benchmark_model=ExpertPolicy(),
            #     benchmark_bot_name=config.expert_policy_bot_name,
            #     num_games=config.random_bot_cycles,
            #     run_id=config.run_id if config.log_expert_policy_benchmark else None)
            # logger.info(f'Winrate vs. Expert Policy: {expert_policy_win_rate * 100:.1f}%')

            # Collect the average reward from the database
            average_reward = benchmark.get_average_reward(
                run_id=config.run_id,
                previous_experience_id=previous_experience_id,
                agent_id=config.bot_1_name,
                opponent_id=config.bot_2_name)

            db.insert_metrics(run_id=config.run_id,
                              win_rate=benchmark_cycle_win_rate,
                              win_rate_random=random_win_rate,
                              win_rate_expert_policy=0.0,
                              average_reward=average_reward)
            previous_experience_id = db.get_max_id(config.run_id)

        # Checkpoint
        if config.checkpoint_freq is not None and i % config.checkpoint_freq == 0:
            logger.info(cs.DIVIDER)
            logger.info('Model checkpoint reached. Saving checkpoint...')
            model_1.save(folder=os.path.join(config.checkpoint_folder, config.run_id),
                         title=util.get_checkpoint_model_name(cycle=i))

        logger.info('Cycle complete.\tTotal Cycle Time: ' +
                    util.get_pretty_time(time.time() - cycle_start_time))
        logger.info(cs.DIVIDER)

    logger.info('Training complete.\tTotal Run Time: ' +
                util.get_pretty_time(time.time() - start_time) +
                '\tSaving model and exiting...')
    model_1.save(title=config.run_id)
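# Example usage (a sketch). Assumes a config module exposing the fields read
# above (run_id, DQN_params, the epsilon settings, cycle counts, and so on):
#
#   import config as cfg
#   run_full_experiment(config=cfg)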